diff --git a/DEPS b/DEPS index a6884c7d551..1885269bf14 100644 --- a/DEPS +++ b/DEPS @@ -42,6 +42,7 @@ vars = { "args_tag": "@0.13.0", "async_tag": "@1.2.0", "barback_tag" : "@0.15.2+6", + "boringssl_rev" : "@daeafc22c66ad48f6b32fc8d3362eb9ba31b774e", "charcode_tag": "@1.1.0", "chrome_rev" : "@19997", "clang_rev" : "@28450", @@ -89,6 +90,7 @@ vars = { "pub_cache_tag": "@v0.1.0", "pub_semver_tag": "@1.2.1", "quiver_tag": "@0.21.4", + "root_certificates_rev": "@c3a41df63afacec62fcb8135196177e35fe72f71", "scheduled_test_tag": "@0.12.1+2", "shelf_rev": "@1e87b79b21ac5e6fa2f93576d6c06eaa65285ef4", "smoke_rev" : "@f3361191cc2a85ebc1e4d4c33aec672d7915aba9", @@ -143,6 +145,14 @@ deps = { Var("chromium_git") + "/chromium/src/net/third_party/nss.git" + Var("net_nss_rev"), + Var("dart_root") + "/third_party/boringssl/src": + "https://boringssl.googlesource.com/boringssl.git" + + Var("boringssl_rev"), + + Var("dart_root") + "/third_party/root_certificates": + "https://github.com/dart-lang/root_certificates.git" + + Var("root_certificates_rev"), + Var("dart_root") + "/third_party/jinja2": Var("chromium_git") + "/chromium/src/third_party/jinja2.git" + Var("jinja2_rev"), diff --git a/runtime/bin/bin.gypi b/runtime/bin/bin.gypi index 12dd8b489f3..8928af1fe08 100644 --- a/runtime/bin/bin.gypi +++ b/runtime/bin/bin.gypi @@ -222,11 +222,6 @@ ['exclude', '_test\\.(cc|h)$'], ], 'conditions': [ - ['dart_io_support==1 and dart_io_secure_socket==1', { - 'dependencies': [ - 'bin/net/ssl.gyp:libssl_dart', - ], - }], ['dart_io_secure_socket==0', { 'defines': [ 'DART_IO_SECURE_SOCKET_DISABLED' @@ -292,16 +287,16 @@ 'io_natives.cc', ], 'conditions': [ - ['dart_io_support==1 and dart_io_secure_socket==1', { - 'dependencies': [ - 'bin/net/ssl.gyp:libssl_dart', - ], - }], - ['dart_io_support==1 and dart_io_secure_socket==0', { + ['dart_io_support==1', { 'dependencies': [ 'bin/net/zlib.gyp:zlib_dart', ], }], + ['dart_io_support==1 and dart_io_secure_socket==1', { + 'dependencies': [ + '../third_party/boringssl/boringssl_dart.gyp:boringssl', + ], + }], ['dart_io_secure_socket==0', { 'defines': [ 'DART_IO_SECURE_SOCKET_DISABLED' @@ -333,19 +328,6 @@ }, }], ], - 'configurations': { - 'Dart_Android_Base': { - 'target_conditions': [ - ['_toolset=="target"', { - 'defines': [ - # Needed for sources outside of nss that include pr and ssl - # header files. - 'MDCPUCFG="md/_linux.cfg"', - ], - }], - ], - }, - }, }, { 'target_name': 'libdart_nosnapshot', diff --git a/runtime/bin/io_impl_sources.gypi b/runtime/bin/io_impl_sources.gypi index 9236ec744fe..611b458f5f6 100644 --- a/runtime/bin/io_impl_sources.gypi +++ b/runtime/bin/io_impl_sources.gypi @@ -26,9 +26,9 @@ 'filter_unsupported.cc', 'io_service.cc', 'io_service.h', + 'io_service_no_ssl.cc', + 'io_service_no_ssl.h', 'io_service_unsupported.cc', - 'net/nss_memio.cc', - 'net/nss_memio.h', 'platform.cc', 'platform.h', 'platform_android.cc', @@ -41,6 +41,7 @@ 'process_linux.cc', 'process_macos.cc', 'process_win.cc', + '../../third_party/root_certificates/root_certificates.cc', 'secure_socket.cc', 'secure_socket.h', 'secure_socket_unsupported.cc', @@ -62,12 +63,15 @@ 'conditions': [ ['dart_io_secure_socket==1', { 'sources!' : [ + 'io_service_no_ssl.cc', + 'io_service_no_ssl.h', 'secure_socket_unsupported.cc', ], }, { # else dart_io_secure_socket == 0 'sources!' : [ - 'net/nss_memio.cc', - 'net/nss_memio.h', + '../../third_party/root_certificates/root_certificates.cc', + 'io_service.cc', + 'io_service.h', 'secure_socket.cc', 'secure_socket.h', ], @@ -83,8 +87,9 @@ 'filter.h', 'io_service.cc', 'io_service.h', - 'net/nss_memio.cc', - 'net/nss_memio.h', + 'io_service_no_ssl.cc', + 'io_service_no_ssl.h', + '../../third_party/root_certificates/root_certificates.cc', 'secure_socket.cc', 'secure_socket.h', ], diff --git a/runtime/bin/io_natives.cc b/runtime/bin/io_natives.cc index 2be3f3408f1..9c28ac1b26e 100644 --- a/runtime/bin/io_natives.cc +++ b/runtime/bin/io_natives.cc @@ -96,17 +96,23 @@ namespace bin { V(Process_Pid, 1) \ V(Process_SetSignalHandler, 1) \ V(Process_ClearSignalHandler, 1) \ - V(SecureSocket_Connect, 10) \ + V(SecureSocket_Connect, 8) \ V(SecureSocket_Destroy, 1) \ V(SecureSocket_FilterPointer, 1) \ V(SecureSocket_GetSelectedProtocol, 1) \ V(SecureSocket_Handshake, 1) \ V(SecureSocket_Init, 1) \ - V(SecureSocket_InitializeLibrary, 3) \ V(SecureSocket_PeerCertificate, 1) \ V(SecureSocket_RegisterBadCertificateCallback, 2) \ V(SecureSocket_RegisterHandshakeCompleteCallback, 2) \ V(SecureSocket_Renegotiate, 4) \ + V(SecurityContext_Allocate, 1) \ + V(SecurityContext_UsePrivateKey, 3) \ + V(SecurityContext_SetAlpnProtocols, 3) \ + V(SecurityContext_SetClientAuthorities, 2) \ + V(SecurityContext_SetTrustedCertificates, 3) \ + V(SecurityContext_TrustBuiltinRoots, 1) \ + V(SecurityContext_UseCertificateChain, 2) \ V(ServerSocket_Accept, 2) \ V(ServerSocket_CreateBindListen, 6) \ V(Socket_CreateConnect, 3) \ @@ -136,8 +142,11 @@ namespace bin { V(Stdin_SetLineMode, 1) \ V(Stdout_GetTerminalSize, 1) \ V(StringToSystemEncoding, 1) \ - V(SystemEncodingToString, 1) - + V(SystemEncodingToString, 1) \ + V(X509_Subject, 1) \ + V(X509_Issuer, 1) \ + V(X509_StartValidity, 1) \ + V(X509_EndValidity, 1) IO_NATIVE_LIST(DECLARE_FUNCTION); diff --git a/runtime/bin/io_service_no_ssl.cc b/runtime/bin/io_service_no_ssl.cc new file mode 100644 index 00000000000..b63f8c8e53b --- /dev/null +++ b/runtime/bin/io_service_no_ssl.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#include "bin/dartutils.h" +#include "bin/directory.h" +#include "bin/file.h" +#include "bin/io_buffer.h" +#include "bin/io_service_no_ssl.h" +#include "bin/socket.h" +#include "bin/utils.h" + +#include "platform/globals.h" +#include "platform/utils.h" + +#include "include/dart_api.h" + + +namespace dart { +namespace bin { + +#define CASE_REQUEST(type, method, id) \ + case IOService::k##type##method##Request: \ + response = type::method##Request(data); \ + break; + +void IOServiceCallback(Dart_Port dest_port_id, + Dart_CObject* message) { + Dart_Port reply_port_id = ILLEGAL_PORT; + CObject* response = CObject::IllegalArgumentError(); + CObjectArray request(message); + if (message->type == Dart_CObject_kArray && + request.Length() == 4 && + request[0]->IsInt32() && + request[1]->IsSendPort() && + request[2]->IsInt32() && + request[3]->IsArray()) { + CObjectInt32 message_id(request[0]); + CObjectSendPort reply_port(request[1]); + CObjectInt32 request_id(request[2]); + CObjectArray data(request[3]); + reply_port_id = reply_port.Value(); + switch (request_id.Value()) { + IO_SERVICE_REQUEST_LIST(CASE_REQUEST); + default: + UNREACHABLE(); + } + } + + CObjectArray result(CObject::NewArray(2)); + result.SetAt(0, request[0]); + result.SetAt(1, response); + ASSERT(reply_port_id != ILLEGAL_PORT); + Dart_PostCObject(reply_port_id, result.AsApiCObject()); +} + + +Dart_Port IOService::GetServicePort() { + Dart_Port result = Dart_NewNativePort("IOService", + IOServiceCallback, + true); + return result; +} + + +void FUNCTION_NAME(IOService_NewServicePort)(Dart_NativeArguments args) { + Dart_SetReturnValue(args, Dart_Null()); + Dart_Port service_port = IOService::GetServicePort(); + if (service_port != ILLEGAL_PORT) { + // Return a send port for the service port. + Dart_Handle send_port = Dart_NewSendPort(service_port); + Dart_SetReturnValue(args, send_port); + } +} + + +} // namespace bin +} // namespace dart diff --git a/runtime/bin/io_service_no_ssl.h b/runtime/bin/io_service_no_ssl.h new file mode 100644 index 00000000000..9cc7fc1a2f1 --- /dev/null +++ b/runtime/bin/io_service_no_ssl.h @@ -0,0 +1,74 @@ +// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#ifndef BIN_IO_SERVICE_NO_SSL_H_ +#define BIN_IO_SERVICE_NO_SSL_H_ + +#include "bin/builtin.h" +#include "bin/utils.h" + + +namespace dart { +namespace bin { + +// This list must be kept in sync with the list in sdk/lib/io/io_service.dart +// In this modified version, though, the request 39 for SSLFilter::ProcessFilter +// is removed, for use in contexts in which secure sockets are not enabled. +#define IO_SERVICE_REQUEST_LIST(V) \ + V(File, Exists, 0) \ + V(File, Create, 1) \ + V(File, Delete, 2) \ + V(File, Rename, 3) \ + V(File, Copy, 4) \ + V(File, Open, 5) \ + V(File, ResolveSymbolicLinks, 6) \ + V(File, Close, 7) \ + V(File, Position, 8) \ + V(File, SetPosition, 9) \ + V(File, Truncate, 10) \ + V(File, Length, 11) \ + V(File, LengthFromPath, 12) \ + V(File, LastModified, 13) \ + V(File, Flush, 14) \ + V(File, ReadByte, 15) \ + V(File, WriteByte, 16) \ + V(File, Read, 17) \ + V(File, ReadInto, 18) \ + V(File, WriteFrom, 19) \ + V(File, CreateLink, 20) \ + V(File, DeleteLink, 21) \ + V(File, RenameLink, 22) \ + V(File, LinkTarget, 23) \ + V(File, Type, 24) \ + V(File, Identical, 25) \ + V(File, Stat, 26) \ + V(File, Lock, 27) \ + V(Socket, Lookup, 28) \ + V(Socket, ListInterfaces, 29) \ + V(Socket, ReverseLookup, 30) \ + V(Directory, Create, 31) \ + V(Directory, Delete, 32) \ + V(Directory, Exists, 33) \ + V(Directory, CreateTemp, 34) \ + V(Directory, ListStart, 35) \ + V(Directory, ListNext, 36) \ + V(Directory, ListStop, 37) \ + V(Directory, Rename, 38) + +#define DECLARE_REQUEST(type, method, id) \ + k##type##method##Request = id, + +class IOService { + public: + enum { +IO_SERVICE_REQUEST_LIST(DECLARE_REQUEST) + }; + + static Dart_Port GetServicePort(); +}; + +} // namespace bin +} // namespace dart + +#endif // BIN_IO_SERVICE_NO_SSL_H_ diff --git a/runtime/bin/net/README-updating b/runtime/bin/net/README-updating deleted file mode 100644 index 9b0375e38ef..00000000000 --- a/runtime/bin/net/README-updating +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file -# for details. All rights reserved. Use of this source code is governed by a -# BSD-style license that can be found in the LICENSE file. - -The standalone Dart executable uses the NSS library from Mozilla to -support secure networking connections (SSL and TLS). It uses a copy -of the library from the Chromium repository, that includes patches -added by Chromium. We pin this to a fixed revision, using the -nss_revision variable in all the DEPS files in the deps directory of -the Dart repository. - -This revision should be updated when new security fixes are added to -NSS, or every few months. The .gyp files in this directory are copies -of the .gyp files in the Chromium copy of NSS, with changes made to -account for the configurations and directory structure of the Dart -repository. Differences between the Chromium versions and the Dart -versions should be annotated with "# Added by Dart'. - -All the files should have a comment saying where the original file is -in the Chromium repository. To update these files, look at the diff -between the two revisions in Chromium, and apply the changes manually, -with any needed modifications, to the Dart copies. Our aim is to keep -the difference between the Chromium and Dart copies as small as -possible. - -The nss_memio.cc and nss_memio.h files are also taken from Chromium, -and should be updated at the same time. The os_Linux.s file is new, -and should not need changing. - -The file nss.gyp includes support for disabling compilation of NSS -using the variable dart_io_support, when building configurations that -don't use it. NSS compilation is disabled when building Dartium, -because Chromium includes its own copy, and the build process would -get confused. diff --git a/runtime/bin/net/nss.gyp b/runtime/bin/net/nss.gyp deleted file mode 100644 index eb1c726025a..00000000000 --- a/runtime/bin/net/nss.gyp +++ /dev/null @@ -1,1400 +0,0 @@ -# Copyright (c) 2012 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -# Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file -# for details. All rights reserved. Use of this source code is governed by a -# BSD-style license that can be found in the LICENSE file. - -# This file is a modified copy of Chromium's deps/third_party/nss/nss.gyp. -# Revision 291806 (this should agree with "nss_rev" in DEPS). -{ - # Added by Dart. All Dart comments refer to the following block or line. - 'includes': [ - '../../tools/gyp/runtime-configurations.gypi', - '../../tools/gyp/nss_configurations.gypi', - ], - 'variables': { - # Added by Dart. - 'nss_directory': '../../../third_party/nss', - 'conditions': [ - ['OS=="ios"', { - 'exclude_nss_root_certs%': 0, - 'exclude_nss_libpkix%': 0, - }, { - 'exclude_nss_root_certs%': 1, - 'exclude_nss_libpkix%': 1, - }], - ], - }, - 'target_defaults': { - 'configurations': { - 'Debug': { - 'defines': [ - 'DEBUG', - '_DEBUG', - ], - }, - 'Release': { - 'defines': [ - 'NDEBUG', - ], - }, - # Added by Dart. - 'Dart_ia32_Base': { - 'defines': [ - 'NSS_X86_OR_X64', - 'NSS_X86', - '_X86_', - ], - }, - # Added by Dart. - 'Dart_x64_Base': { - 'defines': [ - 'NSS_X86_OR_X64', - 'NSS_X64', - 'NSS_USE_64', - ], - }, - # Added by Dart. - 'Dart_simarm64_Base': { - 'defines': [ - 'NSS_X86_OR_X64', - 'NSS_X64', - 'NSS_USE_64', - ], - }, - 'Dart_arm64_Base': { - 'defines': [ - 'NSS_USE_64', - ], - }, - }, - 'conditions': [ - ['OS=="win"', { - 'configurations': { - 'Common_Base': { - 'msvs_configuration_attributes': { - # Do not compile NSPR and NSS with /D _UNICODE /D UNICODE. - 'CharacterSet': '0' - } - }, - # Added by Dart. All target_arch specific defines are taken - # from below. - 'Dart_ia32_Base': { - 'defines': [ - 'NSS_X86_OR_X64', - 'NSS_X86', - '_X86_', - 'MP_ASSEMBLY_MULTIPLY', - 'MP_ASSEMBLY_SQUARE', - 'MP_ASSEMBLY_DIV_2DX1D', - 'MP_USE_UINT_DIGIT', - 'MP_NO_MP_WORD', - ], - }, - 'Dart_x64_Base': { - 'defines': [ - 'NSS_USE_64', - 'NSS_X86_OR_X64', - 'NSS_X64', - '_AMD64_', - 'MP_CHAR_STORE_SLOW', - 'MP_IS_LITTLE_ENDIAN', - 'WIN64', - ], - }, - }, - 'defines!': [ - 'WIN32_LEAN_AND_MEAN', - ], - }], - ], - }, - # Added by Dart. We do not indent, so diffs with the original are clearer. - 'conditions': [[ 'dart_io_support==1', { - 'targets': [ - { - 'target_name': 'nspr_dart', # Added by Dart (the _dart postfix) - 'product_name': 'crnspr', - 'type': '<(component)', - 'toolsets':['host','target'], - # Changed by Dart: '<(nss_directory)/' added to all paths. - 'sources': [ - # Added by Dart. - 'os_linux.S', - '<(nss_directory)/nspr/lib/ds/plarena.c', - '<(nss_directory)/nspr/lib/ds/plarena.h', - '<(nss_directory)/nspr/lib/ds/plarenas.h', - '<(nss_directory)/nspr/lib/ds/plhash.c', - '<(nss_directory)/nspr/lib/ds/plhash.h', - '<(nss_directory)/nspr/lib/libc/include/plbase64.h', - '<(nss_directory)/nspr/lib/libc/include/plerror.h', - '<(nss_directory)/nspr/lib/libc/include/plgetopt.h', - '<(nss_directory)/nspr/lib/libc/include/plstr.h', - '<(nss_directory)/nspr/lib/libc/src/base64.c', - '<(nss_directory)/nspr/lib/libc/src/plerror.c', - '<(nss_directory)/nspr/lib/libc/src/plgetopt.c', - '<(nss_directory)/nspr/lib/libc/src/strcase.c', - '<(nss_directory)/nspr/lib/libc/src/strcat.c', - '<(nss_directory)/nspr/lib/libc/src/strchr.c', - '<(nss_directory)/nspr/lib/libc/src/strcmp.c', - '<(nss_directory)/nspr/lib/libc/src/strcpy.c', - '<(nss_directory)/nspr/lib/libc/src/strdup.c', - '<(nss_directory)/nspr/lib/libc/src/strlen.c', - '<(nss_directory)/nspr/lib/libc/src/strpbrk.c', - '<(nss_directory)/nspr/lib/libc/src/strstr.c', - '<(nss_directory)/nspr/lib/libc/src/strtok.c', - '<(nss_directory)/nspr/pr/include/md/prosdep.h', - '<(nss_directory)/nspr/pr/include/md/_darwin.cfg', - '<(nss_directory)/nspr/pr/include/md/_darwin.h', - # Added by Dart. - '<(nss_directory)/nspr/pr/include/md/_linux.cfg', - # Added by Dart. - '<(nss_directory)/nspr/pr/include/md/_linux.h', - '<(nss_directory)/nspr/pr/include/md/_pcos.h', - '<(nss_directory)/nspr/pr/include/md/_pth.h', - '<(nss_directory)/nspr/pr/include/md/_unixos.h', - '<(nss_directory)/nspr/pr/include/md/_unix_errors.h', - '<(nss_directory)/nspr/pr/include/md/_win32_errors.h', - '<(nss_directory)/nspr/pr/include/md/_win95.cfg', - '<(nss_directory)/nspr/pr/include/md/_win95.h', - '<(nss_directory)/nspr/pr/include/nspr.h', - '<(nss_directory)/nspr/pr/include/obsolete/pralarm.h', - '<(nss_directory)/nspr/pr/include/obsolete/probslet.h', - '<(nss_directory)/nspr/pr/include/obsolete/protypes.h', - '<(nss_directory)/nspr/pr/include/obsolete/prsem.h', - '<(nss_directory)/nspr/pr/include/pratom.h', - '<(nss_directory)/nspr/pr/include/prbit.h', - '<(nss_directory)/nspr/pr/include/prclist.h', - '<(nss_directory)/nspr/pr/include/prcmon.h', - '<(nss_directory)/nspr/pr/include/prcountr.h', - '<(nss_directory)/nspr/pr/include/prcpucfg.h', - '<(nss_directory)/nspr/pr/include/prcvar.h', - '<(nss_directory)/nspr/pr/include/prdtoa.h', - '<(nss_directory)/nspr/pr/include/prenv.h', - '<(nss_directory)/nspr/pr/include/prerr.h', - '<(nss_directory)/nspr/pr/include/prerror.h', - '<(nss_directory)/nspr/pr/include/prinet.h', - '<(nss_directory)/nspr/pr/include/prinit.h', - '<(nss_directory)/nspr/pr/include/prinrval.h', - '<(nss_directory)/nspr/pr/include/prio.h', - '<(nss_directory)/nspr/pr/include/pripcsem.h', - '<(nss_directory)/nspr/pr/include/private/pprio.h', - '<(nss_directory)/nspr/pr/include/private/pprmwait.h', - '<(nss_directory)/nspr/pr/include/private/pprthred.h', - '<(nss_directory)/nspr/pr/include/private/primpl.h', - '<(nss_directory)/nspr/pr/include/private/prpriv.h', - '<(nss_directory)/nspr/pr/include/prlink.h', - '<(nss_directory)/nspr/pr/include/prlock.h', - '<(nss_directory)/nspr/pr/include/prlog.h', - '<(nss_directory)/nspr/pr/include/prlong.h', - '<(nss_directory)/nspr/pr/include/prmem.h', - '<(nss_directory)/nspr/pr/include/prmon.h', - '<(nss_directory)/nspr/pr/include/prmwait.h', - '<(nss_directory)/nspr/pr/include/prnetdb.h', - '<(nss_directory)/nspr/pr/include/prolock.h', - '<(nss_directory)/nspr/pr/include/prpdce.h', - '<(nss_directory)/nspr/pr/include/prprf.h', - '<(nss_directory)/nspr/pr/include/prproces.h', - '<(nss_directory)/nspr/pr/include/prrng.h', - '<(nss_directory)/nspr/pr/include/prrwlock.h', - '<(nss_directory)/nspr/pr/include/prshm.h', - '<(nss_directory)/nspr/pr/include/prshma.h', - '<(nss_directory)/nspr/pr/include/prsystem.h', - '<(nss_directory)/nspr/pr/include/prthread.h', - '<(nss_directory)/nspr/pr/include/prtime.h', - '<(nss_directory)/nspr/pr/include/prtpool.h', - '<(nss_directory)/nspr/pr/include/prtrace.h', - '<(nss_directory)/nspr/pr/include/prtypes.h', - '<(nss_directory)/nspr/pr/include/prvrsion.h', - '<(nss_directory)/nspr/pr/include/prwin16.h', - '<(nss_directory)/nspr/pr/src/io/prdir.c', - '<(nss_directory)/nspr/pr/src/io/prfdcach.c', - '<(nss_directory)/nspr/pr/src/io/prfile.c', - '<(nss_directory)/nspr/pr/src/io/prio.c', - '<(nss_directory)/nspr/pr/src/io/priometh.c', - '<(nss_directory)/nspr/pr/src/io/pripv6.c', - '<(nss_directory)/nspr/pr/src/io/prlayer.c', - '<(nss_directory)/nspr/pr/src/io/prlog.c', - '<(nss_directory)/nspr/pr/src/io/prmapopt.c', - '<(nss_directory)/nspr/pr/src/io/prmmap.c', - '<(nss_directory)/nspr/pr/src/io/prmwait.c', - '<(nss_directory)/nspr/pr/src/io/prpolevt.c', - '<(nss_directory)/nspr/pr/src/io/prprf.c', - '<(nss_directory)/nspr/pr/src/io/prscanf.c', - '<(nss_directory)/nspr/pr/src/io/prsocket.c', - '<(nss_directory)/nspr/pr/src/io/prstdio.c', - '<(nss_directory)/nspr/pr/src/linking/prlink.c', - '<(nss_directory)/nspr/pr/src/malloc/prmalloc.c', - '<(nss_directory)/nspr/pr/src/malloc/prmem.c', - '<(nss_directory)/nspr/pr/src/md/prosdep.c', - '<(nss_directory)/nspr/pr/src/md/unix/darwin.c', - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/linux.c', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86_64.s', - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86.s', - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86_64.s', - '<(nss_directory)/nspr/pr/src/md/unix/unix.c', - '<(nss_directory)/nspr/pr/src/md/unix/unix_errors.c', - '<(nss_directory)/nspr/pr/src/md/unix/uxproces.c', - '<(nss_directory)/nspr/pr/src/md/unix/uxrng.c', - '<(nss_directory)/nspr/pr/src/md/unix/uxshm.c', - '<(nss_directory)/nspr/pr/src/md/unix/uxwrap.c', - '<(nss_directory)/nspr/pr/src/md/windows/ntgc.c', - '<(nss_directory)/nspr/pr/src/md/windows/ntinrval.c', - '<(nss_directory)/nspr/pr/src/md/windows/ntmisc.c', - '<(nss_directory)/nspr/pr/src/md/windows/ntsec.c', - '<(nss_directory)/nspr/pr/src/md/windows/ntsem.c', - '<(nss_directory)/nspr/pr/src/md/windows/w32ipcsem.c', - '<(nss_directory)/nspr/pr/src/md/windows/w32poll.c', - '<(nss_directory)/nspr/pr/src/md/windows/w32rng.c', - '<(nss_directory)/nspr/pr/src/md/windows/w32shm.c', - '<(nss_directory)/nspr/pr/src/md/windows/w95cv.c', - '<(nss_directory)/nspr/pr/src/md/windows/w95dllmain.c', - '<(nss_directory)/nspr/pr/src/md/windows/w95io.c', - '<(nss_directory)/nspr/pr/src/md/windows/w95sock.c', - '<(nss_directory)/nspr/pr/src/md/windows/w95thred.c', - '<(nss_directory)/nspr/pr/src/md/windows/win32_errors.c', - '<(nss_directory)/nspr/pr/src/memory/prseg.c', - '<(nss_directory)/nspr/pr/src/memory/prshm.c', - '<(nss_directory)/nspr/pr/src/memory/prshma.c', - '<(nss_directory)/nspr/pr/src/misc/pralarm.c', - '<(nss_directory)/nspr/pr/src/misc/pratom.c', - '<(nss_directory)/nspr/pr/src/misc/praton.c', - '<(nss_directory)/nspr/pr/src/misc/prcountr.c', - '<(nss_directory)/nspr/pr/src/misc/prdtoa.c', - '<(nss_directory)/nspr/pr/src/misc/prenv.c', - '<(nss_directory)/nspr/pr/src/misc/prerr.c', - '<(nss_directory)/nspr/pr/src/misc/prerror.c', - '<(nss_directory)/nspr/pr/src/misc/prerrortable.c', - '<(nss_directory)/nspr/pr/src/misc/prinit.c', - '<(nss_directory)/nspr/pr/src/misc/prinrval.c', - '<(nss_directory)/nspr/pr/src/misc/pripc.c', - '<(nss_directory)/nspr/pr/src/misc/pripcsem.c', - '<(nss_directory)/nspr/pr/src/misc/prlog2.c', - '<(nss_directory)/nspr/pr/src/misc/prlong.c', - '<(nss_directory)/nspr/pr/src/misc/prnetdb.c', - '<(nss_directory)/nspr/pr/src/misc/prolock.c', - '<(nss_directory)/nspr/pr/src/misc/prrng.c', - '<(nss_directory)/nspr/pr/src/misc/prsystem.c', - '<(nss_directory)/nspr/pr/src/misc/prthinfo.c', - '<(nss_directory)/nspr/pr/src/misc/prtime.c', - '<(nss_directory)/nspr/pr/src/misc/prtpool.c', - '<(nss_directory)/nspr/pr/src/misc/prtrace.c', - '<(nss_directory)/nspr/pr/src/pthreads/ptio.c', - '<(nss_directory)/nspr/pr/src/pthreads/ptmisc.c', - '<(nss_directory)/nspr/pr/src/pthreads/ptsynch.c', - '<(nss_directory)/nspr/pr/src/pthreads/ptthread.c', - '<(nss_directory)/nspr/pr/src/threads/combined/prucpu.c', - '<(nss_directory)/nspr/pr/src/threads/combined/prucv.c', - '<(nss_directory)/nspr/pr/src/threads/combined/prulock.c', - '<(nss_directory)/nspr/pr/src/threads/combined/prustack.c', - '<(nss_directory)/nspr/pr/src/threads/combined/pruthr.c', - '<(nss_directory)/nspr/pr/src/threads/prcmon.c', - '<(nss_directory)/nspr/pr/src/threads/prcthr.c', - '<(nss_directory)/nspr/pr/src/threads/prdump.c', - '<(nss_directory)/nspr/pr/src/threads/prmon.c', - '<(nss_directory)/nspr/pr/src/threads/prrwlock.c', - '<(nss_directory)/nspr/pr/src/threads/prsem.c', - '<(nss_directory)/nspr/pr/src/threads/prtpd.c', - ], - 'defines': [ - '_NSPR_BUILD_', - 'FORCE_PR_LOG', - ], - 'include_dirs': [ - '<(nss_directory)/nspr/pr/include', - '<(nss_directory)/nspr/pr/include/private', - '<(nss_directory)/nspr/lib/ds', - '<(nss_directory)/nspr/lib/libc/include', - ], - 'direct_dependent_settings': { - 'defines': [ - 'NO_NSPR_10_SUPPORT', - ], - 'include_dirs': [ - '<(nss_directory)/nspr/pr/include', - '<(nss_directory)/nspr/lib/ds', - '<(nss_directory)/nspr/lib/libc/include', - ], - }, - # TODO(wtc): suppress C4244 and C4554 in prdtoa.c. - 'msvs_disabled_warnings': [4018, 4244, 4554, 4267,], - 'variables': { - 'clang_warning_flags': [ - # nspr passes "const char*" through "void*". - '-Wno-incompatible-pointer-types', - # nspr passes "int*" through "unsigned int*". - '-Wno-pointer-sign', - ], - }, - 'conditions': [ - ['OS=="mac" or OS=="ios"', { - 'defines': [ - 'XP_UNIX', - 'DARWIN', - 'XP_MACOSX', - '_PR_PTHREADS', - 'HAVE_BSD_FLOCK', - 'HAVE_CRT_EXTERNS_H', - 'HAVE_DLADDR', - 'HAVE_LCHOWN', - 'HAVE_SOCKLEN_T', - 'HAVE_STRERROR', - ], - # Changed by Dart: - # nss_directory contains .., which is bad in a regular expression. - # So we use the partial match by dropping '^' from '^nspr/... - 'sources/': [ - ['exclude', 'nspr/pr/src/md/windows/'], - ['exclude', 'nspr/pr/src/threads/combined/'], - ], - 'sources!': [ - # Added by Dart. - 'os_linux.S', - '<(nss_directory)/nspr/pr/src/io/prdir.c', - '<(nss_directory)/nspr/pr/src/io/prfile.c', - '<(nss_directory)/nspr/pr/src/io/prio.c', - '<(nss_directory)/nspr/pr/src/io/prsocket.c', - # os_Darwin_x86.s and os_Darwin_x86_64.s are included by - # os_Darwin.s. - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/linux.c', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86_64.s', - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86.s', - # Added by Dart. - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86_64.s', - '<(nss_directory)/nspr/pr/src/misc/pripcsem.c', - '<(nss_directory)/nspr/pr/src/threads/prcthr.c', - '<(nss_directory)/nspr/pr/src/threads/prdump.c', - '<(nss_directory)/nspr/pr/src/threads/prmon.c', - '<(nss_directory)/nspr/pr/src/threads/prsem.c', - ], - }], - # Added by Dart. - ['OS=="linux"', { - 'defines': [ - 'HAVE_VISIBILITY_HIDDEN_ATTRIBUTE=1', - 'HAVE_VISIBILITY_PRAGMA=1', - 'XP_UNIX=1', - '_GNU_SOURCE=1', - 'HAVE_FCNTL_FILE_LOCKING=1', - 'LINUX=1', - 'i386=1', - 'HAVE_LCHOWN=1', - 'HAVE_STRERROR=1', - 'HAVE_DLADDR=1', - 'HAVE_SETPRIORITY=1', - 'HAVE_SYSCALL=1', - '_REENTRANT=1', - 'FORCE_PR_LOG', - '_PR_PTHREADS', - '_NSPR_BUILD_', - ], - 'sources/': [ - ['exclude', 'nspr/pr/src/md/windows/'], - ['exclude', 'nspr/pr/src/threads/combined/'], - ], - 'sources!': [ - '<(nss_directory)/nspr/pr/src/md/unix/darwin.c', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Darwin_x86_64.s', - # os_Linux_x86.s and os_Linux_x86_64.s are included by - # runtime/bin/net/os_Linux.S. - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86.s', - '<(nss_directory)/nspr/pr/src/md/unix/os_Linux_x86_64.s', - '<(nss_directory)/nspr/pr/src/misc/pripcsem.c', - '<(nss_directory)/nspr/pr/src/threads/prcthr.c', - '<(nss_directory)/nspr/pr/src/threads/prdump.c', - '<(nss_directory)/nspr/pr/src/threads/prmon.c', - '<(nss_directory)/nspr/pr/src/threads/prsem.c', - '<(nss_directory)/nspr/pr/src/io/prdir.c', - '<(nss_directory)/nspr/pr/src/io/prfile.c', - '<(nss_directory)/nspr/pr/src/io/prio.c', - '<(nss_directory)/nspr/pr/src/io/prsocket.c', - ], - }], - ['OS=="mac"', { - 'link_settings': { - 'libraries': [ - '$(SDKROOT)/System/Library/Frameworks/CoreFoundation.framework', - '$(SDKROOT)/System/Library/Frameworks/CoreServices.framework', - ], - }, - }], - ['OS=="ios"', { - 'defines!': [ - 'HAVE_CRT_EXTERNS_H', - ], - }], - ['OS=="win"', { - 'defines': [ - 'XP_PC', - 'WIN32', - 'WIN95', - '_PR_GLOBAL_THREADS_ONLY', - '_CRT_SECURE_NO_WARNINGS', - '_CRT_NONSTDC_NO_WARNINGS', - ], - 'sources!': [ - # Added by Dart. - 'os_linux.S', - ], - # Changed by Dart: - # nss_directory contains .., which is bad in a regular expression. - # So we use the partial match by dropping '^' from '^nspr/... - 'sources/': [ - ['exclude', 'nspr/pr/src/md/unix/'], - ['exclude', 'nspr/pr/src/pthreads/'], - ], - # Changed by Dart. We don't use target_arch. - # 'conditions': [ - # ['target_arch=="ia32"', { - # 'defines': [ - # '_X86_', - # ], - # }], - # ], - }], - ['component == "static_library"', { - 'defines': [ - 'NSPR_STATIC', - ], - 'direct_dependent_settings': { - 'defines': [ - 'NSPR_STATIC', - ], - }, - }], - ['clang==1', { - 'xcode_settings': { - 'WARNING_CFLAGS': [ - # nspr uses a bunch of deprecated functions (NSLinkModule etc) in - # prlink.c on mac. - '-Wno-deprecated-declarations', - ], - }, - }], - ], - }, - { - 'target_name': 'nss_dart', # Added by Dart (the _dart postfix) - 'product_name': 'crnss', - 'type': '<(component)', - 'toolsets':['host','target'], - 'dependencies': [ - 'nss_static_dart', # Added by Dart (the _dart postfix) - ], - 'export_dependent_settings': [ - 'nss_static_dart', # Added by Dart (the _dart postfix) - ], - 'sources': [ - # Ensure at least one object file is produced, so that MSVC does not - # warn when creating the static/shared library. See the note for - # the 'nssckbi' target for why the 'nss' target was split as such. - '<(nss_directory)/nss/lib/nss/nssver.c', - ], - 'conditions': [ - ['exclude_nss_root_certs==0', { - 'dependencies': [ - 'nssckbi_dart', # Added by Dart (the _dart postfix) - ], - 'export_dependent_settings': [ - 'nssckbi_dart', # Added by Dart (the _dart postfix) - ], - }], - ['OS == "mac" and component == "shared_library"', { - 'xcode_settings': { - 'OTHER_LDFLAGS': ['-all_load'], - }, - }], - ['OS == "win" and component == "shared_library"', { - 'sources': [ - '<(nss_directory)/nss/exports_win.def', - ], - }], - ], - }, - { - # This is really more of a pseudo-target to work around the fact that - # a single static_library target cannot contain two object files of the - # same name (hash.o / hash.obj). Logically, this is part of the - # 'nss_static' target. By separating it out, it creates a possible - # circular dependency between 'nss_static' and 'nssckbi' when - # 'exclude_nss_root_certs' is not specified, as 'nss_static' depends on - # the 'builtinsC_GetFunctionList' exported by this target. This is an - # artifact of how NSS is being statically built, which is not an - # officially supported configuration - normally, 'nssckbi.dll/so' would - # depend on libnss3.dll/so, and the higher layer caller would instruct - # libnss3.dll to dynamically load nssckbi.dll, breaking the circle. - # - # TODO(rsleevi): http://crbug.com/128134 - Break the circular dependency - # without requiring nssckbi to be built as a shared library. - 'target_name': 'nssckbi_dart', # Added by Dart (the _dart postfix) - 'product_name': 'crnssckbi', - 'type': 'static_library', - 'toolsets':['host','target'], - # This target is an implementation detail - the public dependencies - # should be on 'nss'. - 'suppress_wildcard': 1, - 'sources': [ - '<(nss_directory)/nss/lib/ckfw/builtins/anchor.c', - '<(nss_directory)/nss/lib/ckfw/builtins/bfind.c', - '<(nss_directory)/nss/lib/ckfw/builtins/binst.c', - '<(nss_directory)/nss/lib/ckfw/builtins/bobject.c', - '<(nss_directory)/nss/lib/ckfw/builtins/bsession.c', - '<(nss_directory)/nss/lib/ckfw/builtins/bslot.c', - '<(nss_directory)/nss/lib/ckfw/builtins/btoken.c', - '<(nss_directory)/nss/lib/ckfw/builtins/builtins.h', - '<(nss_directory)/nss/lib/ckfw/builtins/certdata.c', - '<(nss_directory)/nss/lib/ckfw/builtins/ckbiver.c', - '<(nss_directory)/nss/lib/ckfw/builtins/constants.c', - '<(nss_directory)/nss/lib/ckfw/builtins/nssckbi.h', - '<(nss_directory)/nss/lib/ckfw/ck.h', - '<(nss_directory)/nss/lib/ckfw/ckfw.h', - '<(nss_directory)/nss/lib/ckfw/ckfwm.h', - '<(nss_directory)/nss/lib/ckfw/ckfwtm.h', - '<(nss_directory)/nss/lib/ckfw/ckmd.h', - '<(nss_directory)/nss/lib/ckfw/ckt.h', - '<(nss_directory)/nss/lib/ckfw/crypto.c', - '<(nss_directory)/nss/lib/ckfw/find.c', - '<(nss_directory)/nss/lib/ckfw/hash.c', - '<(nss_directory)/nss/lib/ckfw/instance.c', - '<(nss_directory)/nss/lib/ckfw/mechanism.c', - '<(nss_directory)/nss/lib/ckfw/mutex.c', - '<(nss_directory)/nss/lib/ckfw/nssck.api', - '<(nss_directory)/nss/lib/ckfw/nssckepv.h', - '<(nss_directory)/nss/lib/ckfw/nssckft.h', - '<(nss_directory)/nss/lib/ckfw/nssckfw.h', - '<(nss_directory)/nss/lib/ckfw/nssckfwc.h', - '<(nss_directory)/nss/lib/ckfw/nssckfwt.h', - '<(nss_directory)/nss/lib/ckfw/nssckg.h', - '<(nss_directory)/nss/lib/ckfw/nssckmdt.h', - '<(nss_directory)/nss/lib/ckfw/nssckt.h', - '<(nss_directory)/nss/lib/ckfw/object.c', - '<(nss_directory)/nss/lib/ckfw/session.c', - '<(nss_directory)/nss/lib/ckfw/sessobj.c', - '<(nss_directory)/nss/lib/ckfw/slot.c', - '<(nss_directory)/nss/lib/ckfw/token.c', - '<(nss_directory)/nss/lib/ckfw/wrap.c', - ], - 'dependencies': [ - 'nss_static_dart', # Added by Dart (the _dart postfix) - ], - 'export_dependent_settings': [ - 'nss_static_dart', # Added by Dart (the _dart postfix) - ], - 'include_dirs': [ - '<(nss_directory)/nss/lib/ckfw', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - '<(nss_directory)/nss/lib/ckfw/builtins', - ], - }, - }, - # Removed by Dart: the target nss_static_avx_dart. - # This is an optimization of AES on 32 bit Windows using new - # Intel assembly instructions. Not enabling it on Dart. - { - 'target_name': 'nss_static_dart', # Added by Dart (the _dart postfix) - 'type': 'static_library', - 'toolsets':['host','target'], - # This target is an implementation detail - the public dependencies - # should be on 'nss'. - 'suppress_wildcard': 1, - 'sources': [ - 'os_windows.c', - '<(nss_directory)/nss/lib/base/arena.c', - '<(nss_directory)/nss/lib/base/base.h', - '<(nss_directory)/nss/lib/base/baset.h', - '<(nss_directory)/nss/lib/base/error.c', - '<(nss_directory)/nss/lib/base/errorval.c', - '<(nss_directory)/nss/lib/base/hash.c', - '<(nss_directory)/nss/lib/base/hashops.c', - '<(nss_directory)/nss/lib/base/item.c', - '<(nss_directory)/nss/lib/base/libc.c', - '<(nss_directory)/nss/lib/base/list.c', - '<(nss_directory)/nss/lib/base/nssbase.h', - '<(nss_directory)/nss/lib/base/nssbaset.h', - '<(nss_directory)/nss/lib/base/nssutf8.c', - '<(nss_directory)/nss/lib/base/tracker.c', - '<(nss_directory)/nss/lib/certdb/alg1485.c', - '<(nss_directory)/nss/lib/certdb/cert.h', - '<(nss_directory)/nss/lib/certdb/certdb.c', - '<(nss_directory)/nss/lib/certdb/certdb.h', - '<(nss_directory)/nss/lib/certdb/certi.h', - '<(nss_directory)/nss/lib/certdb/certt.h', - '<(nss_directory)/nss/lib/certdb/certv3.c', - '<(nss_directory)/nss/lib/certdb/certxutl.c', - '<(nss_directory)/nss/lib/certdb/certxutl.h', - '<(nss_directory)/nss/lib/certdb/crl.c', - '<(nss_directory)/nss/lib/certdb/genname.c', - '<(nss_directory)/nss/lib/certdb/genname.h', - '<(nss_directory)/nss/lib/certdb/polcyxtn.c', - '<(nss_directory)/nss/lib/certdb/secname.c', - '<(nss_directory)/nss/lib/certdb/stanpcertdb.c', - '<(nss_directory)/nss/lib/certdb/xauthkid.c', - '<(nss_directory)/nss/lib/certdb/xbsconst.c', - '<(nss_directory)/nss/lib/certdb/xconst.c', - '<(nss_directory)/nss/lib/certdb/xconst.h', - '<(nss_directory)/nss/lib/certhigh/certhigh.c', - '<(nss_directory)/nss/lib/certhigh/certhtml.c', - '<(nss_directory)/nss/lib/certhigh/certreq.c', - '<(nss_directory)/nss/lib/certhigh/certvfy.c', - '<(nss_directory)/nss/lib/certhigh/certvfypkix.c', - '<(nss_directory)/nss/lib/certhigh/certvfypkixprint.c', - '<(nss_directory)/nss/lib/certhigh/crlv2.c', - '<(nss_directory)/nss/lib/certhigh/ocsp.c', - '<(nss_directory)/nss/lib/certhigh/ocsp.h', - '<(nss_directory)/nss/lib/certhigh/ocspi.h', - '<(nss_directory)/nss/lib/certhigh/ocspt.h', - '<(nss_directory)/nss/lib/certhigh/ocspti.h', - '<(nss_directory)/nss/lib/certhigh/xcrldist.c', - '<(nss_directory)/nss/lib/cryptohi/cryptohi.h', - '<(nss_directory)/nss/lib/cryptohi/cryptoht.h', - '<(nss_directory)/nss/lib/cryptohi/dsautil.c', - '<(nss_directory)/nss/lib/cryptohi/key.h', - '<(nss_directory)/nss/lib/cryptohi/keyhi.h', - '<(nss_directory)/nss/lib/cryptohi/keyi.h', - '<(nss_directory)/nss/lib/cryptohi/keyt.h', - '<(nss_directory)/nss/lib/cryptohi/keythi.h', - '<(nss_directory)/nss/lib/cryptohi/sechash.c', - '<(nss_directory)/nss/lib/cryptohi/sechash.h', - '<(nss_directory)/nss/lib/cryptohi/seckey.c', - '<(nss_directory)/nss/lib/cryptohi/secsign.c', - '<(nss_directory)/nss/lib/cryptohi/secvfy.c', - '<(nss_directory)/nss/lib/dev/ckhelper.c', - '<(nss_directory)/nss/lib/dev/ckhelper.h', - '<(nss_directory)/nss/lib/dev/dev.h', - '<(nss_directory)/nss/lib/dev/devm.h', - '<(nss_directory)/nss/lib/dev/devslot.c', - '<(nss_directory)/nss/lib/dev/devt.h', - '<(nss_directory)/nss/lib/dev/devtm.h', - '<(nss_directory)/nss/lib/dev/devtoken.c', - '<(nss_directory)/nss/lib/dev/devutil.c', - '<(nss_directory)/nss/lib/dev/nssdev.h', - '<(nss_directory)/nss/lib/dev/nssdevt.h', - '<(nss_directory)/nss/lib/freebl/aeskeywrap.c', - '<(nss_directory)/nss/lib/freebl/alg2268.c', - '<(nss_directory)/nss/lib/freebl/alghmac.c', - '<(nss_directory)/nss/lib/freebl/alghmac.h', - '<(nss_directory)/nss/lib/freebl/arcfive.c', - '<(nss_directory)/nss/lib/freebl/arcfour.c', - '<(nss_directory)/nss/lib/freebl/blapi.h', - '<(nss_directory)/nss/lib/freebl/blapii.h', - '<(nss_directory)/nss/lib/freebl/blapit.h', - '<(nss_directory)/nss/lib/freebl/camellia.c', - '<(nss_directory)/nss/lib/freebl/camellia.h', - '<(nss_directory)/nss/lib/freebl/chacha20/chacha20.c', - '<(nss_directory)/nss/lib/freebl/chacha20/chacha20.h', - '<(nss_directory)/nss/lib/freebl/chacha20/chacha20_vec.c', - '<(nss_directory)/nss/lib/freebl/chacha20poly1305.c', - '<(nss_directory)/nss/lib/freebl/chacha20poly1305.h', - '<(nss_directory)/nss/lib/freebl/ctr.c', - '<(nss_directory)/nss/lib/freebl/ctr.h', - '<(nss_directory)/nss/lib/freebl/cts.c', - '<(nss_directory)/nss/lib/freebl/cts.h', - '<(nss_directory)/nss/lib/freebl/des.c', - '<(nss_directory)/nss/lib/freebl/des.h', - '<(nss_directory)/nss/lib/freebl/desblapi.c', - '<(nss_directory)/nss/lib/freebl/dh.c', - '<(nss_directory)/nss/lib/freebl/drbg.c', - '<(nss_directory)/nss/lib/freebl/dsa.c', - '<(nss_directory)/nss/lib/freebl/ec.c', - '<(nss_directory)/nss/lib/freebl/ec.h', - '<(nss_directory)/nss/lib/freebl/ecdecode.c', - '<(nss_directory)/nss/lib/freebl/ecl/ec2.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecl-curve.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecl-exp.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecl-priv.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecl.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecl.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecl_curve.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecl_gf.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecl_mult.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp.h', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_256.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_256_32.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_384.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_521.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_aff.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_jac.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_jm.c', - '<(nss_directory)/nss/lib/freebl/ecl/ecp_mont.c', - '<(nss_directory)/nss/lib/freebl/ecl/ec_naf.c', - '<(nss_directory)/nss/lib/freebl/gcm.c', - '<(nss_directory)/nss/lib/freebl/gcm.h', - # Changed by Dart: intel-aes assembly language files dropped. - '<(nss_directory)/nss/lib/freebl/hmacct.c', - '<(nss_directory)/nss/lib/freebl/hmacct.h', - '<(nss_directory)/nss/lib/freebl/jpake.c', - '<(nss_directory)/nss/lib/freebl/md2.c', - '<(nss_directory)/nss/lib/freebl/md5.c', - '<(nss_directory)/nss/lib/freebl/mpi/logtab.h', - '<(nss_directory)/nss/lib/freebl/mpi/mpcpucache.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpi-config.h', - '<(nss_directory)/nss/lib/freebl/mpi/mpi-priv.h', - '<(nss_directory)/nss/lib/freebl/mpi/mpi.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpi.h', - '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - '<(nss_directory)/nss/lib/freebl/mpi/mplogic.c', - '<(nss_directory)/nss/lib/freebl/mpi/mplogic.h', - '<(nss_directory)/nss/lib/freebl/mpi/mpmontg.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpprime.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpprime.h', - '<(nss_directory)/nss/lib/freebl/mpi/mp_gf2m-priv.h', - '<(nss_directory)/nss/lib/freebl/mpi/mp_gf2m.c', - '<(nss_directory)/nss/lib/freebl/mpi/mp_gf2m.h', - '<(nss_directory)/nss/lib/freebl/mpi/primes.c', - '<(nss_directory)/nss/lib/freebl/nss_build_config_mac.h', - '<(nss_directory)/nss/lib/freebl/poly1305/poly1305-donna-x64-sse2-incremental-source.c', - '<(nss_directory)/nss/lib/freebl/poly1305/poly1305.c', - '<(nss_directory)/nss/lib/freebl/poly1305/poly1305.h', - '<(nss_directory)/nss/lib/freebl/pqg.c', - '<(nss_directory)/nss/lib/freebl/pqg.h', - '<(nss_directory)/nss/lib/freebl/rawhash.c', - '<(nss_directory)/nss/lib/freebl/rijndael.c', - '<(nss_directory)/nss/lib/freebl/rijndael.h', - '<(nss_directory)/nss/lib/freebl/rijndael32.tab', - '<(nss_directory)/nss/lib/freebl/rsa.c', - '<(nss_directory)/nss/lib/freebl/rsapkcs.c', - '<(nss_directory)/nss/lib/freebl/secmpi.h', - '<(nss_directory)/nss/lib/freebl/secrng.h', - '<(nss_directory)/nss/lib/freebl/seed.c', - '<(nss_directory)/nss/lib/freebl/seed.h', - '<(nss_directory)/nss/lib/freebl/sha256.h', - '<(nss_directory)/nss/lib/freebl/sha512.c', - '<(nss_directory)/nss/lib/freebl/sha_fast.c', - '<(nss_directory)/nss/lib/freebl/sha_fast.h', - '<(nss_directory)/nss/lib/freebl/shsign.h', - '<(nss_directory)/nss/lib/freebl/shvfy.c', - '<(nss_directory)/nss/lib/freebl/sysrand.c', - '<(nss_directory)/nss/lib/freebl/tlsprfalg.c', - '<(nss_directory)/nss/lib/freebl/unix_rand.c', - '<(nss_directory)/nss/lib/freebl/win_rand.c', - '<(nss_directory)/nss/lib/libpkix/include/pkix.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_certsel.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_certstore.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_checker.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_crlsel.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_errorstrings.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_params.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_pl_pki.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_pl_system.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_results.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_revchecker.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_sample_modules.h', - '<(nss_directory)/nss/lib/libpkix/include/pkix_util.h', - '<(nss_directory)/nss/lib/libpkix/include/pkixt.h', - '<(nss_directory)/nss/lib/libpkix/pkix/certsel/pkix_certselector.c', - '<(nss_directory)/nss/lib/libpkix/pkix/certsel/pkix_certselector.h', - '<(nss_directory)/nss/lib/libpkix/pkix/certsel/pkix_comcertselparams.c', - '<(nss_directory)/nss/lib/libpkix/pkix/certsel/pkix_comcertselparams.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_basicconstraintschecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_basicconstraintschecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_certchainchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_certchainchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_crlchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_crlchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_ekuchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_ekuchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_expirationchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_expirationchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_namechainingchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_namechainingchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_nameconstraintschecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_nameconstraintschecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_ocspchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_ocspchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_policychecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_policychecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_revocationchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_revocationchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_revocationmethod.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_revocationmethod.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_signaturechecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_signaturechecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_targetcertchecker.c', - '<(nss_directory)/nss/lib/libpkix/pkix/checker/pkix_targetcertchecker.h', - '<(nss_directory)/nss/lib/libpkix/pkix/crlsel/pkix_comcrlselparams.c', - '<(nss_directory)/nss/lib/libpkix/pkix/crlsel/pkix_comcrlselparams.h', - '<(nss_directory)/nss/lib/libpkix/pkix/crlsel/pkix_crlselector.c', - '<(nss_directory)/nss/lib/libpkix/pkix/crlsel/pkix_crlselector.h', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_procparams.c', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_procparams.h', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_resourcelimits.c', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_resourcelimits.h', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_trustanchor.c', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_trustanchor.h', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_valparams.c', - '<(nss_directory)/nss/lib/libpkix/pkix/params/pkix_valparams.h', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_buildresult.c', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_buildresult.h', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_policynode.c', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_policynode.h', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_valresult.c', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_valresult.h', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_verifynode.c', - '<(nss_directory)/nss/lib/libpkix/pkix/results/pkix_verifynode.h', - '<(nss_directory)/nss/lib/libpkix/pkix/store/pkix_store.c', - '<(nss_directory)/nss/lib/libpkix/pkix/store/pkix_store.h', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_build.c', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_build.h', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_lifecycle.c', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_lifecycle.h', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_validate.c', - '<(nss_directory)/nss/lib/libpkix/pkix/top/pkix_validate.h', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_error.c', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_error.h', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_errpaths.c', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_list.c', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_list.h', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_logger.c', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_logger.h', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_tools.c', - '<(nss_directory)/nss/lib/libpkix/pkix/util/pkix_tools.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_aiamgr.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_aiamgr.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_colcertstore.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_colcertstore.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_httpcertstore.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_httpcertstore.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_httpdefaultclient.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_httpdefaultclient.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapcertstore.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapcertstore.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapdefaultclient.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapdefaultclient.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaprequest.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaprequest.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapresponse.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapresponse.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapt.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaptemplates.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_nsscontext.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_nsscontext.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_pk11certstore.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_pk11certstore.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_socket.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_socket.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_basicconstraints.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_basicconstraints.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_cert.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_cert.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicyinfo.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicyinfo.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicymap.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicymap.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicyqualifier.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_certpolicyqualifier.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crl.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crl.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crldp.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crldp.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crlentry.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_crlentry.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_date.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_date.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_generalname.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_generalname.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_infoaccess.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_infoaccess.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_nameconstraints.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_nameconstraints.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocspcertid.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocspcertid.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocsprequest.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocsprequest.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocspresponse.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_ocspresponse.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_publickey.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_publickey.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_x500name.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki/pkix_pl_x500name.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_bigint.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_bigint.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_bytearray.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_bytearray.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_common.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_common.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_error.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_hashtable.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_hashtable.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_lifecycle.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_lifecycle.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_mem.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_mem.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_monitorlock.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_monitorlock.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_mutex.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_mutex.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_object.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_object.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_oid.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_oid.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_primhash.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_primhash.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_rwlock.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_rwlock.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_string.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system/pkix_pl_string.h', - '<(nss_directory)/nss/lib/nss/nss.h', - '<(nss_directory)/nss/lib/nss/nssinit.c', - '<(nss_directory)/nss/lib/nss/nssrenam.h', - '<(nss_directory)/nss/lib/nss/utilwrap.c', - '<(nss_directory)/nss/lib/pk11wrap/debug_module.c', - '<(nss_directory)/nss/lib/pk11wrap/dev3hack.c', - '<(nss_directory)/nss/lib/pk11wrap/dev3hack.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11akey.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11auth.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11cert.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11cxt.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11err.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11func.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11kea.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11list.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11load.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11mech.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11merge.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11nobj.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11obj.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11pars.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11pbe.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11pk12.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11pqg.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11pqg.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11priv.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11pub.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11sdr.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11sdr.h', - '<(nss_directory)/nss/lib/pk11wrap/pk11skey.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11slot.c', - '<(nss_directory)/nss/lib/pk11wrap/pk11util.c', - '<(nss_directory)/nss/lib/pk11wrap/secmod.h', - '<(nss_directory)/nss/lib/pk11wrap/secmodi.h', - '<(nss_directory)/nss/lib/pk11wrap/secmodt.h', - '<(nss_directory)/nss/lib/pk11wrap/secmodti.h', - '<(nss_directory)/nss/lib/pk11wrap/secpkcs5.h', - '<(nss_directory)/nss/lib/pkcs7/certread.c', - '<(nss_directory)/nss/lib/pkcs7/p7common.c', - '<(nss_directory)/nss/lib/pkcs7/p7create.c', - '<(nss_directory)/nss/lib/pkcs7/p7decode.c', - '<(nss_directory)/nss/lib/pkcs7/p7encode.c', - '<(nss_directory)/nss/lib/pkcs7/p7local.c', - '<(nss_directory)/nss/lib/pkcs7/p7local.h', - '<(nss_directory)/nss/lib/pkcs7/pkcs7t.h', - '<(nss_directory)/nss/lib/pkcs7/secmime.c', - '<(nss_directory)/nss/lib/pkcs7/secmime.h', - '<(nss_directory)/nss/lib/pkcs7/secpkcs7.h', - '<(nss_directory)/nss/lib/pki/asymmkey.c', - '<(nss_directory)/nss/lib/pki/certdecode.c', - '<(nss_directory)/nss/lib/pki/certificate.c', - '<(nss_directory)/nss/lib/pki/cryptocontext.c', - '<(nss_directory)/nss/lib/pki/nsspki.h', - '<(nss_directory)/nss/lib/pki/nsspkit.h', - '<(nss_directory)/nss/lib/pki/pki.h', - '<(nss_directory)/nss/lib/pki/pki3hack.c', - '<(nss_directory)/nss/lib/pki/pki3hack.h', - '<(nss_directory)/nss/lib/pki/pkibase.c', - '<(nss_directory)/nss/lib/pki/pkim.h', - '<(nss_directory)/nss/lib/pki/pkistore.c', - '<(nss_directory)/nss/lib/pki/pkistore.h', - '<(nss_directory)/nss/lib/pki/pkit.h', - '<(nss_directory)/nss/lib/pki/pkitm.h', - '<(nss_directory)/nss/lib/pki/symmkey.c', - '<(nss_directory)/nss/lib/pki/tdcache.c', - '<(nss_directory)/nss/lib/pki/trustdomain.c', - '<(nss_directory)/nss/lib/smime/cms.h', - '<(nss_directory)/nss/lib/smime/cmslocal.h', - '<(nss_directory)/nss/lib/smime/cmsreclist.h', - '<(nss_directory)/nss/lib/smime/cmst.h', - '<(nss_directory)/nss/lib/smime/smime.h', - '<(nss_directory)/nss/lib/softoken/fipsaudt.c', - '<(nss_directory)/nss/lib/softoken/fipstest.c', - '<(nss_directory)/nss/lib/softoken/fipstokn.c', - '<(nss_directory)/nss/lib/softoken/jpakesftk.c', - '<(nss_directory)/nss/lib/softoken/lgglue.c', - '<(nss_directory)/nss/lib/softoken/lgglue.h', - '<(nss_directory)/nss/lib/softoken/lowkey.c', - '<(nss_directory)/nss/lib/softoken/lowkeyi.h', - '<(nss_directory)/nss/lib/softoken/lowkeyti.h', - '<(nss_directory)/nss/lib/softoken/lowpbe.c', - '<(nss_directory)/nss/lib/softoken/lowpbe.h', - '<(nss_directory)/nss/lib/softoken/padbuf.c', - '<(nss_directory)/nss/lib/softoken/pkcs11.c', - '<(nss_directory)/nss/lib/softoken/pkcs11c.c', - '<(nss_directory)/nss/lib/softoken/pkcs11i.h', - '<(nss_directory)/nss/lib/softoken/pkcs11ni.h', - '<(nss_directory)/nss/lib/softoken/pkcs11u.c', - '<(nss_directory)/nss/lib/softoken/sdb.c', - '<(nss_directory)/nss/lib/softoken/sdb.h', - '<(nss_directory)/nss/lib/softoken/sftkdb.c', - '<(nss_directory)/nss/lib/softoken/sftkdb.h', - '<(nss_directory)/nss/lib/softoken/sftkdbt.h', - '<(nss_directory)/nss/lib/softoken/sftkdbti.h', - '<(nss_directory)/nss/lib/softoken/sftkhmac.c', - '<(nss_directory)/nss/lib/softoken/sftkpars.c', - '<(nss_directory)/nss/lib/softoken/sftkpars.h', - '<(nss_directory)/nss/lib/softoken/sftkpwd.c', - '<(nss_directory)/nss/lib/softoken/softkver.c', - '<(nss_directory)/nss/lib/softoken/softkver.h', - '<(nss_directory)/nss/lib/softoken/softoken.h', - '<(nss_directory)/nss/lib/softoken/softoknt.h', - '<(nss_directory)/nss/lib/softoken/tlsprf.c', - '<(nss_directory)/nss/lib/ssl/sslerr.h', - '<(nss_directory)/nss/lib/util/SECerrs.h', - '<(nss_directory)/nss/lib/util/base64.h', - '<(nss_directory)/nss/lib/util/ciferfam.h', - '<(nss_directory)/nss/lib/util/derdec.c', - '<(nss_directory)/nss/lib/util/derenc.c', - '<(nss_directory)/nss/lib/util/dersubr.c', - '<(nss_directory)/nss/lib/util/dertime.c', - '<(nss_directory)/nss/lib/util/errstrs.c', - '<(nss_directory)/nss/lib/util/hasht.h', - '<(nss_directory)/nss/lib/util/nssb64.h', - '<(nss_directory)/nss/lib/util/nssb64d.c', - '<(nss_directory)/nss/lib/util/nssb64e.c', - '<(nss_directory)/nss/lib/util/nssb64t.h', - '<(nss_directory)/nss/lib/util/nssilckt.h', - '<(nss_directory)/nss/lib/util/nssilock.c', - '<(nss_directory)/nss/lib/util/nssilock.h', - '<(nss_directory)/nss/lib/util/nsslocks.h', - '<(nss_directory)/nss/lib/util/nssrwlk.c', - '<(nss_directory)/nss/lib/util/nssrwlk.h', - '<(nss_directory)/nss/lib/util/nssrwlkt.h', - '<(nss_directory)/nss/lib/util/nssutil.h', - '<(nss_directory)/nss/lib/util/oidstring.c', - '<(nss_directory)/nss/lib/util/pkcs11.h', - '<(nss_directory)/nss/lib/util/pkcs11f.h', - '<(nss_directory)/nss/lib/util/pkcs11n.h', - '<(nss_directory)/nss/lib/util/pkcs11p.h', - '<(nss_directory)/nss/lib/util/pkcs11t.h', - '<(nss_directory)/nss/lib/util/pkcs11u.h', - '<(nss_directory)/nss/lib/util/pkcs1sig.c', - '<(nss_directory)/nss/lib/util/pkcs1sig.h', - '<(nss_directory)/nss/lib/util/portreg.c', - '<(nss_directory)/nss/lib/util/portreg.h', - '<(nss_directory)/nss/lib/util/quickder.c', - '<(nss_directory)/nss/lib/util/secalgid.c', - '<(nss_directory)/nss/lib/util/secasn1.h', - '<(nss_directory)/nss/lib/util/secasn1d.c', - '<(nss_directory)/nss/lib/util/secasn1e.c', - '<(nss_directory)/nss/lib/util/secasn1t.h', - '<(nss_directory)/nss/lib/util/secasn1u.c', - '<(nss_directory)/nss/lib/util/seccomon.h', - '<(nss_directory)/nss/lib/util/secder.h', - '<(nss_directory)/nss/lib/util/secdert.h', - '<(nss_directory)/nss/lib/util/secdig.c', - '<(nss_directory)/nss/lib/util/secdig.h', - '<(nss_directory)/nss/lib/util/secdigt.h', - '<(nss_directory)/nss/lib/util/secerr.h', - '<(nss_directory)/nss/lib/util/secitem.c', - '<(nss_directory)/nss/lib/util/secitem.h', - '<(nss_directory)/nss/lib/util/secoid.c', - '<(nss_directory)/nss/lib/util/secoid.h', - '<(nss_directory)/nss/lib/util/secoidt.h', - '<(nss_directory)/nss/lib/util/secport.c', - '<(nss_directory)/nss/lib/util/secport.h', - '<(nss_directory)/nss/lib/util/sectime.c', - '<(nss_directory)/nss/lib/util/templates.c', - '<(nss_directory)/nss/lib/util/utf8.c', - '<(nss_directory)/nss/lib/util/utilmod.c', - '<(nss_directory)/nss/lib/util/utilmodt.h', - '<(nss_directory)/nss/lib/util/utilpars.c', - '<(nss_directory)/nss/lib/util/utilpars.h', - '<(nss_directory)/nss/lib/util/utilparst.h', - '<(nss_directory)/nss/lib/util/utilrename.h', - ], - 'sources!': [ - # mpi_arm.c is included by mpi_arm_mac.c. - # NOTE: mpi_arm.c can be used directly on Linux. mpi_arm.c will need - # to be excluded conditionally if we start to build NSS on Linux. - '<(nss_directory)/nss/lib/freebl/mpi/mpi_arm.c', - # primes.c is included by mpprime.c. - '<(nss_directory)/nss/lib/freebl/mpi/primes.c', - # unix_rand.c and win_rand.c are included by sysrand.c. - '<(nss_directory)/nss/lib/freebl/unix_rand.c', - '<(nss_directory)/nss/lib/freebl/win_rand.c', - # debug_module.c is included by pk11load.c. - '<(nss_directory)/nss/lib/pk11wrap/debug_module.c', - ], - 'dependencies': [ - 'nspr_dart', # Added by Dart (the _dart postfix) - # Removed by Dart: nss_static_avx target dependency. - 'sqlite.gyp:sqlite_dart', # Changed by Dart prefix ../sqllite removed _dart postfix added. - ], - 'export_dependent_settings': [ - 'nspr_dart', # Added by Dart (the _dart postfix) - ], - 'defines': [ - 'MP_API_COMPATIBLE', - 'NSS_DISABLE_DBM', - 'NSS_STATIC', - 'NSS_USE_STATIC_LIBS', - 'RIJNDAEL_INCLUDE_TABLES', - 'SHLIB_VERSION=\"3\"', - 'SOFTOKEN_SHLIB_VERSION=\"3\"', - 'USE_UTIL_DIRECTLY', - ], - 'include_dirs': [ - '<(nss_directory)/nss/lib/base', - '<(nss_directory)/nss/lib/certdb', - '<(nss_directory)/nss/lib/certhigh', - '<(nss_directory)/nss/lib/cryptohi', - '<(nss_directory)/nss/lib/dev', - '<(nss_directory)/nss/lib/freebl', - '<(nss_directory)/nss/lib/freebl/ecl', - '<(nss_directory)/nss/lib/freebl/mpi', - '<(nss_directory)/nss/lib/libpkix/include', - '<(nss_directory)/nss/lib/libpkix/pkix/certsel', - '<(nss_directory)/nss/lib/libpkix/pkix/checker', - '<(nss_directory)/nss/lib/libpkix/pkix/crlsel', - '<(nss_directory)/nss/lib/libpkix/pkix/params', - '<(nss_directory)/nss/lib/libpkix/pkix/results', - '<(nss_directory)/nss/lib/libpkix/pkix/store', - '<(nss_directory)/nss/lib/libpkix/pkix/top', - '<(nss_directory)/nss/lib/libpkix/pkix/util', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/pki', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/system', - '<(nss_directory)/nss/lib/nss', - '<(nss_directory)/nss/lib/pk11wrap', - '<(nss_directory)/nss/lib/pkcs7', - '<(nss_directory)/nss/lib/pki', - '<(nss_directory)/nss/lib/smime', - '<(nss_directory)/nss/lib/softoken', - '<(nss_directory)/nss/lib/ssl', - '<(nss_directory)/nss/lib/util', - ], - 'direct_dependent_settings': { - 'defines': [ - 'NSS_STATIC', - 'NSS_USE_STATIC_LIBS', - 'USE_UTIL_DIRECTLY', - ], - 'include_dirs': [ - '<(nss_directory)/nspr/pr/include', - '<(nss_directory)/nspr/lib/ds', - '<(nss_directory)/nspr/lib/libc/include', - '<(nss_directory)/nss/lib/base', - '<(nss_directory)/nss/lib/certdb', - '<(nss_directory)/nss/lib/certhigh', - '<(nss_directory)/nss/lib/cryptohi', - '<(nss_directory)/nss/lib/dev', - '<(nss_directory)/nss/lib/freebl', - '<(nss_directory)/nss/lib/freebl/ecl', - '<(nss_directory)/nss/lib/nss', - '<(nss_directory)/nss/lib/pk11wrap', - '<(nss_directory)/nss/lib/pkcs7', - '<(nss_directory)/nss/lib/pki', - '<(nss_directory)/nss/lib/smime', - '<(nss_directory)/nss/lib/softoken', - '<(nss_directory)/nss/lib/util', - ], - }, - 'msvs_disabled_warnings': [4018, 4101, 4267, ], - 'variables': { - 'clang_warning_flags': [ - # nss doesn't explicitly cast between different enum types. - '-Wno-conversion', - # nss passes "const char*" through "void*". - '-Wno-incompatible-pointer-types', - # nss prefers `a && b || c` over `(a && b) || c`. - '-Wno-logical-op-parentheses', - # nss doesn't use exhaustive switches on enums - '-Wno-switch', - # nss has some `unsigned < 0` checks. - '-Wno-tautological-compare', - ], - }, - 'conditions': [ - ['exclude_nss_root_certs==1', { - 'defines': [ - 'NSS_DISABLE_ROOT_CERTS', - ], - }], - ['exclude_nss_libpkix==1', { - 'defines': [ - 'NSS_DISABLE_LIBPKIX', - ], - # Changed by Dart: - # nss_directory contains .., which is bad in a regular expression. - # So we use the partial match by dropping '^' from '^nss/... - 'sources/': [ - ['exclude', 'nss/lib/libpkix/'], - ], - 'sources!': [ - '<(nss_directory)/nss/lib/certhigh/certvfypkix.c', - '<(nss_directory)/nss/lib/certhigh/certvfypkixprint.c', - ], - # Changed by Dart: - # nss_directory contains .., which is bad in a regular expression. - # So we use the partial match by dropping '^' from '^nss/... - 'include_dirs/': [ - ['exclude', 'nss/lib/libpkix/'], - ], - }, { # else: exclude_nss_libpkix==0 - # Disable the LDAP code in libpkix. - 'defines': [ - 'NSS_PKIX_NO_LDAP', - ], - 'sources!': [ - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapcertstore.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapcertstore.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapdefaultclient.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapdefaultclient.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaprequest.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaprequest.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapresponse.c', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapresponse.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldapt.h', - '<(nss_directory)/nss/lib/libpkix/pkix_pl_nss/module/pkix_pl_ldaptemplates.c', - ], - }], - ['target_arch=="ia32"', { - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - ], - }], - # Added by Dart. - ['OS=="linux"', { - 'defines': [ - 'XP_UNIX', - 'HAVE_STRERROR', - 'SHLIB_SUFFIX=\"so\"', - 'SHLIB_PREFIX=\"lib\"', - 'SHLIB_VERSION=\"3\"', - 'SOFTOKEN_SHLIB_VERSION=\"3\"', - 'SOFTOKEN_LIB_NAME=\"libsoftokn3.so\"', - ], - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - ], - }], - ['target_arch=="x64" and OS!="win"', { - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/chacha20/chacha20.c', - '<(nss_directory)/nss/lib/freebl/poly1305/poly1305.c', - ], - }, { # else: target_arch!="x64" or OS=="win" - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/chacha20/chacha20_vec.c', - '<(nss_directory)/nss/lib/freebl/poly1305/poly1305-donna-x64-sse2-incremental-source.c', - ], - }], - ['OS=="mac" or OS=="ios"', { - 'defines': [ - 'XP_UNIX', - 'DARWIN', - 'HAVE_STRERROR', - 'HAVE_BSD_FLOCK', - 'SHLIB_SUFFIX=\"dylib\"', - 'SHLIB_PREFIX=\"lib\"', - 'SOFTOKEN_LIB_NAME=\"libsoftokn3.dylib\"', - ], - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - ], - 'variables': { - 'forced_include_file': 'nss_build_config_mac.h', - }, - 'xcode_settings': { - 'conditions': [ - ['component == "shared_library"', { - 'GCC_SYMBOLS_PRIVATE_EXTERN': 'NO', # no -fvisibility=hidden - }], - ], - # Define processor architecture specific macros in - # <(forced_include_file). - 'OTHER_CFLAGS': [ - '-include', '<(forced_include_file)', - ], - }, - }, { # else: OS!="mac" and OS!="ios" - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/mpi/mpi_arm_mac.c', - ], - }], - ['OS=="win"', { - 'defines': [ - 'SHLIB_SUFFIX=\"dll\"', - 'SHLIB_PREFIX=\"\"', - 'SOFTOKEN_LIB_NAME=\"softokn3.dll\"', - 'XP_PC', - 'WIN32', - 'WIN95', - '_WINDOWS', - ], - 'direct_dependent_settings': { - 'defines': [ - '_WINDOWS', - ], - }, - 'sources!': [ - '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - # If needed it will be included by os_windows.c. - '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - ], - # Changed by Dart. We don't use target_arch. This was moved into - # configurations instead. - # 'conditions': [ - # ['target_arch=="ia32"', { - # 'defines': [ - # 'NSS_X86_OR_X64', - # 'NSS_X86', - # '_X86_', - # 'MP_ASSEMBLY_MULTIPLY', - # 'MP_ASSEMBLY_SQUARE', - # 'MP_ASSEMBLY_DIV_2DX1D', - # 'MP_USE_UINT_DIGIT', - # 'MP_NO_MP_WORD', - # Changed by Dart: 'USE_HW_AES' and 'INTEL_GCM' are not enabled. - # 'USE_HW_AES', - # 'INTEL_GCM', - # ], - # }], - # 'msvs_settings': { - # 'MASM': { - # 'UseSafeExceptionHandlers': 'true', - # }, - # }, - # ['target_arch=="x64"', { - # 'defines': [ - # 'NSS_USE_64', - # 'NSS_X86_OR_X64', - # 'NSS_X64', - # '_AMD64_', - # 'MP_CHAR_STORE_SLOW', - # 'MP_IS_LITTLE_ENDIAN', - # 'WIN64', - # ], - # 'sources!': [ - # '<(nss_directory)/nss/lib/freebl/mpi/mpi_amd64.c', - # '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - # ], - # }], - # ], - }, { # else: OS!="win" - 'sources!': [ - 'os_windows.c', - # mpi_x86_asm.c contains MSVC inline assembly code. - '<(nss_directory)/nss/lib/freebl/mpi/mpi_x86_asm.c', - ], - }], - ], - }, - ], - }]], -} diff --git a/runtime/bin/net/nss_memio.cc b/runtime/bin/net/nss_memio.cc deleted file mode 100644 index f8f4f8204f4..00000000000 --- a/runtime/bin/net/nss_memio.cc +++ /dev/null @@ -1,538 +0,0 @@ -// Copyright (c) 2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// Written in NSPR style to also be suitable for adding to the NSS demo suite - -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -// This file is a modified copy of Chromium's src/net/base/nss_memio.c. -// char* has been changed to uint8_t* everywhere, and C++ casts are used. -// Revision 291806 (this should agree with "nss_rev" in DEPS). - - -/* memio is a simple NSPR I/O layer that lets you decouple NSS from - * the real network. It's rather like openssl's memory bio, - * and is useful when your app absolutely, positively doesn't - * want to let NSS do its own networking. - */ -#include "bin/net/nss_memio.h" - -#include -#include -#include - -#include "prerror.h" -#include "prinit.h" -#include "prlog.h" - -/*--------------- private memio types -----------------------*/ - -/*---------------------------------------------------------------------- - Simple private circular buffer class. Size cannot be changed once allocated. -----------------------------------------------------------------------*/ - -struct memio_buffer { - int head; /* where to take next byte out of buf */ - int tail; /* where to put next byte into buf */ - int bufsize; /* number of bytes allocated to buf */ - /* TODO(port): error handling is pessimistic right now. - * Once an error is set, the socket is considered broken - * (PR_WOULD_BLOCK_ERROR not included). - */ - PRErrorCode last_err; - uint8_t* buf; -}; - - -/* The 'secret' field of a PRFileDesc created by memio_CreateIOLayer points - * to one of these. - * In the public header, we use struct memio_Private as a typesafe alias - * for this. This causes a few ugly typecasts in the private file, but - * seems safer. - */ -struct PRFilePrivate { - /* read requests are satisfied from this buffer */ - struct memio_buffer readbuf; - - /* write requests are satisfied from this buffer */ - struct memio_buffer writebuf; - - /* SSL needs to know socket peer's name */ - PRNetAddr peername; - - /* if set, empty I/O returns EOF instead of EWOULDBLOCK */ - int eof; - - /* if set, the number of bytes requested from readbuf that were not - * fulfilled (due to readbuf being empty) */ - int read_requested; -}; - -/*--------------- private memio_buffer functions ---------------------*/ - -/* Forward declarations. */ - -/* Allocate a memio_buffer of given size. */ -static void memio_buffer_new(struct memio_buffer *mb, int size); - -/* Deallocate a memio_buffer allocated by memio_buffer_new. */ -static void memio_buffer_destroy(struct memio_buffer *mb); - -/* How many bytes can be read out of the buffer without wrapping */ -static int memio_buffer_used_contiguous(const struct memio_buffer *mb); - -/* How many bytes exist after the wrap? */ -static int memio_buffer_wrapped_bytes(const struct memio_buffer *mb); - -/* How many bytes can be written into the buffer without wrapping */ -static int memio_buffer_unused_contiguous(const struct memio_buffer *mb); - -/* Write n bytes into the buffer. Returns number of bytes written. */ -static int memio_buffer_put(struct memio_buffer *mb, const uint8_t* buf, int n); - -/* Read n bytes from the buffer. Returns number of bytes read. */ -static int memio_buffer_get(struct memio_buffer *mb, uint8_t* buf, int n); - -/* Allocate a memio_buffer of given size. */ -static void memio_buffer_new(struct memio_buffer *mb, int size) { - mb->head = 0; - mb->tail = 0; - mb->bufsize = size; - mb->buf = static_cast(malloc(size)); -} - -/* Deallocate a memio_buffer allocated by memio_buffer_new. */ -static void memio_buffer_destroy(struct memio_buffer *mb) { - free(mb->buf); - mb->buf = NULL; - mb->bufsize = 0; - mb->head = 0; - mb->tail = 0; -} - -/* How many bytes can be read out of the buffer without wrapping */ -static int memio_buffer_used_contiguous(const struct memio_buffer *mb) { - return (((mb->tail >= mb->head) ? mb->tail : mb->bufsize) - mb->head); -} - -/* How many bytes exist after the wrap? */ -static int memio_buffer_wrapped_bytes(const struct memio_buffer *mb) { - return (mb->tail >= mb->head) ? 0 : mb->tail; -} - -/* How many bytes can be written into the buffer without wrapping */ -static int memio_buffer_unused_contiguous(const struct memio_buffer *mb) { - if (mb->head > mb->tail) return mb->head - mb->tail - 1; - return mb->bufsize - mb->tail - (mb->head == 0); -} - -/* Write n bytes into the buffer. Returns number of bytes written. */ -static int memio_buffer_put(struct memio_buffer *mb, - const uint8_t* buf, - int n) { - int len; - int transferred = 0; - - /* Handle part before wrap */ - len = PR_MIN(n, memio_buffer_unused_contiguous(mb)); - if (len > 0) { - /* Buffer not full */ - memmove(&mb->buf[mb->tail], buf, len); - mb->tail += len; - if (mb->tail == mb->bufsize) - mb->tail = 0; - n -= len; - buf += len; - transferred += len; - - /* Handle part after wrap */ - len = PR_MIN(n, memio_buffer_unused_contiguous(mb)); - if (len > 0) { - /* Output buffer still not full, input buffer still not empty */ - memmove(&mb->buf[mb->tail], buf, len); - mb->tail += len; - if (mb->tail == mb->bufsize) - mb->tail = 0; - transferred += len; - } - } - - return transferred; -} - - -/* Read n bytes from the buffer. Returns number of bytes read. */ -static int memio_buffer_get(struct memio_buffer *mb, uint8_t* buf, int n) { - int len; - int transferred = 0; - - /* Handle part before wrap */ - len = PR_MIN(n, memio_buffer_used_contiguous(mb)); - if (len) { - memmove(buf, &mb->buf[mb->head], len); - mb->head += len; - if (mb->head == mb->bufsize) - mb->head = 0; - n -= len; - buf += len; - transferred += len; - - /* Handle part after wrap */ - len = PR_MIN(n, memio_buffer_used_contiguous(mb)); - if (len) { - memmove(buf, &mb->buf[mb->head], len); - mb->head += len; - if (mb->head == mb->bufsize) - mb->head = 0; - transferred += len; - } - } - - return transferred; -} - -/*--------------- private memio functions -----------------------*/ - -static PRStatus PR_CALLBACK memio_Close(PRFileDesc *fd) { - struct PRFilePrivate *secret = fd->secret; - memio_buffer_destroy(&secret->readbuf); - memio_buffer_destroy(&secret->writebuf); - free(secret); - fd->dtor(fd); - return PR_SUCCESS; -} - -static PRStatus PR_CALLBACK memio_Shutdown(PRFileDesc *fd, PRIntn how) { - /* TODO: pass shutdown status to app somehow */ - return PR_SUCCESS; -} - -/* If there was a network error in the past taking bytes - * out of the buffer, return it to the next call that - * tries to read from an empty buffer. - */ -static int PR_CALLBACK memio_Recv(PRFileDesc *fd, - uint8_t *buf, - PRInt32 len, - PRIntn flags, - PRIntervalTime timeout) { - struct PRFilePrivate *secret; - struct memio_buffer *mb; - int rv; - - if (flags) { - PR_SetError(PR_NOT_IMPLEMENTED_ERROR, 0); - return -1; - } - - secret = fd->secret; - mb = &secret->readbuf; - PR_ASSERT(mb->bufsize); - rv = memio_buffer_get(mb, buf, len); - if (rv == 0 && !secret->eof) { - secret->read_requested = len; - /* If there is no more data in the buffer, report any pending errors - * that were previously observed. Note that both the readbuf and the - * writebuf are checked for errors, since the application may have - * encountered a socket error while writing that would otherwise not - * be reported until the application attempted to write again - which - * it may never do. - */ - if (mb->last_err) - PR_SetError(mb->last_err, 0); - else if (secret->writebuf.last_err) - PR_SetError(secret->writebuf.last_err, 0); - else - PR_SetError(PR_WOULD_BLOCK_ERROR, 0); - return -1; - } - - secret->read_requested = 0; - return rv; -} - -static int PR_CALLBACK memio_Read(PRFileDesc *fd, uint8_t *buf, PRInt32 len) { - /* pull bytes from buffer */ - return memio_Recv(fd, buf, len, 0, PR_INTERVAL_NO_TIMEOUT); -} - -static int PR_CALLBACK memio_Send(PRFileDesc *fd, - const uint8_t *buf, - PRInt32 len, - PRIntn flags, - PRIntervalTime timeout) { - struct PRFilePrivate *secret; - struct memio_buffer *mb; - int rv; - - secret = fd->secret; - mb = &secret->writebuf; - PR_ASSERT(mb->bufsize); - - /* Note that the read error state is not reported, because it cannot be - * reported until all buffered data has been read. If there is an error - * with the next layer, attempting to call Send again will report the - * error appropriately. - */ - if (mb->last_err) { - PR_SetError(mb->last_err, 0); - return -1; - } - rv = memio_buffer_put(mb, buf, len); - if (rv == 0) { - PR_SetError(PR_WOULD_BLOCK_ERROR, 0); - return -1; - } - return rv; -} - -static int PR_CALLBACK memio_Write(PRFileDesc *fd, - const uint8_t *buf, - PRInt32 len) { - /* append bytes to buffer */ - return memio_Send(fd, buf, len, 0, PR_INTERVAL_NO_TIMEOUT); -} - -static PRStatus PR_CALLBACK memio_GetPeerName(PRFileDesc *fd, PRNetAddr *addr) { - /* TODO: fail if memio_SetPeerName has not been called */ - struct PRFilePrivate *secret = fd->secret; - *addr = secret->peername; - return PR_SUCCESS; -} - -static PRStatus memio_GetSocketOption(PRFileDesc *fd, - PRSocketOptionData *data) { - /* - * Even in the original version for real tcp sockets, - * PR_SockOpt_Nonblocking is a special case that does not - * translate to a getsockopt() call - */ - if (PR_SockOpt_Nonblocking == data->option) { - data->value.non_blocking = PR_TRUE; - return PR_SUCCESS; - } - PR_SetError(PR_OPERATION_NOT_SUPPORTED_ERROR, 0); - return PR_FAILURE; -} - -/*--------------- private memio data -----------------------*/ - -/* - * Implement just the bare minimum number of methods needed to make ssl happy. - * - * Oddly, PR_Recv calls ssl_Recv calls ssl_SocketIsBlocking calls - * PR_GetSocketOption, so we have to provide an implementation of - * PR_GetSocketOption that just says "I'm nonblocking". - */ - -static struct PRIOMethods memio_layer_methods = { - PR_DESC_LAYERED, - memio_Close, - (PRReadFN)memio_Read, - (PRWriteFN)memio_Write, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - memio_Shutdown, - (PRRecvFN)memio_Recv, - (PRSendFN)memio_Send, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - memio_GetPeerName, - NULL, - NULL, - memio_GetSocketOption, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, -}; - -static PRDescIdentity memio_identity = PR_INVALID_IO_LAYER; - -static PRStatus memio_InitializeLayerName(void) { - memio_identity = PR_GetUniqueIdentity("memio"); - return PR_SUCCESS; -} - -/*--------------- public memio functions -----------------------*/ - -PRFileDesc *memio_CreateIOLayer(int readbufsize, int writebufsize) { - PRFileDesc *fd; - struct PRFilePrivate *secret; - static PRCallOnceType once; - - PR_CallOnce(&once, memio_InitializeLayerName); - - fd = PR_CreateIOLayerStub(memio_identity, &memio_layer_methods); - secret = static_cast(malloc(sizeof(struct PRFilePrivate))); - memset(secret, 0, sizeof(*secret)); - - memio_buffer_new(&secret->readbuf, readbufsize); - memio_buffer_new(&secret->writebuf, writebufsize); - fd->secret = secret; - return fd; -} - -void memio_SetPeerName(PRFileDesc* fd, const PRNetAddr* peername) { - PRFileDesc *memiofd = PR_GetIdentitiesLayer(fd, memio_identity); - struct PRFilePrivate *secret = memiofd->secret; - secret->peername = *peername; -} - -memio_Private* memio_GetSecret(PRFileDesc* fd) { - PRFileDesc* memiofd = PR_GetIdentitiesLayer(fd, memio_identity); - struct PRFilePrivate *secret = memiofd->secret; - return reinterpret_cast(secret); -} - -int memio_GetReadRequest(memio_Private *secret) { - return reinterpret_cast(secret)->read_requested; -} - -int memio_GetReadParams(memio_Private* secret, uint8_t** buf) { - struct memio_buffer* mb = - &(reinterpret_cast(secret)->readbuf); - PR_ASSERT(mb->bufsize); - - *buf = &mb->buf[mb->tail]; - return memio_buffer_unused_contiguous(mb); -} - -int memio_GetReadableBufferSize(memio_Private *secret) { - struct memio_buffer* mb = - &(reinterpret_cast(secret)->readbuf); - PR_ASSERT(mb->bufsize); - - return memio_buffer_used_contiguous(mb); -} - -void memio_PutReadResult(memio_Private *secret, int bytes_read) { - struct memio_buffer* mb = - &(reinterpret_cast(secret)->readbuf); - PR_ASSERT(mb->bufsize); - - if (bytes_read > 0) { - mb->tail += bytes_read; - if (mb->tail == mb->bufsize) - mb->tail = 0; - } else if (bytes_read == 0) { - /* Record EOF condition and report to caller when buffer runs dry */ - reinterpret_cast(secret)->eof = PR_TRUE; - } else /* if (bytes_read < 0) */ { - mb->last_err = bytes_read; - } -} - -int memio_GetWriteParams(memio_Private *secret, - const uint8_t** buf1, unsigned int *len1, - const uint8_t** buf2, unsigned int *len2) { - struct memio_buffer* mb = - &(reinterpret_cast(secret)->writebuf); - PR_ASSERT(mb->bufsize); - - if (mb->last_err) - return mb->last_err; - - *buf1 = &mb->buf[mb->head]; - *len1 = memio_buffer_used_contiguous(mb); - *buf2 = mb->buf; - *len2 = memio_buffer_wrapped_bytes(mb); - return 0; -} - -void memio_PutWriteResult(memio_Private *secret, int bytes_written) { - struct memio_buffer* mb = - &(reinterpret_cast(secret)->writebuf); - PR_ASSERT(mb->bufsize); - - if (bytes_written > 0) { - mb->head += bytes_written; - if (mb->head >= mb->bufsize) - mb->head -= mb->bufsize; - } else if (bytes_written < 0) { - mb->last_err = bytes_written; - } -} - -/*--------------- private memio_buffer self-test -----------------*/ - -/* Even a trivial unit test is very helpful when doing circular buffers. */ -/*#define TRIVIAL_SELF_TEST*/ -#ifdef TRIVIAL_SELF_TEST - -#define TEST_BUFLEN 7 - -#define CHECKEQ(a, b) { \ - if ((a) != (b)) { \ - printf("%d != %d, Test failed line %d\n", a, b, __LINE__); \ - exit(1); \ - } \ -} - -#define FROM_STR(a) reinterpret_cast(a) - -int main() { - struct memio_buffer mb; - uint8_t buf[100]; - int i; - - memio_buffer_new(&mb, TEST_BUFLEN); - - CHECKEQ(memio_buffer_unused_contiguous(&mb), TEST_BUFLEN-1); - CHECKEQ(memio_buffer_used_contiguous(&mb), 0); - - CHECKEQ(memio_buffer_put(&mb, FROM_STR("howdy"), 5), 5); - - CHECKEQ(memio_buffer_unused_contiguous(&mb), TEST_BUFLEN-1-5); - CHECKEQ(memio_buffer_used_contiguous(&mb), 5); - CHECKEQ(memio_buffer_wrapped_bytes(&mb), 0); - - CHECKEQ(memio_buffer_put(&mb, FROM_STR("!"), 1), 1); - - CHECKEQ(memio_buffer_unused_contiguous(&mb), 0); - CHECKEQ(memio_buffer_used_contiguous(&mb), 6); - CHECKEQ(memio_buffer_wrapped_bytes(&mb), 0); - - CHECKEQ(memio_buffer_get(&mb, buf, 6), 6); - CHECKEQ(memcmp(buf, FROM_STR("howdy!"), 6), 0); - - CHECKEQ(memio_buffer_unused_contiguous(&mb), 1); - CHECKEQ(memio_buffer_used_contiguous(&mb), 0); - - CHECKEQ(memio_buffer_put(&mb, FROM_STR("01234"), 5), 5); - - CHECKEQ(memio_buffer_used_contiguous(&mb), 1); - CHECKEQ(memio_buffer_wrapped_bytes(&mb), 4); - CHECKEQ(memio_buffer_unused_contiguous(&mb), TEST_BUFLEN-1-5); - - CHECKEQ(memio_buffer_put(&mb, FROM_STR("5"), 1), 1); - - CHECKEQ(memio_buffer_unused_contiguous(&mb), 0); - CHECKEQ(memio_buffer_used_contiguous(&mb), 1); - - /* TODO: add more cases */ - - printf("Test passed\n"); - exit(0); -} - -#endif diff --git a/runtime/bin/net/nss_memio.h b/runtime/bin/net/nss_memio.h deleted file mode 100644 index acfd4677a04..00000000000 --- a/runtime/bin/net/nss_memio.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// Written in NSPR style to also be suitable for adding to the NSS demo suite - -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -// This file is a modified copy of Chromium's src/net/base/nss_memio.h. -// char* has been changed to uint8_t* everywhere, and C++ casts are used. -// Revision 291806 (this should agree with "nss_rev" in DEPS). - -#ifndef BIN_NET_NSS_MEMIO_H_ -#define BIN_NET_NSS_MEMIO_H_ - -#include -#include "vm/globals.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "prio.h" - -/* Opaque structure. Really just a more typesafe alias for PRFilePrivate. */ -struct memio_Private; -typedef struct memio_Private memio_Private; - -/*---------------------------------------------------------------------- - NSPR I/O layer that terminates in a pair of circular buffers - rather than talking to the real network. - To use this with NSS: - 1) call memio_CreateIOLayer to create a fake NSPR socket - 2) call SSL_ImportFD to ssl-ify the socket - 3) Do your own networking calls to set up a TCP connection - 4) call memio_SetPeerName to tell NSS about the other end of the connection - 5) While at the same time doing plaintext nonblocking NSPR I/O as - usual to the nspr file descriptor returned by SSL_ImportFD, - your app must shuttle encrypted data between - the real network and memio's network buffers. - memio_GetReadParams/memio_PutReadResult - are the hooks you need to pump data into memio's input buffer, - and memio_GetWriteParams/memio_PutWriteResult - are the hooks you need to pump data out of memio's output buffer. -----------------------------------------------------------------------*/ - -/* Create the I/O layer and its two circular buffers. */ -PRFileDesc *memio_CreateIOLayer(int readbufsize, int writebufsize); - -/* Must call before trying to make an ssl connection */ -void memio_SetPeerName(PRFileDesc *fd, const PRNetAddr *peername); - -/* Return a private pointer needed by the following - * four functions. (We could have passed a PRFileDesc to - * them, but that would be slower. Better for the caller - * to grab the pointer once and cache it. - * This may be a premature optimization.) - */ -memio_Private *memio_GetSecret(PRFileDesc *fd); - -/* Ask memio how many bytes were requested by a higher layer if the - * last attempt to read data resulted in PR_WOULD_BLOCK_ERROR, due to the - * transport buffer being empty. If the last attempt to read data from the - * memio did not result in PR_WOULD_BLOCK_ERROR, returns 0. - */ -int memio_GetReadRequest(memio_Private *secret); - -/* Ask memio where to put bytes from the network, and how many it can handle. - * Returns bytes available to write, or 0 if none available. - * Puts current buffer position into *buf. - */ -int memio_GetReadParams(memio_Private *secret, uint8_t **buf); - -/* Ask memio how many bytes are contained in the internal buffer. - * Returns bytes available to read, or 0 if none available. - */ -int memio_GetReadableBufferSize(memio_Private *secret); - -/* Tell memio how many bytes were read from the network. - * If bytes_read is 0, causes EOF to be reported to - * NSS after it reads the last byte from the circular buffer. - * If bytes_read is < 0, it is treated as an NSPR error code. - * See nspr/pr/src/md/unix/unix_errors.c for how to - * map from Unix errors to NSPR error codes. - * On EWOULDBLOCK or the equivalent, don't call this function. - */ -void memio_PutReadResult(memio_Private *secret, int bytes_read); - -/* Ask memio what data it has to send to the network. - * If there was previous a write error, the NSPR error code is returned. - * Otherwise, it returns 0 and provides up to two buffers of data by - * writing the positions and lengths into |buf1|, |len1| and |buf2|, |len2|. - */ -int memio_GetWriteParams(memio_Private *secret, - const uint8_t **buf1, unsigned int *len1, - const uint8_t **buf2, unsigned int *len2); - -/* Tell memio how many bytes were sent to the network. - * If bytes_written is < 0, it is treated as an NSPR error code. - * See nspr/pr/src/md/unix/unix_errors.c for how to - * map from Unix errors to NSPR error codes. - * On EWOULDBLOCK or the equivalent, don't call this function. - */ -void memio_PutWriteResult(memio_Private *secret, int bytes_written); - -#ifdef __cplusplus -} -#endif - -#endif // BIN_NET_NSS_MEMIO_H_ diff --git a/runtime/bin/net/os_linux.S b/runtime/bin/net/os_linux.S deleted file mode 100644 index a009e83e64b..00000000000 --- a/runtime/bin/net/os_linux.S +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -#ifdef __i386__ -#include "../../../third_party/nss/nspr/pr/src/md/unix/os_Linux_x86.s" -#elif defined(__x86_64__) -#include "../../../third_party/nss/nspr/pr/src/md/unix/os_Linux_x86_64.s" -#endif diff --git a/runtime/bin/net/os_windows.c b/runtime/bin/net/os_windows.c deleted file mode 100644 index 07ccdc50c06..00000000000 --- a/runtime/bin/net/os_windows.c +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -#if !defined(_MSC_VER) -#error "This file should only be compiled under MSVC" -#endif - -#if defined(_X86_) -#include "../../../third_party/nss/nss/lib/freebl/mpi/mpi_x86_asm.c" -#endif diff --git a/runtime/bin/net/sqlite.gyp b/runtime/bin/net/sqlite.gyp deleted file mode 100644 index bcfc0c790bb..00000000000 --- a/runtime/bin/net/sqlite.gyp +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) 2012 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -# Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file -# for details. All rights reserved. Use of this source code is governed by a -# BSD-style license that can be found in the LICENSE file. - -# This file is a modified copy of Chromium's src/third_party/sqlite/sqlite.gyp. -# Revision 291806 (this should agree with "nss_rev" in DEPS). -{ - # Added by Dart. All Dart comments refer to the following block or line. - 'includes': [ - '../../tools/gyp/runtime-configurations.gypi', - '../../tools/gyp/nss_configurations.gypi', - ], - 'variables': { - # Added by Dart. - 'sqlite_directory': '../../../third_party/sqlite', - 'use_system_sqlite%': 0, - 'required_sqlite_version': '3.6.1', - }, - 'target_defaults': { - 'defines': [ - 'SQLITE_CORE', - 'SQLITE_ENABLE_BROKEN_FTS2', - 'SQLITE_ENABLE_FTS2', - 'SQLITE_ENABLE_FTS3', - # Disabled by Dart: An external module with advanced unicode functions. - # 'SQLITE_ENABLE_ICU', - 'SQLITE_ENABLE_MEMORY_MANAGEMENT', - 'SQLITE_SECURE_DELETE', - 'SQLITE_SEPARATE_CACHE_POOLS', - 'THREADSAFE', - '_HAS_EXCEPTIONS=0', - ], - }, - # Added by Dart. We do not indent, so diffs with the original are clearer. - 'conditions': [[ 'dart_io_support==1', { - 'targets': [ - { - 'target_name': 'sqlite_dart', # Added by Dart (the _dart postfix) - 'toolsets':['host','target'], - 'conditions': [ - [ 'chromeos==1' , { - 'defines': [ - # Despite obvious warnings about not using this flag - # in deployment, we are turning off sync in ChromeOS - # and relying on the underlying journaling filesystem - # to do error recovery properly. It's much faster. - 'SQLITE_NO_SYNC', - ], - }, - ], - ['use_system_sqlite', { - 'type': 'none', - 'direct_dependent_settings': { - 'defines': [ - 'USE_SYSTEM_SQLITE', - ], - }, - - 'conditions': [ - ['OS == "ios"', { - 'dependencies': [ - 'sqlite_regexp', - ], - 'link_settings': { - 'libraries': [ - '$(SDKROOT)/usr/lib/libsqlite3.dylib', - ], - }, - }], - ['os_posix == 1 and OS != "mac" and OS != "ios" and OS != "android"', { - 'direct_dependent_settings': { - 'cflags': [ - # This next command produces no output but it it will fail - # (and cause GYP to fail) if we don't have a recent enough - # version of sqlite. - ' #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include "bin/builtin.h" #include "bin/dartutils.h" #include "bin/lockers.h" -#include "bin/net/nss_memio.h" +#include "bin/log.h" #include "bin/socket.h" #include "bin/thread.h" #include "bin/utils.h" @@ -32,27 +28,33 @@ #include "include/dart_api.h" - namespace dart { namespace bin { bool SSLFilter::library_initialized_ = false; // To protect library initialization. Mutex* SSLFilter::mutex_ = new Mutex(); -// The password is needed when creating secure server sockets. It can -// be null if only secure client sockets are used. -const char* SSLFilter::password_ = NULL; +int SSLFilter::filter_ssl_index; static const int kSSLFilterNativeFieldIndex = 0; +static const int kSecurityContextNativeFieldIndex = 0; +static const int kX509NativeFieldIndex = 0; +static const bool SSL_LOG_STATUS = false; +static const bool SSL_LOG_DATA = false; -/* Handle an error reported from the NSS library. */ -static void ThrowPRException(const char* exception_type, +static const int SSL_ERROR_MESSAGE_BUFFER_SIZE = 200; + +/* Handle an error reported from the BoringSSL library. */ +static void ThrowIOException(const char* exception_type, const char* message, bool free_message = false) { - PRErrorCode error_code = PR_GetError(); - const char* error_message = PR_ErrorToString(error_code, PR_LANGUAGE_EN); - OSError os_error_struct(error_code, error_message, OSError::kNSS); + // TODO(24068): Get the error code and message from the error stack. + // There may be more than one error on the stack - should we + // concatenate the error messages? + int error_code = 0; + const char* error_message = "Unknown error from BoringSSL library"; + OSError os_error_struct(error_code, error_message, OSError::kBoringSSL); Dart_Handle os_error = DartUtils::NewDartOSError(&os_error_struct); Dart_Handle exception = DartUtils::NewDartIOException(exception_type, message, os_error); @@ -64,20 +66,6 @@ static void ThrowPRException(const char* exception_type, } -static void ThrowCertificateException(const char* format, - const char* certificate_name) { - int length = strlen(certificate_name); - length += strlen(format); - char* message = reinterpret_cast(malloc(length + 1)); - if (message == NULL) { - FATAL("Out of memory formatting CertificateException for throwing"); - } - snprintf(message, length + 1, format, certificate_name); - message[length] = '\0'; - ThrowPRException("CertificateException", message, true); -} - - static SSLFilter* GetFilter(Dart_NativeArguments args) { SSLFilter* filter; Dart_Handle dart_this = ThrowIfError(Dart_GetNativeArgument(args, 0)); @@ -100,6 +88,48 @@ static void SetFilter(Dart_NativeArguments args, SSLFilter* filter) { } +static SSL_CTX* GetSecurityContext(Dart_NativeArguments args) { + SSL_CTX* context; + Dart_Handle dart_this = ThrowIfError(Dart_GetNativeArgument(args, 0)); + ASSERT(Dart_IsInstance(dart_this)); + ThrowIfError(Dart_GetNativeInstanceField( + dart_this, + kSecurityContextNativeFieldIndex, + reinterpret_cast(&context))); + return context; +} + + +static void SetSecurityContext(Dart_NativeArguments args, + SSL_CTX* context) { + Dart_Handle dart_this = ThrowIfError(Dart_GetNativeArgument(args, 0)); + ASSERT(Dart_IsInstance(dart_this)); + ThrowIfError(Dart_SetNativeInstanceField( + dart_this, + kSecurityContextNativeFieldIndex, + reinterpret_cast(context))); +} + + +static X509* GetX509Certificate(Dart_NativeArguments args) { + X509* certificate; + Dart_Handle dart_this = ThrowIfError(Dart_GetNativeArgument(args, 0)); + ASSERT(Dart_IsInstance(dart_this)); + ThrowIfError(Dart_GetNativeInstanceField( + dart_this, + kX509NativeFieldIndex, + reinterpret_cast(&certificate))); + return certificate; +} + + +// Forward declaration. +static void SetAlpnProtocolList(Dart_Handle protocols_handle, + SSL* ssl, + SSL_CTX* context, + bool is_server); + + void FUNCTION_NAME(SecureSocket_Init)(Dart_NativeArguments args) { Dart_Handle dart_this = ThrowIfError(Dart_GetNativeArgument(args, 0)); SSLFilter* filter = new SSLFilter; @@ -110,50 +140,36 @@ void FUNCTION_NAME(SecureSocket_Init)(Dart_NativeArguments args) { void FUNCTION_NAME(SecureSocket_Connect)(Dart_NativeArguments args) { Dart_Handle host_name_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); - Dart_Handle host_sockaddr_storage_object = - ThrowIfError(Dart_GetNativeArgument(args, 2)); - Dart_Handle port_object = ThrowIfError(Dart_GetNativeArgument(args, 3)); - bool is_server = DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 4)); - Dart_Handle certificate_name_object = - ThrowIfError(Dart_GetNativeArgument(args, 5)); + Dart_Handle context_object = ThrowIfError(Dart_GetNativeArgument(args, 2)); + bool is_server = DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 3)); bool request_client_certificate = - DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 6)); + DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 4)); bool require_client_certificate = - DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 7)); + DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 5)); bool send_client_certificate = - DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 8)); + DartUtils::GetBooleanValue(Dart_GetNativeArgument(args, 6)); Dart_Handle protocols_handle = - ThrowIfError(Dart_GetNativeArgument(args, 9)); + ThrowIfError(Dart_GetNativeArgument(args, 7)); const char* host_name = NULL; // TODO(whesse): Is truncating a Dart string containing \0 what we want? ThrowIfError(Dart_StringToCString(host_name_object, &host_name)); - RawAddr raw_addr; - SocketAddress::GetSockAddr(host_sockaddr_storage_object, &raw_addr); - - int64_t port; - if (!DartUtils::GetInt64Value(port_object, &port)) { - FATAL("The range of port_object was checked in Dart - it cannot fail here"); + SSL_CTX* context = NULL; + if (!Dart_IsNull(context_object)) { + ThrowIfError(Dart_GetNativeInstanceField( + context_object, + kSecurityContextNativeFieldIndex, + reinterpret_cast(&context))); } - const char* certificate_name = NULL; - if (Dart_IsString(certificate_name_object)) { - ThrowIfError(Dart_StringToCString(certificate_name_object, - &certificate_name)); - } - // If this is a server connection, it must have a certificate to connect with. - ASSERT(!is_server || certificate_name != NULL); - // The protocols_handle is guaranteed to be a valid Uint8List. // It will have the correct length encoding of the protocols array. ASSERT(!Dart_IsNull(protocols_handle)); GetFilter(args)->Connect(host_name, - raw_addr, - static_cast(port), + context, is_server, - certificate_name, request_client_certificate, require_client_certificate, send_client_certificate, @@ -217,50 +233,6 @@ void FUNCTION_NAME(SecureSocket_RegisterBadCertificateCallback)( } -void FUNCTION_NAME(SecureSocket_InitializeLibrary) - (Dart_NativeArguments args) { - Dart_Handle certificate_database_object = - ThrowIfError(Dart_GetNativeArgument(args, 0)); - // Check that the type is string, and get the UTF-8 C string value from it. - const char* certificate_database = NULL; - if (Dart_IsString(certificate_database_object)) { - ThrowIfError(Dart_StringToCString(certificate_database_object, - &certificate_database)); - } else if (!Dart_IsNull(certificate_database_object)) { - Dart_ThrowException(DartUtils::NewDartArgumentError( - "Non-String certificate directory argument to SetCertificateDatabase")); - } - // Leave certificate_database as NULL if no value was provided. - - Dart_Handle password_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); - // Check that the type is string or null, - // and get the UTF-8 C string value from it. - const char* password = NULL; - if (Dart_IsString(password_object)) { - ThrowIfError(Dart_StringToCString(password_object, &password)); - } else if (Dart_IsNull(password_object)) { - // Pass the empty string as the password. - password = ""; - } else { - Dart_ThrowException(DartUtils::NewDartArgumentError( - "Password argument to SetCertificateDatabase is not a String or null")); - } - - Dart_Handle builtin_roots_object = - ThrowIfError(Dart_GetNativeArgument(args, 2)); - // Check that the type is boolean, and get the boolean value from it. - bool builtin_roots = true; - if (Dart_IsBoolean(builtin_roots_object)) { - ThrowIfError(Dart_BooleanValue(builtin_roots_object, &builtin_roots)); - } else { - Dart_ThrowException(DartUtils::NewDartArgumentError( - "UseBuiltinRoots argument to SetCertificateDatabase is not a bool")); - } - - SSLFilter::InitializeLibrary(certificate_database, password, builtin_roots); -} - - void FUNCTION_NAME(SecureSocket_PeerCertificate) (Dart_NativeArguments args) { Dart_SetReturnValue(args, GetFilter(args)->PeerCertificate()); @@ -273,6 +245,291 @@ void FUNCTION_NAME(SecureSocket_FilterPointer)(Dart_NativeArguments args) { } +static Dart_Handle WrappedX509Certificate(X509* certificate) { + if (certificate == NULL) return Dart_Null(); + Dart_Handle x509_type = + DartUtils::GetDartType(DartUtils::kIOLibURL, "X509Certificate"); + if (Dart_IsError(x509_type)) { + return x509_type; + } + Dart_Handle arguments[] = { NULL }; + Dart_Handle result = + Dart_New(x509_type, DartUtils::NewString("_"), 0, arguments); + if (Dart_IsError(result)) { + return result; + } + ASSERT(Dart_IsInstance(result)); + Dart_Handle status = Dart_SetNativeInstanceField( + result, + kX509NativeFieldIndex, + reinterpret_cast(certificate)); + if (Dart_IsError(status)) { + return status; + } + return result; +} + + +int CertificateCallback(int preverify_ok, X509_STORE_CTX* store_ctx) { + if (preverify_ok == 1) return 1; + Dart_Isolate isolate = Dart_CurrentIsolate(); + if (isolate == NULL) { + FATAL("CertificateCallback called with no current isolate\n"); + } + X509* certificate = X509_STORE_CTX_get_current_cert(store_ctx); + int ssl_index = SSL_get_ex_data_X509_STORE_CTX_idx(); + SSL* ssl = static_cast( + X509_STORE_CTX_get_ex_data(store_ctx, ssl_index)); + SSLFilter* filter = static_cast( + SSL_get_ex_data(ssl, SSLFilter::filter_ssl_index)); + Dart_Handle callback = filter->bad_certificate_callback(); + if (Dart_IsNull(callback)) return 0; + Dart_Handle args[1]; + args[0] = WrappedX509Certificate(certificate); + if (Dart_IsError(args[0])) { + filter->callback_error = args[0]; + return 0; + } + Dart_Handle result = Dart_InvokeClosure(callback, 1, args); + if (!Dart_IsError(result) && !Dart_IsBoolean(result)) { + result = Dart_NewUnhandledExceptionError(DartUtils::NewDartIOException( + "HandshakeException", + "BadCertificateCallback returned a value that was not a boolean", + Dart_Null())); + } + if (Dart_IsError(result)) { + filter->callback_error = result; + return 0; + } + return DartUtils::GetBooleanValue(result); +} + + +void FUNCTION_NAME(SecurityContext_Allocate)(Dart_NativeArguments args) { + SSLFilter::InitializeLibrary(); + SSL_CTX* context = SSL_CTX_new(TLS_method()); + SSL_CTX_set_verify(context, SSL_VERIFY_PEER, CertificateCallback); + SSL_CTX_set_min_version(context, TLS1_VERSION); + SSL_CTX_set_cipher_list(context, "HIGH:MEDIUM"); + SSL_CTX_set_cipher_list_tls11(context, "HIGH:MEDIUM"); + SetSecurityContext(args, context); + // TODO(whesse): Use WeakPersistentHandle to free the SSL_CTX + // when the object is GC'd. Also free the alpn_select_cb data pointer, + // if non-null (allocated in SetAlpnProtocolList). +} + + +int PasswordCallback(char* buf, int size, int rwflag, void* userdata) { + char* password = static_cast(userdata); + if (static_cast(size) < strlen(password) + 1) { + Log::PrintErr("Password buffer too small.\n"); + exit(1); + // TODO(24182): Find the actual value of size passed in here, and + // check for password length longer than this in the Dart function + // that passes in the password, so we never have this problem. + } + strncpy(buf, static_cast(userdata), size); + return strlen(static_cast(userdata)); +} + + +void CheckStatus(int status, const char* message, int line) { + // TODO(24183): Take appropriate action on failed calls, + // throw exception that includes all messages from the error stack. + if (status != 1 && SSL_LOG_STATUS) { + int error = ERR_get_error(); + Log::PrintErr("Failed: %s line %d\n", message, line); + char error_string[SSL_ERROR_MESSAGE_BUFFER_SIZE]; + ERR_error_string_n(error, error_string, SSL_ERROR_MESSAGE_BUFFER_SIZE); + Log::PrintErr("ERROR: %d %s\n", error, error_string); + } +} + + +void FUNCTION_NAME(SecurityContext_UsePrivateKey)(Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + Dart_Handle filename_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); + const char* filename = NULL; + if (Dart_IsString(filename_object)) { + ThrowIfError(Dart_StringToCString(filename_object, &filename)); + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "File argument to SecurityContext.usePrivateKey is not a String")); + } + Dart_Handle password_object = ThrowIfError(Dart_GetNativeArgument(args, 2)); + const char* password = NULL; + if (Dart_IsString(password_object)) { + ThrowIfError(Dart_StringToCString(password_object, &password)); + } else if (Dart_IsNull(password_object)) { + password = ""; + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Password argument to SecurityContext.usePrivateKey is not " + "a String or null")); + } + + SSL_CTX_set_default_passwd_cb(context, PasswordCallback); + SSL_CTX_set_default_passwd_cb_userdata(context, const_cast(password)); + int status = SSL_CTX_use_PrivateKey_file(context, + filename, + SSL_FILETYPE_PEM); + // TODO(24184): Handle different expected errors here - file missing, + // incorrect password, file not a PEM, and throw exceptions. + // CheckStatus should also throw an exception in uncaught cases. + CheckStatus(status, "SSL_CTX_use_PrivateKey_file", __LINE__); + SSL_CTX_set_default_passwd_cb_userdata(context, NULL); +} + + +void FUNCTION_NAME(SecurityContext_SetTrustedCertificates)( + Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + Dart_Handle filename_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); + const char* filename = NULL; + if (Dart_IsString(filename_object)) { + ThrowIfError(Dart_StringToCString(filename_object, &filename)); + } + Dart_Handle directory_object = ThrowIfError(Dart_GetNativeArgument(args, 2)); + const char* directory = NULL; + if (Dart_IsString(directory_object)) { + ThrowIfError(Dart_StringToCString(directory_object, &directory)); + } else if (Dart_IsNull(directory_object)) { + directory = NULL; + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Directory argument to SecurityContext.usePrivateKey is not " + "a String or null")); + } + + int status = SSL_CTX_load_verify_locations(context, filename, directory); + CheckStatus(status, "SSL_CTX_load_verify_locations", __LINE__); +} + + +void FUNCTION_NAME(SecurityContext_TrustBuiltinRoots)( + Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + X509_STORE* store = SSL_CTX_get_cert_store(context); + BIO* roots_bio = + BIO_new_mem_buf(const_cast(root_certificates_pem), + root_certificates_pem_length); + X509* root_cert; + // PEM_read_bio_X509 reads PEM-encoded certificates from a bio (in our case, + // backed by a memory buffer), and returns X509 objects, one by one. + // When the end of the bio is reached, it returns null. + while ((root_cert = PEM_read_bio_X509(roots_bio, NULL, NULL, NULL))) { + X509_STORE_add_cert(store, root_cert); + } + BIO_free(roots_bio); +} + + +void FUNCTION_NAME(SecurityContext_UseCertificateChain)( + Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + Dart_Handle filename_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); + const char* filename = NULL; + if (Dart_IsString(filename_object)) { + ThrowIfError(Dart_StringToCString(filename_object, &filename)); + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "file argument in SecurityContext.useCertificateChain" + " is not a String")); + } + int status = SSL_CTX_use_certificate_chain_file(context, filename); + CheckStatus(status, "SSL_CTX_use_certificate_chain_file", __LINE__); +} + + +void FUNCTION_NAME(SecurityContext_SetClientAuthorities)( + Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + Dart_Handle filename_object = ThrowIfError(Dart_GetNativeArgument(args, 1)); + const char* filename = NULL; + if (Dart_IsString(filename_object)) { + ThrowIfError(Dart_StringToCString(filename_object, &filename)); + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "file argument in SecurityContext.setClientAuthorities" + " is not a String")); + } + STACK_OF(X509_NAME)* certificate_names; + certificate_names = SSL_load_client_CA_file(filename); + if (certificate_names != NULL) { + SSL_CTX_set_client_CA_list(context, certificate_names); + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Could not load certificate names from file in SetClientAuthorities")); + } +} + + +void FUNCTION_NAME(SecurityContext_SetAlpnProtocols)( + Dart_NativeArguments args) { + SSL_CTX* context = GetSecurityContext(args); + Dart_Handle protocols_handle = + ThrowIfError(Dart_GetNativeArgument(args, 1)); + Dart_Handle is_server_handle = + ThrowIfError(Dart_GetNativeArgument(args, 2)); + if (Dart_IsBoolean(is_server_handle)) { + bool is_server = DartUtils::GetBooleanValue(is_server_handle); + SetAlpnProtocolList(protocols_handle, NULL, context, is_server); + } else { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Non-boolean is_server argument passed to SetAlpnProtocols")); + } +} + + +void FUNCTION_NAME(X509_Subject)( + Dart_NativeArguments args) { + X509* certificate = GetX509Certificate(args); + X509_NAME* subject = X509_get_subject_name(certificate); + char* subject_string = X509_NAME_oneline(subject, NULL, 0); + Dart_SetReturnValue(args, Dart_NewStringFromCString(subject_string)); + OPENSSL_free(subject_string); +} + + +void FUNCTION_NAME(X509_Issuer)( + Dart_NativeArguments args) { + X509* certificate = GetX509Certificate(args); + X509_NAME* issuer = X509_get_issuer_name(certificate); + char* issuer_string = X509_NAME_oneline(issuer, NULL, 0); + Dart_SetReturnValue(args, Dart_NewStringFromCString(issuer_string)); + OPENSSL_free(issuer_string); +} + +static Dart_Handle ASN1TimeToMilliseconds(ASN1_TIME* aTime) { + ASN1_UTCTIME* epoch_start = M_ASN1_UTCTIME_new(); + ASN1_UTCTIME_set_string(epoch_start, "700101000000Z"); + int days; + int seconds; + int result = ASN1_TIME_diff(&days, &seconds, epoch_start, aTime); + M_ASN1_UTCTIME_free(epoch_start); + if (result != 1) { + // TODO(whesse): Propagate an error to Dart. + Log::PrintErr("ASN1Time error %d\n", result); + } + return Dart_NewInteger((86400LL * days + seconds) * 1000LL); +} + +void FUNCTION_NAME(X509_StartValidity)( + Dart_NativeArguments args) { + X509* certificate = GetX509Certificate(args); + ASN1_TIME* not_before = X509_get_notBefore(certificate); + Dart_SetReturnValue(args, ASN1TimeToMilliseconds(not_before)); +} + + +void FUNCTION_NAME(X509_EndValidity)( + Dart_NativeArguments args) { + X509* certificate = GetX509Certificate(args); + ASN1_TIME* not_after = X509_get_notAfter(certificate); + Dart_SetReturnValue(args, ASN1TimeToMilliseconds(not_after)); +} + + /** * Pushes data through the SSL filter, reading and writing from circular * buffers shared with Dart. @@ -314,8 +571,9 @@ CObject* SSLFilter::ProcessFilterRequest(const CObjectArray& request) { } return result; } else { - PRErrorCode error_code = PR_GetError(); - const char* error_message = PR_ErrorToString(error_code, PR_LANGUAGE_EN); + // TODO(24185): Extract the BoringSSL OS error here and return it. + int error_code = 1; + const char* error_message = "Obsolete PR Error message"; CObjectArray* result = new CObjectArray(CObject::NewArray(2)); result->SetAt(0, new CObjectInt32(CObject::NewInt32(error_code))); result->SetAt(1, new CObjectString(CObject::NewString(error_message))); @@ -365,33 +623,24 @@ bool SSLFilter::ProcessAllBuffers(int starts[kNumBuffers], ends[i] = end; break; case kReadEncrypted: - // Read data from circular buffer. + case kWritePlaintext: + // Read/Write data from circular buffer. If the buffer is empty, + // neither if statement's condition is true. if (end < start) { // Data may be split into two segments. In this case, // the first is [start, size). - int bytes = ProcessReadEncryptedBuffer(start, size); + int bytes = (i == kReadEncrypted) ? + ProcessReadEncryptedBuffer(start, size) : + ProcessWritePlaintextBuffer(start, size); if (bytes < 0) return false; start += bytes; ASSERT(start <= size); if (start == size) start = 0; } if (start < end) { - int bytes = ProcessReadEncryptedBuffer(start, end); - if (bytes < 0) return false; - start += bytes; - ASSERT(start <= end); - } - starts[i] = start; - break; - case kWritePlaintext: - if (end < start) { - // Data is split into two segments, [start, size) and [0, end). - int bytes = ProcessWritePlaintextBuffer(start, size, 0, end); - if (bytes < 0) return false; - start += bytes; - if (start >= size) start -= size; - } else { - int bytes = ProcessWritePlaintextBuffer(start, end, 0, 0); + int bytes = (i == kReadEncrypted) ? + ProcessReadEncryptedBuffer(start, end) : + ProcessWritePlaintextBuffer(start, end); if (bytes < 0) return false; start += bytes; ASSERT(start <= end); @@ -406,47 +655,9 @@ bool SSLFilter::ProcessAllBuffers(int starts[kNumBuffers], } -static Dart_Handle X509FromCertificate(CERTCertificate* certificate) { - PRTime start_validity; - PRTime end_validity; - SECStatus status = - CERT_GetCertTimes(certificate, &start_validity, &end_validity); - if (status != SECSuccess) { - ThrowPRException("CertificateException", - "Cannot get validity times from certificate"); - } - int64_t start_epoch_ms = start_validity / PR_USEC_PER_MSEC; - int64_t end_epoch_ms = end_validity / PR_USEC_PER_MSEC; - Dart_Handle subject_name_object = - DartUtils::NewString(certificate->subjectName); - Dart_Handle issuer_name_object = - DartUtils::NewString(certificate->issuerName); - Dart_Handle start_epoch_ms_int = Dart_NewInteger(start_epoch_ms); - Dart_Handle end_epoch_ms_int = Dart_NewInteger(end_epoch_ms); - - Dart_Handle date_type = - DartUtils::GetDartType(DartUtils::kCoreLibURL, "DateTime"); - Dart_Handle from_milliseconds = - DartUtils::NewString("fromMillisecondsSinceEpoch"); - - Dart_Handle start_validity_date = - Dart_New(date_type, from_milliseconds, 1, &start_epoch_ms_int); - Dart_Handle end_validity_date = - Dart_New(date_type, from_milliseconds, 1, &end_epoch_ms_int); - - Dart_Handle x509_type = - DartUtils::GetDartType(DartUtils::kIOLibURL, "X509Certificate"); - Dart_Handle arguments[] = { subject_name_object, - issuer_name_object, - start_validity_date, - end_validity_date }; - return Dart_New(x509_type, Dart_Null(), 4, arguments); -} - - void SSLFilter::Init(Dart_Handle dart_this) { if (!library_initialized_) { - InitializeLibrary(NULL, "", true, false); + InitializeLibrary(); } ASSERT(string_start_ == NULL); string_start_ = Dart_NewPersistentHandle(DartUtils::NewString("start")); @@ -459,7 +670,6 @@ void SSLFilter::Init(Dart_Handle dart_this) { ASSERT(bad_certificate_callback_ != NULL); InitializeBuffers(dart_this); - filter_ = memio_CreateIOLayer(kMemioBufferSize, kMemioBufferSize); } @@ -507,6 +717,7 @@ void SSLFilter::InitializeBuffers(Dart_Handle dart_this) { void SSLFilter::RegisterHandshakeCompleteCallback(Dart_Handle complete) { ASSERT(NULL == handshake_complete_); handshake_complete_ = Dart_NewPersistentHandle(complete); + ASSERT(handshake_complete_ != NULL); } @@ -519,157 +730,123 @@ void SSLFilter::RegisterBadCertificateCallback(Dart_Handle callback) { } -char* PasswordCallback(PK11SlotInfo* slot, PRBool retry, void* arg) { - if (!retry) { - return PL_strdup(static_cast(arg)); // Freed by NSS internals. - } - return NULL; -} - - -static const char* builtin_roots_module = -#if defined(TARGET_OS_LINUX) || defined(TARGET_OS_ANDROID) - "name=\"Root Certs\" library=\"libnssckbi.so\""; -#elif defined(TARGET_OS_MACOS) - "name=\"Root Certs\" library=\"libnssckbi.dylib\""; -#elif defined(TARGET_OS_WINDOWS) - "name=\"Root Certs\" library=\"nssckbi.dll\""; -#else -#error Automatic target os detection failed. -#endif - - - -void SSLFilter::InitializeLibrary(const char* certificate_database, - const char* password, - bool use_builtin_root_certificates, - bool report_duplicate_initialization) { +void SSLFilter::InitializeLibrary() { MutexLocker locker(mutex_); - SECStatus status; if (!library_initialized_) { - PR_Init(PR_USER_THREAD, PR_PRIORITY_NORMAL, 0); - // TODO(whesse): Verify there are no UTF-8 issues here. - if (certificate_database == NULL || certificate_database[0] == '\0') { - status = NSS_NoDB_Init(NULL); - if (status != SECSuccess) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - ThrowPRException("TlsException", - "Failed NSS_NoDB_Init call."); - } - if (use_builtin_root_certificates) { - SECMODModule* module = SECMOD_LoadUserModule( - const_cast(builtin_roots_module), NULL, PR_FALSE); - if (!module) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - ThrowPRException("TlsException", - "Failed to load builtin root certificates."); - } - } - } else { - PRUint32 init_flags = NSS_INIT_READONLY; - if (!use_builtin_root_certificates) { - init_flags |= NSS_INIT_NOMODDB; - } - status = NSS_Initialize(certificate_database, - "", - "", - SECMOD_DB, - init_flags); - if (status != SECSuccess) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - ThrowPRException("TlsException", - "Failed NSS_Init call."); - } - password_ = strdup(password); // This one copy persists until Dart exits. - PK11_SetPasswordFunc(PasswordCallback); - } + SSL_library_init(); + filter_ssl_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, NULL); + ASSERT(filter_ssl_index >= 0); library_initialized_ = true; - - status = NSS_SetDomesticPolicy(); - if (status != SECSuccess) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - ThrowPRException("TlsException", - "Failed NSS_SetDomesticPolicy call."); - } - - // Enable the same additional ciphers that Chromium does. - // See NSSSSLInitSingleton() in Chromium's net/socket/nss_ssl_util.cc. - // Explicitly enable exactly those ciphers with keys of at least 80 bits. - const PRUint16* const ssl_ciphers = SSL_GetImplementedCiphers(); - const PRUint16 num_ciphers = SSL_GetNumImplementedCiphers(); - for (int i = 0; i < num_ciphers; i++) { - SSLCipherSuiteInfo info; - if (SSL_GetCipherSuiteInfo(ssl_ciphers[i], &info, sizeof(info)) == - SECSuccess) { - bool enabled = (info.effectiveKeyBits >= 80); - // Trim the list of cipher suites in order to keep the size of the - // ClientHello down. DSS, ECDH, CAMELLIA, SEED, ECC+3DES, and - // HMAC-SHA256 cipher suites are disabled. - if (info.symCipher == ssl_calg_camellia || - info.symCipher == ssl_calg_seed || - (info.symCipher == ssl_calg_3des && info.keaType != ssl_kea_rsa) || - info.authAlgorithm == ssl_auth_dsa || - info.macAlgorithm == ssl_hmac_sha256 || - info.nonStandard || - strcmp(info.keaTypeName, "ECDH") == 0) { - enabled = false; - } - - if (ssl_ciphers[i] == TLS_DHE_DSS_WITH_AES_128_CBC_SHA) { - // Enabled to allow servers with only a DSA certificate to function. - enabled = true; - } - SSL_CipherPrefSetDefault(ssl_ciphers[i], enabled); - } - } - - status = SSL_ConfigServerSessionIDCache(0, 0, 0, NULL); - if (status != SECSuccess) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - ThrowPRException("TlsException", - "Failed SSL_ConfigServerSessionIDCache call."); - } - - } else if (report_duplicate_initialization) { - mutex_->Unlock(); // MutexLocker destructor not called when throwing. - // Like ThrowPRException, without adding an OSError. - Dart_ThrowException(DartUtils::NewDartIOException("TlsException", - "Called SecureSocket.initialize more than once", - Dart_Null())); } } -SECStatus BadCertificateCallback(void* filter, PRFileDesc* fd) { - SSLFilter* ssl_filter = static_cast(filter); - Dart_Handle callback = ssl_filter->bad_certificate_callback(); - if (Dart_IsNull(callback)) return SECFailure; - Dart_Handle x509_object = ssl_filter->PeerCertificate(); - Dart_Handle result = Dart_InvokeClosure(callback, 1, &x509_object); - if (Dart_IsError(result)) { - ssl_filter->callback_error = result; - return SECFailure; - } - // Our wrapper is guaranteed to return a boolean. - bool c_result = DartUtils::GetBooleanValue(result); - return c_result ? SECSuccess : SECFailure; -} - - Dart_Handle SSLFilter::PeerCertificate() { - CERTCertificate* certificate = SSL_PeerCertificate(filter_); - if (certificate == NULL) return Dart_Null(); - Dart_Handle x509_object = X509FromCertificate(certificate); - CERT_DestroyCertificate(certificate); + X509* certificate = SSL_get_peer_certificate(ssl_); + Dart_Handle x509_object = WrappedX509Certificate(certificate); + if (Dart_IsError(x509_object)) { + Dart_PropagateError(x509_object); + } return x509_object; } -void SSLFilter::Connect(const char* host_name, - const RawAddr& raw_addr, - int port, +int AlpnCallback(SSL *ssl, + const uint8_t **out, + uint8_t *outlen, + const uint8_t *in, + unsigned int inlen, + void *arg) { + // 'in' and 'arg' are sequences of (length, data) strings with 1-byte lengths. + // 'arg' is 0-terminated. Finds the first string in 'arg' that is in 'in'. + uint8_t* server_list = static_cast(arg); + while (*server_list != 0) { + uint8_t protocol_length = *server_list++; + const uint8_t* client_list = in; + while (client_list < in + inlen) { + uint8_t client_protocol_length = *client_list++; + if (client_protocol_length == protocol_length) { + if (0 == memcmp(server_list, client_list, protocol_length)) { + *out = client_list; + *outlen = client_protocol_length; + return SSL_TLSEXT_ERR_OK; // Success + } + } + client_list += client_protocol_length; + } + server_list += protocol_length; + } + // TODO(23580): Make failure send a fatal alert instead of ignoring ALPN. + return SSL_TLSEXT_ERR_NOACK; +} + + +// Sets the protocol list for ALPN on a SSL object or a context. +static void SetAlpnProtocolList(Dart_Handle protocols_handle, + SSL* ssl, + SSL_CTX* context, + bool is_server) { + // Enable ALPN (application layer protocol negotiation) if the caller provides + // a valid list of supported protocols. + Dart_TypedData_Type protocols_type; + uint8_t* protocol_string = NULL; + uint8_t* protocol_string_copy = NULL; + intptr_t protocol_string_len = 0; + int status; + + Dart_Handle result = Dart_TypedDataAcquireData( + protocols_handle, + &protocols_type, + reinterpret_cast(&protocol_string), + &protocol_string_len); + if (Dart_IsError(result)) { + Dart_PropagateError(result); + } + + if (protocols_type != Dart_TypedData_kUint8) { + Dart_TypedDataReleaseData(protocols_handle); + Dart_PropagateError(Dart_NewApiError( + "Unexpected type for protocols (expected valid Uint8List).")); + } + + if (protocol_string_len > 0) { + if (is_server) { + // ALPN on server connections must be set on an SSL_CTX object, + // not on the SSL object of the individual connection. + ASSERT(context != NULL); + ASSERT(ssl == NULL); + // Because it must be passed as a single void*, terminate + // the list of (length, data) strings with a length 0 string. + protocol_string_copy = + static_cast(malloc(protocol_string_len + 1)); + memmove(protocol_string_copy, protocol_string, protocol_string_len); + protocol_string_copy[protocol_string_len] = '\0'; + SSL_CTX_set_alpn_select_cb(context, AlpnCallback, protocol_string_copy); + // TODO(whesse): If this function is called again, free the previous + // protocol_string_copy. It may be better to keep this as a native + // field on the Dart object, since fetching it from the structure is + // not in the public api. Also free this when the context is destroyed. + } else { + // The function makes a local copy of protocol_string, which it owns. + if (ssl != NULL) { + ASSERT(context == NULL); + status = SSL_set_alpn_protos(ssl, protocol_string, protocol_string_len); + } else { + ASSERT(context != NULL); + ASSERT(ssl == NULL); + status = SSL_CTX_set_alpn_protos( + context, protocol_string, protocol_string_len); + } + ASSERT(status == 0); // The function returns a non-standard status. + } + } + Dart_TypedDataReleaseData(protocols_handle); +} + + +void SSLFilter::Connect(const char* hostname, + SSL_CTX* context, bool is_server, - const char* certificate_name, bool request_client_certificate, bool require_client_certificate, bool send_client_certificate, @@ -679,197 +856,131 @@ void SSLFilter::Connect(const char* host_name, FATAL("Connect called twice on the same _SecureFilter."); } - if (!is_server && certificate_name != NULL) { - client_certificate_name_ = strdup(certificate_name); + int status; + int error; + BIO* ssl_side; + status = BIO_new_bio_pair(&ssl_side, 10000, &socket_side_, 10000); + CheckStatus(status, "BIO_new_bio_pair", __LINE__); + + if (context == NULL) { + DART_CHECK_VALID(Dart_ThrowException(DartUtils::NewDartArgumentError( + "Default SecurityContext not implemented, context cannot be null."))); } - filter_ = SSL_ImportFD(NULL, filter_); - if (filter_ == NULL) { - ThrowPRException("TlsException", "Failed SSL_ImportFD call"); + ssl_ = SSL_new(context); + SSL_set_bio(ssl_, ssl_side, ssl_side); + SSL_set_mode(ssl_, SSL_MODE_AUTO_RETRY); // TODO(whesse): Is this right? + SSL_set_ex_data(ssl_, filter_ssl_index, this); + + if (!is_server_) { + SetAlpnProtocolList(protocols_handle, ssl_, NULL, false); + // Sets the hostname in the certificate-checking object, so it is checked + // against the certificate presented by the server. + X509_VERIFY_PARAM* certificate_checking_parameters_ = SSL_get0_param(ssl_); + hostname_ = strdup(hostname); + X509_VERIFY_PARAM_set_hostflags(certificate_checking_parameters_, 0); + X509_VERIFY_PARAM_set1_host(certificate_checking_parameters_, + hostname_, 0); + // TODO(24186) free hostname_ if it is not freed when SSL is destroyed. + // otherwise, make it a local variable, not a instance field. } - - - SECStatus status; - - // Enable ALPN (application layer protocol negogiation) if the caller provides - // a valid list of supported protocols. - { - Dart_TypedData_Type protocols_type; - uint8_t* protocol_string = NULL; - intptr_t protocol_string_len = 0; - - Dart_Handle result = Dart_TypedDataAcquireData( - protocols_handle, - &protocols_type, - reinterpret_cast(&protocol_string), - &protocol_string_len); - if (Dart_IsError(result)) { - Dart_PropagateError(result); + if (is_server_) { + status = SSL_accept(ssl_); + if (SSL_LOG_STATUS) Log::Print("SSL_accept status: %d\n", status); + if (status != 1) { + // TODO(whesse): expect a needs-data error here. Handle other errors. + error = SSL_get_error(ssl_, status); + if (SSL_LOG_STATUS) Log::Print("SSL_accept error: %d\n", error); } - - if (protocols_type != Dart_TypedData_kUint8) { - Dart_TypedDataReleaseData(protocols_handle); - Dart_PropagateError(Dart_NewApiError( - "Unexpected type for protocols (expected valid Uint8List).")); + } else { + status = SSL_connect(ssl_); + if (SSL_LOG_STATUS) Log::Print("SSL_connect status: %d\n", status); + if (status != 1) { + // TODO(whesse): expect a needs-data error here. Handle other errors. + error = SSL_get_error(ssl_, status); + if (SSL_LOG_STATUS) Log::Print("SSL_connect error: %d\n", error); } - - if (protocol_string_len > 0) { - status = SSL_OptionSet(filter_, SSL_ENABLE_ALPN, PR_TRUE); - ASSERT(status == SECSuccess); - - status = SSL_SetNextProtoNego(filter_, - protocol_string, - protocol_string_len); - ASSERT(status == SECSuccess); - } - - Dart_TypedDataReleaseData(protocols_handle); } - - SSLVersionRange vrange; - vrange.min = SSL_LIBRARY_VERSION_3_0; - vrange.max = SSL_LIBRARY_VERSION_TLS_1_2; - SSL_VersionRangeSet(filter_, &vrange); - - if (is_server) { - CERTCertificate* certificate = NULL; - if (strstr(certificate_name, "CN=") != NULL) { - // Look up certificate using the distinguished name (DN) certificate_name. - CERTCertDBHandle* certificate_database = CERT_GetDefaultCertDB(); - if (certificate_database == NULL) { - ThrowPRException("CertificateException", - "Certificate database cannot be loaded"); - } - certificate = CERT_FindCertByNameString(certificate_database, - const_cast(certificate_name)); - if (certificate == NULL) { - ThrowCertificateException( - "Cannot find server certificate by distinguished name: %s", - certificate_name); - } - } else { - // Look up certificate using the nickname certificate_name. - certificate = PK11_FindCertFromNickname( - const_cast(certificate_name), - static_cast(const_cast(password_))); - if (certificate == NULL) { - ThrowCertificateException( - "Cannot find server certificate by nickname: %s", - certificate_name); - } - } - SECKEYPrivateKey* key = PK11_FindKeyByAnyCert( - certificate, - static_cast(const_cast(password_))); - if (key == NULL) { - CERT_DestroyCertificate(certificate); - if (PR_GetError() == -8177) { - ThrowPRException("CertificateException", - "Certificate database password incorrect"); - } else { - ThrowCertificateException( - "Cannot find private key for certificate %s", - certificate_name); - } - } - - // kt_rsa (key type RSA) is an enum constant from the NSS libraries. - // TODO(whesse): Allow different key types. - status = SSL_ConfigSecureServer(filter_, certificate, key, kt_rsa); - CERT_DestroyCertificate(certificate); - SECKEY_DestroyPrivateKey(key); - if (status != SECSuccess) { - ThrowCertificateException( - "Failed SSL_ConfigSecureServer call with certificate %s", - certificate_name); - } - + if (is_server_) { if (request_client_certificate) { - status = SSL_OptionSet(filter_, SSL_REQUEST_CERTIFICATE, PR_TRUE); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failed SSL_OptionSet(REQUEST_CERTIFICATE) call"); - } - status = SSL_OptionSet(filter_, - SSL_REQUIRE_CERTIFICATE, - require_client_certificate); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failed SSL_OptionSet(REQUIRE_CERTIFICATE) call"); - } + // TODO(24069): Handle client certificates on server side. + Dart_ThrowException(DartUtils::NewDartArgumentError( + "requestClientCertificate not implemented.")); } } else { // Client. - if (SSL_SetURL(filter_, host_name) == -1) { - ThrowPRException("TlsException", "Failed SetURL call"); - } if (send_client_certificate) { - SSL_SetPKCS11PinArg(filter_, const_cast(password_)); - status = SSL_GetClientAuthDataHook( - filter_, - NSS_GetClientAuthData, - static_cast(client_certificate_name_)); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failed SSL_GetClientAuthDataHook call"); - } + // TODO(24070): Handle client certificates on client side. + Dart_ThrowException(DartUtils::NewDartArgumentError( + "sendClientCertificate not implemented.")); } } - - // Install bad certificate callback, and pass 'this' to it if it is called. - status = SSL_BadCertHook(filter_, - BadCertificateCallback, - static_cast(this)); - - status = SSL_ResetHandshake(filter_, is_server); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failed SSL_ResetHandshake call"); - } - - // Set the peer address from the address passed. The DNS has already - // been done in Dart code, so just use that address. This relies on - // following about PRNetAddr: "The raw member of the union is - // equivalent to struct sockaddr", which is stated in the NSS - // documentation. - PRNetAddr peername; - memset(&peername, 0, sizeof(peername)); - intptr_t len = SocketAddress::GetAddrLength(raw_addr); - ASSERT(static_cast(len) <= sizeof(peername)); - memmove(&peername, &raw_addr.addr, len); - - // Adjust the address family field for BSD, whose sockaddr - // structure has a one-byte length and one-byte address family - // field at the beginning. PRNetAddr has a two-byte address - // family field at the beginning. - peername.raw.family = raw_addr.addr.sa_family; - - memio_SetPeerName(filter_, &peername); + Handshake(); } +int printErrorCallback(const char *str, size_t len, void *ctx) { + Log::PrintErr("%.*s\n", static_cast(len), str); + return 1; +} + void SSLFilter::Handshake() { - SECStatus status = SSL_ForceHandshake(filter_); - if (status == SECSuccess) { + // Try and push handshake along. + int status; + int error; + status = SSL_do_handshake(ssl_); + if (callback_error != NULL) { + // The SSL_do_handshake will try performing a handshake and might call + // a CertificateCallback. If the certificate validation + // failed the 'callback_error" will be set by the certificateCallback + // logic and we propagate the error" + Dart_PropagateError(callback_error); + } + if (SSL_LOG_STATUS) Log::Print("SSL_handshake status: %d\n", status); + if (status != 1) { + error = SSL_get_error(ssl_, status); + if (SSL_LOG_STATUS) Log::Print("ERROR: %d\n", error); + ERR_print_errors_cb(printErrorCallback, NULL); + } + if (status == 1) { if (in_handshake_) { + // TODO(24071): Check return value of SSL_get_verify_result, this + // should give us the hostname check. + int result = SSL_get_verify_result(ssl_); + if (SSL_LOG_STATUS) { + Log::Print("Handshake verification status: %d\n", result); + X509* peer_certificate = SSL_get_peer_certificate(ssl_); + if (peer_certificate == NULL) { + Log::Print("No peer certificate received\n"); + } else { + X509_NAME* s_name = X509_get_subject_name(peer_certificate); + printf("Peer certificate SN: "); + X509_NAME_print_ex_fp(stdout, s_name, 4, 0); + printf("\n"); + } + } ThrowIfError(Dart_InvokeClosure( Dart_HandleFromPersistent(handshake_complete_), 0, NULL)); in_handshake_ = false; } - } else { - if (callback_error != NULL) { - Dart_PropagateError(callback_error); + } else if (status == 0) { + if (is_server_) { + ThrowIOException("HandshakeException", + "Handshake error in server"); + } else { + ThrowIOException("HandshakeException", + "Handshake error in client"); } - PRErrorCode error = PR_GetError(); - if (error == PR_WOULD_BLOCK_ERROR) { + } else if (status < 0) { + if (SSL_want_write(ssl_) || SSL_want_read(ssl_)) { if (!in_handshake_) { in_handshake_ = true; } } else { if (is_server_) { - ThrowPRException("HandshakeException", + ThrowIOException("HandshakeException", "Handshake error in server"); } else { - ThrowPRException("HandshakeException", + ThrowIOException("HandshakeException", "Handshake error in client"); } } @@ -877,42 +988,13 @@ void SSLFilter::Handshake() { } void SSLFilter::GetSelectedProtocol(Dart_NativeArguments args) { - // Space for the selected protocol. - const unsigned int kBufferSize = 256; - unsigned char buffer[kBufferSize + 1]; - - unsigned int outLength = 0; - SSLNextProtoState outState; - - SECStatus status = SSL_GetNextProto( - filter_, &outState, buffer, &outLength, kBufferSize); - if (status == SECSuccess) { - if (outState == SSL_NEXT_PROTO_SELECTED || - outState == SSL_NEXT_PROTO_NEGOTIATED) { - ASSERT(outLength <= kBufferSize); - buffer[outLength] = '\0'; - Dart_Handle protocol_string = DartUtils::NewString( - reinterpret_cast(&buffer[0])); - if (Dart_IsError(protocol_string)) { - ThrowPRException("HandshakeException", - "Protocol selected via ALPN, unable to get protocol " - "string."); - } else { - Dart_SetReturnValue(args, protocol_string); - } - } else if (outState == SSL_NEXT_PROTO_NO_OVERLAP) { - ThrowPRException("HandshakeException", - "Client and Server could not agree upon a protocol"); - } else if (outState == SSL_NEXT_PROTO_NO_SUPPORT) { - // A value of `null` denotes that the client did not support protocol - // negogiation. - Dart_SetReturnValue(args, Dart_Null()); - } else { - UNREACHABLE(); - } + const uint8_t* protocol; + unsigned length; + SSL_get0_alpn_selected(ssl_, &protocol, &length); + if (length == 0) { + Dart_SetReturnValue(args, Dart_Null()); } else { - ThrowPRException("HandshakeException", - "Could not retrieve selected protocol via ALPN"); + Dart_SetReturnValue(args, Dart_NewStringFromUTF8(protocol, length)); } } @@ -920,41 +1002,20 @@ void SSLFilter::GetSelectedProtocol(Dart_NativeArguments args) { void SSLFilter::Renegotiate(bool use_session_cache, bool request_client_certificate, bool require_client_certificate) { - SECStatus status; // The SSL_REQUIRE_CERTIFICATE option only takes effect if the // SSL_REQUEST_CERTIFICATE option is also set, so set it. request_client_certificate = request_client_certificate || require_client_certificate; - - status = SSL_OptionSet(filter_, - SSL_REQUEST_CERTIFICATE, - request_client_certificate); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failure in (Raw)SecureSocket.renegotiate request_client_certificate"); - } - status = SSL_OptionSet(filter_, - SSL_REQUIRE_CERTIFICATE, - require_client_certificate); - if (status != SECSuccess) { - ThrowPRException("TlsException", - "Failure in (Raw)SecureSocket.renegotiate require_client_certificate"); - } - bool flush_cache = !use_session_cache; - status = SSL_ReHandshake(filter_, flush_cache); - if (status != SECSuccess) { - if (is_server_) { - ThrowPRException("HandshakeException", - "Failure in (Raw)SecureSocket.renegotiate in server"); - } else { - ThrowPRException("HandshakeException", - "Failure in (Raw)SecureSocket.renegotiate in client"); - } - } + // TODO(24070, 24069): Implement setting the client certificate parameters, + // and triggering rehandshake. } void SSLFilter::Destroy() { + if (ssl_ != NULL) { + SSL_free(ssl_); + ssl_ = NULL; + } for (int i = 0; i < kNumBuffers; ++i) { Dart_DeletePersistentHandle(dart_buffer_objects_[i]); delete[] buffers_[i]; @@ -963,25 +1024,21 @@ void SSLFilter::Destroy() { Dart_DeletePersistentHandle(string_length_); Dart_DeletePersistentHandle(handshake_complete_); Dart_DeletePersistentHandle(bad_certificate_callback_); - free(client_certificate_name_); - - PR_Close(filter_); } -intptr_t SSLFilter::ProcessReadPlaintextBuffer(int start, int end) { +/* Read decrypted data from the filter to the circular buffer */ +int SSLFilter::ProcessReadPlaintextBuffer(int start, int end) { int length = end - start; int bytes_processed = 0; if (length > 0) { - bytes_processed = PR_Read(filter_, - buffers_[kReadPlaintext] + start, - length); + bytes_processed = SSL_read( + ssl_, + reinterpret_cast((buffers_[kReadPlaintext] + start)), + length); if (bytes_processed < 0) { - ASSERT(bytes_processed == -1); - PRErrorCode pr_error = PR_GetError(); - if (PR_WOULD_BLOCK_ERROR != pr_error) { - return -1; - } + int error = SSL_get_error(ssl_, bytes_processed); + USE(error); bytes_processed = 0; } } @@ -989,70 +1046,58 @@ intptr_t SSLFilter::ProcessReadPlaintextBuffer(int start, int end) { } -intptr_t SSLFilter::ProcessWritePlaintextBuffer(int start1, int end1, - int start2, int end2) { - PRIOVec ranges[2]; - uint8_t* buffer = buffers_[kWritePlaintext]; - ranges[0].iov_base = reinterpret_cast(buffer + start1); - ranges[0].iov_len = end1 - start1; - ranges[1].iov_base = reinterpret_cast(buffer + start2); - ranges[1].iov_len = end2 - start2; - int bytes_processed = PR_Writev(filter_, ranges, 2, PR_INTERVAL_NO_TIMEOUT); +int SSLFilter::ProcessWritePlaintextBuffer(int start, int end) { + int length = end - start; + int bytes_processed = SSL_write( + ssl_, buffers_[kWritePlaintext] + start, length); if (bytes_processed < 0) { - ASSERT(bytes_processed == -1); - PRErrorCode pr_error = PR_GetError(); - if (PR_WOULD_BLOCK_ERROR != pr_error) { - return -1; + if (SSL_LOG_DATA) { + Log::Print("SSL_write returned error %d\n", bytes_processed); } - bytes_processed = 0; + return 0; } return bytes_processed; } -intptr_t SSLFilter::ProcessReadEncryptedBuffer(int start, int end) { +/* Read encrypted data from the circular buffer to the filter */ +int SSLFilter::ProcessReadEncryptedBuffer(int start, int end) { int length = end - start; + if (SSL_LOG_DATA) Log::Print( + "Entering ProcessReadEncryptedBuffer with %d bytes\n", length); int bytes_processed = 0; if (length > 0) { - memio_Private* secret = memio_GetSecret(filter_); - uint8_t* filter_buf; - int free_bytes = memio_GetReadParams(secret, &filter_buf); - bytes_processed = dart::Utils::Minimum(length, free_bytes); - memmove(filter_buf, buffers_[kReadEncrypted] + start, bytes_processed); - memio_PutReadResult(secret, bytes_processed); + bytes_processed = + BIO_write(socket_side_, buffers_[kReadEncrypted] + start, length); + if (bytes_processed <= 0) { + bool retry = BIO_should_retry(socket_side_); + if (!retry) { + if (SSL_LOG_DATA) Log::Print( + "BIO_write failed in ReadEncryptedBuffer\n"); + } + bytes_processed = 0; + } } + if (SSL_LOG_DATA) Log::Print( + "Leaving ProcessReadEncryptedBuffer wrote %d bytes\n", bytes_processed); return bytes_processed; } -intptr_t SSLFilter::ProcessWriteEncryptedBuffer(int start, int end) { +int SSLFilter::ProcessWriteEncryptedBuffer(int start, int end) { int length = end - start; int bytes_processed = 0; if (length > 0) { - uint8_t* buffer = buffers_[kWriteEncrypted]; - const uint8_t* buf1; - const uint8_t* buf2; - unsigned int len1; - unsigned int len2; - memio_Private* secret = memio_GetSecret(filter_); - int status = memio_GetWriteParams(secret, &buf1, &len1, &buf2, &len2); - if (status != 0) { - return -1; - } - int bytes_to_send = - dart::Utils::Minimum(len1, static_cast(length)); - if (bytes_to_send > 0) { - memmove(buffer + start, buf1, bytes_to_send); - bytes_processed = bytes_to_send; - } - bytes_to_send = dart::Utils::Minimum(len2, - static_cast(length - bytes_processed)); - if (bytes_to_send > 0) { - memmove(buffer + start + bytes_processed, buf2, bytes_to_send); - bytes_processed += bytes_to_send; - } - if (bytes_processed > 0) { - memio_PutWriteResult(secret, bytes_processed); + bytes_processed = BIO_read(socket_side_, + buffers_[kWriteEncrypted] + start, + length); + if (bytes_processed < 0) { + if (SSL_LOG_DATA) Log::Print( + "WriteEncrypted BIO_read returned error %d\n", bytes_processed); + return 0; + } else { + if (SSL_LOG_DATA) Log::Print( + "WriteEncrypted BIO_read wrote %d bytes\n", bytes_processed); } } return bytes_processed; diff --git a/runtime/bin/secure_socket.h b/runtime/bin/secure_socket.h index 5deccc1e939..cd3611ad0c5 100644 --- a/runtime/bin/secure_socket.h +++ b/runtime/bin/secure_socket.h @@ -5,19 +5,19 @@ #ifndef BIN_SECURE_SOCKET_H_ #define BIN_SECURE_SOCKET_H_ +#ifdef DART_IO_SECURE_SOCKET_DISABLED +#error "secure_socket.h can only be included on builds with SSL enabled" +#endif + #include #include #include #include -#if !defined(DART_IO_SECURE_SOCKET_DISABLED) -#include -#include -#include -#include -#else -struct PRFileDesc; -#endif +#include +#include +#include +#include #include "bin/builtin.h" #include "bin/dartutils.h" @@ -28,6 +28,10 @@ struct PRFileDesc; namespace dart { namespace bin { +/* These are defined in root_certificates.cc. */ +extern const unsigned char* root_certificates_pem; +extern unsigned int root_certificates_pem_length; + /* * SSLFilter encapsulates the NSS SSL(TLS) code in a filter, that communicates * with the containing _SecureFilterImpl Dart object through four shared @@ -49,20 +53,18 @@ class SSLFilter { SSLFilter() : callback_error(NULL), + ssl_(NULL), string_start_(NULL), string_length_(NULL), handshake_complete_(NULL), bad_certificate_callback_(NULL), in_handshake_(false), - client_certificate_name_(NULL), - filter_(NULL) { } + hostname_(NULL) { } void Init(Dart_Handle dart_this); - void Connect(const char* host, - const RawAddr& raw_addr, - int port, + void Connect(const char* hostname, + SSL_CTX* context, bool is_server, - const char* certificate_name, bool request_client_certificate, bool require_client_certificate, bool send_client_certificate, @@ -78,27 +80,29 @@ class SSLFilter { Dart_Handle bad_certificate_callback() { return Dart_HandleFromPersistent(bad_certificate_callback_); } - intptr_t ProcessReadPlaintextBuffer(int start, int end); - intptr_t ProcessWritePlaintextBuffer(int start1, int end1, - int start2, int end2); - intptr_t ProcessReadEncryptedBuffer(int start, int end); - intptr_t ProcessWriteEncryptedBuffer(int start, int end); + int ProcessReadPlaintextBuffer(int start, int end); + int ProcessWritePlaintextBuffer(int start, int end); + int ProcessReadEncryptedBuffer(int start, int end); + int ProcessWriteEncryptedBuffer(int start, int end); bool ProcessAllBuffers(int starts[kNumBuffers], int ends[kNumBuffers], bool in_handshake); Dart_Handle PeerCertificate(); - static void InitializeLibrary(const char* certificate_database, - const char* password, - bool use_builtin_root_certificates, - bool report_duplicate_initialization = true); + static void InitializeLibrary(); Dart_Handle callback_error; static CObject* ProcessFilterRequest(const CObjectArray& request); + // The index of the external data field in _ssl that points to the SSLFilter. + static int filter_ssl_index; + + // TODO(whesse): make private: + SSL* ssl_; + BIO* socket_side_; + + private: - static const int kMemioBufferSize = 20 * KB; static bool library_initialized_; - static const char* password_; static Mutex* mutex_; // To protect library initialization. uint8_t* buffers_[kNumBuffers]; @@ -111,8 +115,8 @@ class SSLFilter { Dart_PersistentHandle bad_certificate_callback_; bool in_handshake_; bool is_server_; - char* client_certificate_name_; - PRFileDesc* filter_; + char* hostname_; + X509_VERIFY_PARAM* certificate_checking_parameters_; static bool isBufferEncrypted(int i) { return static_cast(i) >= kFirstEncrypted; diff --git a/runtime/bin/secure_socket_patch.dart b/runtime/bin/secure_socket_patch.dart index 4e22a1440f6..c8b072599bf 100644 --- a/runtime/bin/secure_socket_patch.dart +++ b/runtime/bin/secure_socket_patch.dart @@ -5,11 +5,6 @@ patch class SecureSocket { /* patch */ factory SecureSocket._(RawSecureSocket rawSocket) => new _SecureSocket(rawSocket); - - /* patch */ static void initialize({String database, - String password, - bool useBuiltinRoots: true}) - native "SecureSocket_InitializeLibrary"; } @@ -17,6 +12,9 @@ patch class _SecureFilter { /* patch */ factory _SecureFilter() => new _SecureFilterImpl(); } +patch class X509Certificate { + /* patch */ factory X509Certificate._() => new _X509CertificateImpl(); +} class _SecureSocket extends _Socket implements SecureSocket { _SecureSocket(RawSecureSocket raw) : super(raw); @@ -79,10 +77,8 @@ class _SecureFilterImpl } void connect(String hostName, - Uint8List sockaddrStorage, - int port, + SecurityContext context, bool is_server, - String certificateName, bool requestClientCertificate, bool requireClientCertificate, bool sendClientCertificate, @@ -119,3 +115,68 @@ class _SecureFilterImpl List<_ExternalBuffer> buffers; } + +patch class SecurityContext { + /* patch */ factory SecurityContext() { + return new _SecurityContext(); + } + + /* patch */ static SecurityContext get defaultContext { + return _SecurityContext.defaultContext; + } +} + +class _SecurityContext + extends NativeFieldWrapperClass1 + implements SecurityContext { + _SecurityContext() { + _createNativeContext(); + } + + void _createNativeContext() native "SecurityContext_Allocate"; + + static final SecurityContext defaultContext = + new _SecurityContext().._trustBuiltinRoots(); + + void usePrivateKey(String keyFile, {String password}) + native "SecurityContext_UsePrivateKey"; + void setTrustedCertificates({String file, String directory}) + native "SecurityContext_SetTrustedCertificates"; + void useCertificateChain(String file) + native "SecurityContext_UseCertificateChain"; + void setClientAuthorities(String file) + native "SecurityContext_SetClientAuthorities"; + void setAlpnProtocols(List protocols, bool isServer) { + Uint8List encodedProtocols = + SecurityContext._protocolsToLengthEncoding(protocols); + _setAlpnProtocols(encodedProtocols, isServer); + } + void _setAlpnProtocols(Uint8List protocols, bool isServer) + native "SecurityContext_SetAlpnProtocols"; + void _trustBuiltinRoots() + native "SecurityContext_TrustBuiltinRoots"; +} + +/** + * _X509CertificateImpl wraps an X509 certificate object held by the BoringSSL + * library. It exposes the fields of the certificate object. + */ +class _X509CertificateImpl extends NativeFieldWrapperClass1 + implements X509Certificate { + // The native field must be set manually on a new object, in native code. + // This is done by WrappedX509 in secure_socket.cc. + _X509CertificateImpl(); + + String get subject native "X509_Subject"; + String get issuer native "X509_Issuer"; + DateTime get startValidity { + return new DateTime.fromMillisecondsSinceEpoch(_startValidity(), + isUtc: true); + } + DateTime get endValidity { + return new DateTime.fromMillisecondsSinceEpoch(_endValidity(), + isUtc: true); + } + int _startValidity() native "X509_StartValidity"; + int _endValidity() native "X509_EndValidity"; +} \ No newline at end of file diff --git a/runtime/bin/secure_socket_unsupported.cc b/runtime/bin/secure_socket_unsupported.cc index 231b71e06c2..6bcc55e90c6 100644 --- a/runtime/bin/secure_socket_unsupported.cc +++ b/runtime/bin/secure_socket_unsupported.cc @@ -99,6 +99,66 @@ void FUNCTION_NAME(SecureSocket_NewServicePort)(Dart_NativeArguments args) { "Secure Sockets unsupported on this platform")); } +void FUNCTION_NAME(SecurityContext_Allocate)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_UsePrivateKey)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_SetAlpnProtocols)( + Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_SetClientAuthorities)( + Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_SetTrustedCertificates)( + Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_TrustBuiltinRoots)( + Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(SecurityContext_UseCertificateChain)( + Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(X509_Subject)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(X509_Issuer)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(X509_StartValidity)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + +void FUNCTION_NAME(X509_EndValidity)(Dart_NativeArguments args) { + Dart_ThrowException(DartUtils::NewDartArgumentError( + "Secure Sockets unsupported on this platform")); +} + class SSLFilter { public: static CObject* ProcessFilterRequest(const CObjectArray& request); diff --git a/runtime/bin/utils.h b/runtime/bin/utils.h index 10ca37353c0..bb55a76ffbb 100644 --- a/runtime/bin/utils.h +++ b/runtime/bin/utils.h @@ -20,7 +20,7 @@ class OSError { enum SubSystem { kSystem, kGetAddressInfo, - kNSS, + kBoringSSL, kUnknown = -1 }; diff --git a/sdk/lib/_internal/js_runtime/lib/io_patch.dart b/sdk/lib/_internal/js_runtime/lib/io_patch.dart index 792b74ddc41..23f4708da2f 100644 --- a/sdk/lib/_internal/js_runtime/lib/io_patch.dart +++ b/sdk/lib/_internal/js_runtime/lib/io_patch.dart @@ -403,12 +403,26 @@ class SecureSocket { factory SecureSocket._(RawSecureSocket rawSocket) { throw new UnsupportedError("SecureSocket constructor"); } +} + +@patch +class SecurityContext { + @patch + factory SecurityContext() { + throw new UnsupportedError("SecurityContext constructor"); + } @patch - static void initialize({String database, - String password, - bool useBuiltinRoots: true}) { - throw new UnsupportedError("SecureSocket.initialize"); + static SecurityContext get defaultContext { + throw new UnsupportedError("default SecurityContext getter"); + } +} + +@patch +class X509Certificate { + @patch + factory X509Certificate._() { + throw new UnsupportedError("X509Certificate constructor"); } } diff --git a/sdk/lib/io/http.dart b/sdk/lib/io/http.dart index befa0435316..dd715599bed 100644 --- a/sdk/lib/io/http.dart +++ b/sdk/lib/io/http.dart @@ -93,23 +93,28 @@ abstract class HttpStatus { * * Use [bindSecure] to create an HTTPS server. * - * The server presents a certificate to the client. In the following - * example, the certificate is named `localhost_cert` and comes from - * the database found in the `pkcert` directory. + * The server presents a certificate to the client. The certificate + * chain and the private key are set in the SecurityContext + * object that is passed to [bindSecure]. * * import 'dart:io'; * import "dart:isolate"; * * main() { - * var testPkcertDatabase = Platform.script.resolve('pkcert') - * .toFilePath(); - * SecureSocket.initialize(database: testPkcertDatabase, - * password: 'dartdart'); + * SecurityContext context = new SecurityContext(); + * var chain = + * Platform.script.resolve('certificates/server_chain.pem') + * .toFilePath(); + * var key = + * Platform.script.resolve('certificates/server_key.pem') + * .toFilePath(); + * context.useCertificateChain(chain); + * context.usePrivateKey(key, password: 'dartdart'); * * HttpServer * .bindSecure(InternetAddress.ANY_IP_V6, * 443, - * certificateName: 'localhost_cert') + * context) * .then((server) { * server.listen((HttpRequest request) { * request.response.write('Hello, world!'); @@ -118,10 +123,8 @@ abstract class HttpStatus { * }); * } * - * The certificate database is managed using the Mozilla certutil tool (see - * [NSS Tools certutil](https://developer.mozilla.org/en-US/docs/NSS/tools/NSS_Tools_certutil)). - * Dart uses the NSS library to handle SSL, and the Mozilla certutil - * must be used to manipulate the certificate database. + * The certificates and keys are pem files, which can be created and + * managed with the tools in OpenSSL and BoringSSL. * * ## Connect to a server socket * @@ -291,6 +294,7 @@ abstract class HttpServer implements Stream { static Future bindSecure(address, int port, + SecurityContext context, {int backlog: 0, bool v6Only: false, String certificateName, @@ -298,6 +302,7 @@ abstract class HttpServer implements Stream { bool shared: false}) => _HttpServer.bindSecure(address, port, + context, backlog, v6Only, certificateName, @@ -1331,7 +1336,7 @@ abstract class HttpClient { */ String userAgent; - factory HttpClient() => new _HttpClient(); + factory HttpClient({SecurityContext context}) => new _HttpClient(context); /** * Opens a HTTP connection. diff --git a/sdk/lib/io/http_impl.dart b/sdk/lib/io/http_impl.dart index 49b6c7b46fc..880d771a6af 100644 --- a/sdk/lib/io/http_impl.dart +++ b/sdk/lib/io/http_impl.dart @@ -1256,6 +1256,7 @@ class _HttpClientConnection { final String key; final Socket _socket; final bool _proxyTunnel; + final SecurityContext _context; final _HttpParser _httpParser; StreamSubscription _subscription; final _HttpClient _httpClient; @@ -1268,7 +1269,7 @@ class _HttpClientConnection { Future _streamFuture; _HttpClientConnection(this.key, this._socket, this._httpClient, - [this._proxyTunnel = false]) + [this._proxyTunnel = false, this._context]) : _httpParser = new _HttpParser.responseParser() { _httpParser.listenToStream(_socket); @@ -1496,7 +1497,10 @@ class _HttpClientConnection { } var socket = response._httpRequest._httpClientConnection._socket; return SecureSocket.secure( - socket, host: host, onBadCertificate: callback); + socket, + host: host, + context: _context, + onBadCertificate: callback); }) .then((secureSocket) { String key = _HttpClientConnection.makeKey(true, host, port); @@ -1543,12 +1547,17 @@ class _ConnectionTarget { final String host; final int port; final bool isSecure; + final SecurityContext context; final Set<_HttpClientConnection> _idle = new HashSet(); final Set<_HttpClientConnection> _active = new HashSet(); final Queue _pending = new ListQueue(); int _connecting = 0; - _ConnectionTarget(this.key, this.host, this.port, this.isSecure); + _ConnectionTarget(this.key, + this.host, + this.port, + this.isSecure, + this.context); bool get isEmpty => _idle.isEmpty && _active.isEmpty && _connecting == 0; @@ -1620,12 +1629,13 @@ class _ConnectionTarget { return completer.future; } var currentBadCertificateCallback = client._badCertificateCallback; - bool callback(X509Certificate certificate) => + callback(X509Certificate certificate) => currentBadCertificateCallback == null ? false : currentBadCertificateCallback(certificate, uriHost, uriPort); Future socketFuture = (isSecure && proxy.isDirect ? SecureSocket.connect(host, port, + context: context, sendClientCertificate: true, onBadCertificate: callback) : Socket.connect(host, port)); @@ -1633,7 +1643,8 @@ class _ConnectionTarget { return socketFuture.then((socket) { _connecting--; socket.setOption(SocketOption.TCP_NODELAY, true); - var connection = new _HttpClientConnection(key, socket, client); + var connection = + new _HttpClientConnection(key, socket, client, false, context); if (isSecure && !proxy.isDirect) { connection._dispose = true; return connection.createProxyTunnel(uriHost, uriPort, proxy, callback) @@ -1662,6 +1673,7 @@ class _HttpClient implements HttpClient { = new HashMap(); final List<_Credentials> _credentials = []; final List<_ProxyCredentials> _proxyCredentials = []; + final SecurityContext _context; Function _authenticate; Function _authenticateProxy; Function _findProxy = HttpClient.findProxyFromEnvironment; @@ -1676,6 +1688,8 @@ class _HttpClient implements HttpClient { String userAgent = _getHttpVersion(); + _HttpClient(SecurityContext this._context); + void set idleTimeout(Duration timeout) { _idleTimeout = timeout; for (var c in _connectionTargets.values) { @@ -1894,8 +1908,9 @@ class _HttpClient implements HttpClient { _ConnectionTarget _getConnectionTarget(String host, int port, bool isSecure) { String key = _HttpClientConnection.makeKey(isSecure, host, port); - return _connectionTargets.putIfAbsent( - key, () => new _ConnectionTarget(key, host, port, isSecure)); + return _connectionTargets.putIfAbsent(key, () { + return new _ConnectionTarget(key, host, port, isSecure, _context); + }); } // Get a new _HttpClientConnection, from the matching _ConnectionTarget. @@ -2207,6 +2222,7 @@ class _HttpServer static Future bindSecure(address, int port, + SecurityContext context, int backlog, bool v6Only, String certificate_name, @@ -2215,7 +2231,7 @@ class _HttpServer return SecureServerSocket.bind( address, port, - certificate_name, + context, backlog: backlog, v6Only: v6Only, requestClientCertificate: requestClientCertificate, diff --git a/sdk/lib/io/io.dart b/sdk/lib/io/io.dart index c066b301730..b3ca21445f7 100644 --- a/sdk/lib/io/io.dart +++ b/sdk/lib/io/io.dart @@ -232,11 +232,12 @@ part 'link.dart'; part 'platform.dart'; part 'platform_impl.dart'; part 'process.dart'; +part 'secure_server_socket.dart'; +part 'secure_socket.dart'; +part 'security_context.dart'; part 'service_object.dart'; part 'socket.dart'; part 'stdio.dart'; part 'string_transformer.dart'; -part 'secure_socket.dart'; -part 'secure_server_socket.dart'; part 'websocket.dart'; part 'websocket_impl.dart'; diff --git a/sdk/lib/io/iolib_sources.gypi b/sdk/lib/io/iolib_sources.gypi index 96900f7399f..bb8848c9c29 100644 --- a/sdk/lib/io/iolib_sources.gypi +++ b/sdk/lib/io/iolib_sources.gypi @@ -27,11 +27,12 @@ 'platform_impl.dart', 'process.dart', 'service_object.dart', + 'secure_server_socket.dart', + 'secure_socket.dart', + 'security_context.dart', 'socket.dart', 'stdio.dart', 'string_transformer.dart', - 'secure_socket.dart', - 'secure_server_socket.dart', 'websocket.dart', 'websocket_impl.dart', ], diff --git a/sdk/lib/io/secure_server_socket.dart b/sdk/lib/io/secure_server_socket.dart index c3ce9cfb49f..04ec9e01040 100644 --- a/sdk/lib/io/secure_server_socket.dart +++ b/sdk/lib/io/secure_server_socket.dart @@ -45,12 +45,6 @@ class SecureServerSocket extends Stream { * * [address] must be given as a numeric address, not a host name. * - * [certificateName] is the nickname or the distinguished name (DN) of - * the certificate in the certificate database. It is looked up in the - * NSS certificate database set by SecureSocket.initialize. - * If [certificateName] contains "CN=", it is assumed to be a distinguished - * name. Otherwise, it is looked up as a nickname. - * * To request or require that clients authenticate by providing an SSL (TLS) * client certificate, set the optional parameter [requestClientCertificate] * or [requireClientCertificate] to true. Requiring a certificate implies @@ -70,7 +64,7 @@ class SecureServerSocket extends Stream { static Future bind( address, int port, - String certificateName, + SecurityContext context, {int backlog: 0, bool v6Only: false, bool requestClientCertificate: false, @@ -80,7 +74,7 @@ class SecureServerSocket extends Stream { return RawSecureServerSocket.bind( address, port, - certificateName, + context, backlog: backlog, v6Only: v6Only, requestClientCertificate: requestClientCertificate, @@ -128,21 +122,20 @@ class SecureServerSocket extends Stream { * See [RawSecureSocket] for more info. */ class RawSecureServerSocket extends Stream { - RawServerSocket _socket; + final RawServerSocket _socket; StreamController _controller; StreamSubscription _subscription; - final String certificateName; + final SecurityContext _context; final bool requestClientCertificate; final bool requireClientCertificate; final List supportedProtocols; bool _closed = false; - RawSecureServerSocket._(RawServerSocket serverSocket, - this.certificateName, + RawSecureServerSocket._(this._socket, + this._context, this.requestClientCertificate, this.requireClientCertificate, this.supportedProtocols) { - _socket = serverSocket; _controller = new StreamController( sync: true, onListen: _onSubscriptionStateChange, @@ -205,7 +198,7 @@ class RawSecureServerSocket extends Stream { static Future bind( address, int port, - String certificateName, + SecurityContext context, {int backlog: 0, bool v6Only: false, bool requestClientCertificate: false, @@ -216,7 +209,7 @@ class RawSecureServerSocket extends Stream { address, port, backlog: backlog, v6Only: v6Only, shared: shared) .then((serverSocket) => new RawSecureServerSocket._( serverSocket, - certificateName, + context, requestClientCertificate, requireClientCertificate, supportedProtocols)); @@ -263,7 +256,7 @@ class RawSecureServerSocket extends Stream { _RawSecureSocket.connect( connection.address, remotePort, - certificateName, + context: _context, is_server: true, socket: connection, requestClientCertificate: requestClientCertificate, diff --git a/sdk/lib/io/secure_socket.dart b/sdk/lib/io/secure_socket.dart index 6b36ebaeac0..468e1bd4465 100644 --- a/sdk/lib/io/secure_socket.dart +++ b/sdk/lib/io/secure_socket.dart @@ -18,16 +18,6 @@ abstract class SecureSocket implements Socket { * [host] on port [port]. The returned Future will complete with a * [SecureSocket] that is connected and ready for subscription. * - * If [sendClientCertificate] is set to true, the socket will send a client - * certificate if one is requested by the server. - * - * If [certificateName] is the nickname of a certificate in the certificate - * database, that certificate will be sent. - * - * If [certificateName] is null, which is the usual use case, an - * appropriate certificate will be searched for in the database and - * sent automatically, based on what the server says it will accept. - * * [onBadCertificate] is an optional handler for unverifiable certificates. * The handler receives the [X509Certificate], and can inspect it and * decide (or let the user decide) whether to accept @@ -37,14 +27,13 @@ abstract class SecureSocket implements Socket { static Future connect( host, int port, - {bool sendClientCertificate: false, - String certificateName, + {SecurityContext context, bool onBadCertificate(X509Certificate certificate), + bool sendClientCertificate, List supportedProtocols}) { return RawSecureSocket.connect(host, port, - sendClientCertificate: sendClientCertificate, - certificateName: certificateName, + context: context, onBadCertificate: onBadCertificate, supportedProtocols: supportedProtocols) .then((rawSocket) => new SecureSocket._(rawSocket)); @@ -79,8 +68,7 @@ abstract class SecureSocket implements Socket { static Future secure( Socket socket, {host, - bool sendClientCertificate: false, - String certificateName, + SecurityContext context, bool onBadCertificate(X509Certificate certificate)}) { var completer = new Completer(); (socket as dynamic)._detachRaw() @@ -89,7 +77,7 @@ abstract class SecureSocket implements Socket { detachedRaw[0], subscription: detachedRaw[1], host: host, - sendClientCertificate: sendClientCertificate, + context: context, onBadCertificate: onBadCertificate); }) .then((raw) { @@ -121,7 +109,7 @@ abstract class SecureSocket implements Socket { */ static Future secureServer( Socket socket, - String certificateName, + SecurityContext context, {List bufferedData, bool requestClientCertificate: false, bool requireClientCertificate: false, @@ -131,7 +119,7 @@ abstract class SecureSocket implements Socket { .then((detachedRaw) { return RawSecureSocket.secureServer( detachedRaw[0], - certificateName, + context, subscription: detachedRaw[1], bufferedData: bufferedData, requestClientCertificate: requestClientCertificate, @@ -168,52 +156,6 @@ abstract class SecureSocket implements Socket { void renegotiate({bool useSessionCache: true, bool requestClientCertificate: false, bool requireClientCertificate: false}); - - /** - * Initializes the NSS library. If [initialize] is not called, the library - * is automatically initialized as if [initialize] were called with no - * arguments. If [initialize] is called more than once, or called after - * automatic initialization has happened (when a secure connection is made), - * then a TlsException is thrown. - * - * The optional argument [database] is the path to a certificate database - * directory containing root certificates for verifying certificate paths on - * client connections, and server certificates to provide on server - * connections. The argument [password] should be used when creating - * secure server sockets, to allow the private key of the server - * certificate to be fetched. If [useBuiltinRoots] is true (the default), - * then a built-in set of root certificates for trusted certificate - * authorities is merged with the certificates in the database. - * The list of built-in root certificates, and documentation about this - * default database, is available at - * http://www.mozilla.org/projects/security/certs/included/ . - * - * If the [database] argument is omitted, then only the - * builtin root certificates are used. If [useBuiltinRoots] is also false, - * then no certificates are available. - * - * Examples: - * 1) Use only the builtin root certificates: - * SecureSocket.initialize(); or - * - * 2) Use a specified database directory and the builtin roots: - * SecureSocket.initialize(database: 'path/to/my/database', - * password: 'my_password'); - * - * 3) Use a specified database directory, without builtin roots: - * SecureSocket.initialize(database: 'path/to/my/database', - * password: 'my_password'. - * useBuiltinRoots: false); - * - * The database should be an NSS certificate database directory - * containing a cert9.db file, not a cert8.db file. This version of - * the database can be created using the NSS certutil tool with "sql:" in - * front of the absolute path of the database directory, or setting the - * environment variable [[NSS_DEFAULT_DB_TYPE]] to "sql". - */ - external static void initialize({String database, - String password, - bool useBuiltinRoots: true}); } @@ -224,7 +166,7 @@ abstract class SecureSocket implements Socket { * RawSecureServerSocket, also returns RawSecureSocket objects representing * the server end of a secure connection. * The certificate provided by the server is checked - * using the certificate database provided in SecureSocket.initialize, and/or + * using the trusted certificates set in the SecurityContext object and/or * the default built-in root certificates. */ abstract class RawSecureSocket implements RawSocket { @@ -233,8 +175,9 @@ abstract class RawSecureSocket implements RawSocket { * host on the given port. The returned Future is completed with the * RawSecureSocket when it is connected and ready for subscription. * - * The certificate provided by the server is checked using the certificate - * database provided in [SecureSocket.initialize], and/or the default built-in + * The certificate provided by the server is checked + * using the trusted certificates set in the SecurityContext object and/or + * the default built-in * root certificates. If [sendClientCertificate] is * set to true, the socket will send a client certificate if one is * requested by the server. If [certificateName] is the nickname of @@ -252,24 +195,20 @@ abstract class RawSecureSocket implements RawSocket { static Future connect( host, int port, - {bool sendClientCertificate: false, - String certificateName, + {SecurityContext context, bool onBadCertificate(X509Certificate certificate), List supportedProtocols}) { _RawSecureSocket._verifyFields( host, port, - certificateName, false, false, false, - sendClientCertificate, onBadCertificate); return RawSocket.connect(host, port) .then((socket) { return secure(socket, - sendClientCertificate: sendClientCertificate, - certificateName: certificateName, + context: context, onBadCertificate: onBadCertificate, supportedProtocols: supportedProtocols); }); @@ -307,8 +246,7 @@ abstract class RawSecureSocket implements RawSocket { RawSocket socket, {StreamSubscription subscription, host, - bool sendClientCertificate: false, - String certificateName, + SecurityContext context, bool onBadCertificate(X509Certificate certificate), List supportedProtocols}) { socket.readEventsEnabled = false; @@ -316,11 +254,10 @@ abstract class RawSecureSocket implements RawSocket { return _RawSecureSocket.connect( host != null ? host : socket.address.host, socket.port, - certificateName, is_server: false, socket: socket, subscription: subscription, - sendClientCertificate: sendClientCertificate, + context: context, onBadCertificate: onBadCertificate, supportedProtocols: supportedProtocols); } @@ -350,7 +287,7 @@ abstract class RawSecureSocket implements RawSocket { */ static Future secureServer( RawSocket socket, - String certificateName, + SecurityContext context, {StreamSubscription subscription, List bufferedData, bool requestClientCertificate: false, @@ -361,7 +298,7 @@ abstract class RawSecureSocket implements RawSocket { return _RawSecureSocket.connect( socket.address, socket.remotePort, - certificateName, + context: context, is_server: true, socket: socket, subscription: subscription, @@ -402,15 +339,13 @@ abstract class RawSecureSocket implements RawSocket { * X509Certificate represents an SSL certificate, with accessors to * get the fields of the certificate. */ -class X509Certificate { - X509Certificate(this.subject, - this.issuer, - this.startValidity, - this.endValidity); - final String subject; - final String issuer; - final DateTime startValidity; - final DateTime endValidity; +abstract class X509Certificate { + external factory X509Certificate._(); + + String get subject; + String get issuer; + DateTime get startValidity; + DateTime get endValidity; } @@ -456,10 +391,9 @@ class _RawSecureSocket extends Stream int _bufferedDataIndex = 0; final InternetAddress address; final bool is_server; - final String certificateName; + SecurityContext context; final bool requestClientCertificate; final bool requireClientCertificate; - final bool sendClientCertificate; final Function onBadCertificate; var _status = HANDSHAKE; @@ -484,8 +418,8 @@ class _RawSecureSocket extends Stream static Future<_RawSecureSocket> connect( host, int requestedPort, - String certificateName, {bool is_server, + SecurityContext context, RawSocket socket, StreamSubscription subscription, List bufferedData, @@ -494,22 +428,21 @@ class _RawSecureSocket extends Stream bool sendClientCertificate: false, bool onBadCertificate(X509Certificate certificate), List supportedProtocols}) { - _verifyFields(host, requestedPort, certificateName, is_server, + _verifyFields(host, requestedPort, is_server, requestClientCertificate, requireClientCertificate, - sendClientCertificate, onBadCertificate); + onBadCertificate); if (host is InternetAddress) host = host.host; var address = socket.address; if (host != null) address = address._cloneWithNewHost(host); return new _RawSecureSocket(address, requestedPort, - certificateName, is_server, + context, socket, subscription, bufferedData, requestClientCertificate, requireClientCertificate, - sendClientCertificate, onBadCertificate, supportedProtocols) ._handshakeComplete.future; @@ -518,16 +451,18 @@ class _RawSecureSocket extends Stream _RawSecureSocket( this.address, int requestedPort, - this.certificateName, this.is_server, + this.context, RawSocket this._socket, this._socketSubscription, this._bufferedData, this.requestClientCertificate, this.requireClientCertificate, - this.sendClientCertificate, this.onBadCertificate(X509Certificate certificate), List supportedProtocols) { + if (context == null) { + context = SecurityContext.defaultContext; + } _controller = new StreamController( sync: true, onListen: _onSubscriptionStateChange, @@ -570,144 +505,24 @@ class _RawSecureSocket extends Stream ..onDone(_doneHandler); } try { + var encodedProtocols = + SecurityContext._protocolsToLengthEncoding(supportedProtocols); _secureFilter.connect(address.host, - (address as dynamic)._in_addr, - port, + context, is_server, - certificateName, requestClientCertificate || requireClientCertificate, requireClientCertificate, - sendClientCertificate, - _protocolsToLengthEncoding(supportedProtocols)); + // TODO(whesse): Remove sendClientCertificate + // argument, or add it to API. + false, // sendClientCertificate, + encodedProtocols); _secureHandshake(); } catch (e, s) { _reportError(e, s); } } - /// Encodes a set of supported protocols for ALPN/NPN usage. - /// - /// The `protocols` list is expected to contain protocols in descending order - /// of preference. - /// - /// See RFC 7301 (https://tools.ietf.org/html/rfc7301) for the encoding of - /// `List protocols`: - /// opaque ProtocolName<1..2^8-1>; - /// - /// struct { - /// ProtocolName protocol_name_list<2..2^16-1> - /// } ProtocolNameList; - /// - /// The encoding of the opaque `ProtocolName` vector is - /// described in RFC 2246: 4.3 Vectors. - /// - /// Note: Even though this encoding scheme would allow a total - /// `ProtocolNameList` length of 65535, this limit cannot be reached. Testing - /// showed that more than ~ 65480 bytes will fail to negogiate a protocol. - /// We will be conservative and support only messages up to (1<<15) -1 bytes. - /// - /// Our NSS implementation will support ALPN and NPN transparently. The - /// default protocol will be the first in the encoded Uint8List. - /// - /// NOTE: The NSS library will treat the first protocol as the fallback - /// protocol. The remaining ones are sorted in (decreasing) priority order. - /// We therefore put the protocol least desired to the front, to make it the - /// default. - Uint8List _protocolsToLengthEncoding(List protocols) { - if (protocols == null || protocols.length == 0) { - return new Uint8List(0); - } - int protocolsLength = protocols.length; - - // Calculate the number of bytes we will need if it is ASCII. - int expectedLength = protocolsLength; - for (int i = 0; i < protocolsLength; i++) { - int length = protocols[i].length; - if (length > 0 && length <= 255) { - expectedLength += length; - } else { - throw new ArgumentError( - 'Length of protocol must be between 1 and 255 (was: $length).'); - } - } - - if (expectedLength >= (1 << 15)) { - throw new ArgumentError( - 'The maximum message length supported is 2^15-1.'); - } - - // Try encoding the `List protocols` array using fast ASCII path. - var bytes = new Uint8List(expectedLength); - int bytesOffset = 0; - for (int i = 0; i < protocolsLength; i++) { - // The last protocol will be encoded as the first/default one in the list. - // (i.e. rotate `protocols` by 1 to the right). - int index = i; - if (index == 0) index = protocols.length; - String proto = protocols[index - 1]; - - // Add length byte. - bytes[bytesOffset++] = proto.length; - int bits = 0; - - // Add protocol bytes. - for (int j = 0; j < proto.length; j++) { - var char = proto.codeUnitAt(j); - bits |= char; - bytes[bytesOffset++] = char & 0xff; - } - - // Go slow case if we have encountered anything non-ascii. - if (bits > 0x7f) { - return _protocolsToLengthEncodingNonAsciiBailout(protocols); - } - } - return bytes; - } - - Uint8List _protocolsToLengthEncodingNonAsciiBailout(List protocols) { - void addProtocol(List outBytes, String protocol) { - var protocolBytes = UTF8.encode(protocol); - var len = protocolBytes.length; - - if (len > 255) { - throw new ArgumentError( - 'Length of protocol must be between 1 and 255 (was: $len)'); - } - // Add length byte. - outBytes.add(len); - - // Add protocol bytes. - outBytes.addAll(protocolBytes); - } - - List bytes = []; - addProtocol(bytes, protocols.last); - for (var i = 0; i < protocols.length -1; i++) { - addProtocol(bytes, protocols[i]); - } - - if (bytes.length >= (1 << 15)) { - throw new ArgumentError( - 'The maximum message length supported is 2^15-1.'); - } - - return new Uint8List.fromList(bytes); - } - - void _addProtocolBytes(List outBytes, String protocol) { - var protocolBytes = UTF8.encode(protocol); - var len = protocolBytes.length; - - if (len > 255) { - throw new ArgumentError( - 'Cannot support protocols with more than 255 characters'); - } - outBytes.add(len); - outBytes.addAll(protocolBytes); - } - StreamSubscription listen(void onData(RawSocketEvent data), {Function onError, void onDone(), @@ -721,11 +536,9 @@ class _RawSecureSocket extends Stream static void _verifyFields(host, int requestedPort, - String certificateName, bool is_server, bool requestClientCertificate, bool requireClientCertificate, - bool sendClientCertificate, Function onBadCertificate) { if (host is! String && host is! InternetAddress) { throw new ArgumentError("host is not a String or an InternetAddress"); @@ -736,21 +549,12 @@ class _RawSecureSocket extends Stream if (requestedPort < 0 || requestedPort > 65535) { throw new ArgumentError("requestedPort is not in the range 0..65535"); } - if (certificateName != null && certificateName is! String) { - throw new ArgumentError("certificateName is not null or a String"); - } - if (certificateName == null && is_server) { - throw new ArgumentError("certificateName is null on a server"); - } if (requestClientCertificate is! bool) { throw new ArgumentError("requestClientCertificate is not a bool"); } if (requireClientCertificate is! bool) { throw new ArgumentError("requireClientCertificate is not a bool"); } - if (sendClientCertificate is! bool) { - throw new ArgumentError("sendClientCertificate is not a bool"); - } if (onBadCertificate != null && onBadCertificate is! Function) { throw new ArgumentError("onBadCertificate is not null or a Function"); } @@ -891,7 +695,7 @@ class _RawSecureSocket extends Stream if (onBadCertificate == null) return false; var result = onBadCertificate(certificate); if (result is bool) return result; - throw new ArgumentError( + throw new HandshakeException( "onBadCertificate callback returned non-boolean $result"); } @@ -1360,10 +1164,8 @@ abstract class _SecureFilter { external factory _SecureFilter(); void connect(String hostName, - Uint8List addr, - int port, + SecurityContext context, bool is_server, - String certificateName, bool requestClientCertificate, bool requireClientCertificate, bool sendClientCertificate, diff --git a/sdk/lib/io/security_context.dart b/sdk/lib/io/security_context.dart new file mode 100644 index 00000000000..47234ffc1f9 --- /dev/null +++ b/sdk/lib/io/security_context.dart @@ -0,0 +1,184 @@ +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +part of dart.io; + +/** + * The object containing the certificates to trust when making + * a secure client connection, and the certificate chain and + * private key to serve from a secure server. + * + * The [SecureSocket] and [SecureServer] classes take a SecurityContext + * as an argument to their connect and bind methods. + * + * Certificates and keys can be added to a SecurityContext from PEM files + * on the disk. A PEM file contains one or more base-64 encoded DER-serialized + * ASN1 objects, surrounded with delimiter strings like + * "-----BEGIN CERTIFICATE -----" and "-----END CERTIFICATE-----". + * Distinguished encoding rules (DER) is a canonical binary serialization + * of ASN1 objects into an octet string. + */ +abstract class SecurityContext { + external factory SecurityContext(); + external static SecurityContext get defaultContext; + + /** + * Sets the private key for a server certificate or client certificate. + * A secure connection using this SecurityContext will use this key with + * the server or client certificate to sign and decrypt messages. + * [keyFile] is a PEM file containing an encrypted + * private key, encrypted with [password]. An unencrypted file can be + * used, but this is not usual. + */ + void usePrivateKey(String keyFile, {String password}); + + /** + * Sets the set of trusted X509 certificates used by [SecureSocket] + * client connections, when connecting to a secure server. + * + * There are two ways to set a set of trusted certificates, with a single + * PEM file, or with a directory containing individual PEM files for + * certificates. + * + * [file] is an optional PEM file containing X509 certificates, usually + * root certificates from certificate authorities. + * + * [directory] is an optional directory containing PEM files. The directory + * must also have filesystem links added, which link extra filenames based + * on the hash of a certificate's distinguished name (DN) to the file + * containing that certificate. OpenSSL contains a tool called c_rehash + * to create these links in a directory. + */ + void setTrustedCertificates({String file, String directory}); + + /** + * Sets the chain of X509 certificates served by [SecureServer] + * when making secure connections, including the server certificate. + * [file] is an PEM file containing X509 certificates, starting with + * the root authority and intermediate authorities forming the signed + * chain to the server certificate, and ending with the server certificate. + * The private key for the server certificate is set by [usePrivateKey]. + */ + void useCertificateChain(String file); + + /** + * Sets the list of authority names that a [SecureServer] will advertise + * as accepted, when requesting a client certificate from a connecting + * client. [file] is a PEM file containing the accepted signing authority + * certificates - the authority names are extracted from the certificates. + */ + void setClientAuthorities(String file); + + /** + * Sets the list of application-level protocols supported by a client + * connection or server connection. The ALPN (application level protocol + * negotiation) extension to TLS allows a client to send a list of + * protocols in the TLS client hello message, and the server to pick + * one and send the selected one back in its server hello message. + * + * Separate lists of protocols can be sent for client connections and + * for server connections, using the same SecurityContext. The [isServer] + * boolean argument specifies whether to set the list for server connections + * or client connections. + */ + void setAlpnProtocols(List protocols, bool isServer); + + /// Encodes a set of supported protocols for ALPN/NPN usage. + /// + /// The `protocols` list is expected to contain protocols in descending order + /// of preference. + /// + /// See RFC 7301 (https://tools.ietf.org/html/rfc7301) for the encoding of + /// `List protocols`: + /// opaque ProtocolName<1..2^8-1>; + /// + /// struct { + /// ProtocolName protocol_name_list<2..2^16-1> + /// } ProtocolNameList; + /// + /// The encoding of the opaque `ProtocolName` vector is + /// described in RFC 2246: 4.3 Vectors. + /// + /// Note: Even though this encoding scheme would allow a total + /// `ProtocolNameList` length of 65535, this limit cannot be reached. Testing + /// showed that more than ~ 2^14 bytes will fail to negotiate a protocol. + /// We will be conservative and support only messages up to (1<<13)-1 bytes. + static Uint8List _protocolsToLengthEncoding(List protocols) { + if (protocols == null || protocols.length == 0) { + return new Uint8List(0); + } + int protocolsLength = protocols.length; + + // Calculate the number of bytes we will need if it is ASCII. + int expectedLength = protocolsLength; + for (int i = 0; i < protocolsLength; i++) { + int length = protocols[i].length; + if (length > 0 && length <= 255) { + expectedLength += length; + } else { + throw new ArgumentError( + 'Length of protocol must be between 1 and 255 (was: $length).'); + } + } + + if (expectedLength >= (1 << 13)) { + throw new ArgumentError( + 'The maximum message length supported is 2^13-1.'); + } + + // Try encoding the `List protocols` array using fast ASCII path. + var bytes = new Uint8List(expectedLength); + int bytesOffset = 0; + for (int i = 0; i < protocolsLength; i++) { + String proto = protocols[i]; + + // Add length byte. + bytes[bytesOffset++] = proto.length; + int bits = 0; + + // Add protocol bytes. + for (int j = 0; j < proto.length; j++) { + var char = proto.codeUnitAt(j); + bits |= char; + bytes[bytesOffset++] = char & 0xff; + } + + // Go slow case if we have encountered anything non-ascii. + if (bits > 0x7f) { + return _protocolsToLengthEncodingNonAsciiBailout(protocols); + } + } + return bytes; + } + + static Uint8List _protocolsToLengthEncodingNonAsciiBailout( + List protocols) { + void addProtocol(List outBytes, String protocol) { + var protocolBytes = UTF8.encode(protocol); + var len = protocolBytes.length; + + if (len > 255) { + throw new ArgumentError( + 'Length of protocol must be between 1 and 255 (was: $len)'); + } + // Add length byte. + outBytes.add(len); + + // Add protocol bytes. + outBytes.addAll(protocolBytes); + } + + List bytes = []; + for (var i = 0; i < protocols.length; i++) { + addProtocol(bytes, protocols[i]); + } + + if (bytes.length >= (1 << 13)) { + throw new ArgumentError( + 'The maximum message length supported is 2^13-1.'); + } + + return new Uint8List.fromList(bytes); + } +} diff --git a/tests/lib/mirrors/invocation_fuzz_test.dart b/tests/lib/mirrors/invocation_fuzz_test.dart index 7a43d3cbbac..8add257d7a9 100644 --- a/tests/lib/mirrors/invocation_fuzz_test.dart +++ b/tests/lib/mirrors/invocation_fuzz_test.dart @@ -41,6 +41,12 @@ var blacklist = [ 'dart.io.SystemEncoding.decode', // Windows only 'dart.io.SystemEncoding.encode', // Windows only + // These construct an object with an uninitialized native field. + // TODO(23869): We could make this safer, but making the failure non-fatal + // would we worthless aside from this test. + 'dart.io.X509Certificate.X509Certificate._', + 'dart.io._X509Impl._X509Impl', + // Don't call private methods in dart.async as they may circumvent the zoned // error handling below. new RegExp(r"^dart\.async\._.*$"), diff --git a/tests/standalone/io/certificates/README b/tests/standalone/io/certificates/README new file mode 100644 index 00000000000..72092bffa94 --- /dev/null +++ b/tests/standalone/io/certificates/README @@ -0,0 +1,31 @@ + This directory, tests/standalone/io/certificates, contains the +X509 TLS certificates and private keys needed to run tests of Dart's +secure networking code. The SecureSocket and SecureServer classes +are tested by making TLS (formerly called SSL) connections, secured +by certificates created by the create_test_certificates script in +the parent directory, and copied into this directory. + +server_chain.pem: + Contains the chain of certificates, from the self-signed +test certificate authority, through the intermediate CA, to the server +certificate, used on the server side of a test connection. + +server_key.pem: + Contains the private key for the server certificate + +trusted_certs.pem: + Contains the self-signed certificate of the test certificate authority. +This certificate is set as "trusted" by the client side of the connection +in its SecurityContext object, so that a verified TLS connection to the +server can be made. + +untrusted_server_chain.pem: + Contains a chain of certificates, from a different self-signed +test certificate authority, through an intermediate CA, to a server +certificate, used on the server side of a test connection that is intended +to fail because the client does not accept this certificate authority + +untrusted_server_key.pem: + Contains the private key for the untrusted server certificate +in untrusted_server_chain.pem + diff --git a/tests/standalone/io/certificates/server_chain.pem b/tests/standalone/io/certificates/server_chain.pem new file mode 100644 index 00000000000..4163fe7dddf --- /dev/null +++ b/tests/standalone/io/certificates/server_chain.pem @@ -0,0 +1,57 @@ +-----BEGIN CERTIFICATE----- +MIIDKTCCAhGgAwIBAgIJAOWmjTS+OnTEMA0GCSqGSIb3DQEBCwUAMBcxFTATBgNV +BAMMDGludGVybWVkaWF0ZTAeFw0xNTA1MTgwOTAwNDBaFw0yMzA4MDQwOTAwNDBa +MBQxEjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC +AQoCggEBALlcwQJuzd+xH8QFgfJSn5tRlvhkldSX98cE7NiA602NBbnAVyUrkRXq +Ni75lgt0kwjYfA9z674m8WSVbgpLPintPCla9CYky1TH0keIs8Rz6cGWHryWEHiu +EDuljQynu2b3sAFuHu9nfWurbJwZnFakBKpdQ9m4EyOZCHC/jHYY7HacKSXg1Cki +we2ca0BWDrcqy8kLy0dZ5oC6IZG8O8drAK8f3f44CRYw59D3sOKBrKXaabpvyEcb +N7Wk2HDBVwHpUJo1reVwtbM8dhqQayYSD8oXnGpP3RQNu/e2rzlXRyq/BfcDY1JI +7TbC4t/7/N4EcPSpGsTcSOC9A7FpzvECAwEAAaN7MHkwCQYDVR0TBAIwADAsBglg +hkgBhvhCAQ0EHxYdT3BlblNTTCBHZW5lcmF0ZWQgQ2VydGlmaWNhdGUwHQYDVR0O +BBYEFCnwiEMMFZh7NhCr+qA8K0w4Q+AOMB8GA1UdIwQYMBaAFB0h1Evsaw2vfrmS +YuoCTmC4EE6ZMA0GCSqGSIb3DQEBCwUAA4IBAQAcFmHMaXRxyoNaeOowQ6iQWoZd +AUbvG7SHr7I6Pi2aqdqofsKWts7Ytm5WsS0M2nN+sW504houu0iCPeJJX8RQw2q4 +CCcNOs9IXk+2uMzlpocHpv+yYoUiD5DxgWh7eghQMLyMpf8FX3Gy4VazeuXznHOM +4gE4L417xkDzYOzqVTp0FTyAPUv6G2euhNCD6TMru9REcRhYul+K9kocjA5tt2KG +MH6y28LXbLyq4YJUxSUU9gY/xlnbbZS48KDqEcdYC9zjW9nQ0qS+XQuQuFIcwjJ5 +V4kAUYxDu6FoTpyQjgsrmBbZlKNxH7Nj4NDlcdJhp/zeSKHqWa5hSWjjKIxp +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDAjCCAeqgAwIBAgIJAOWmjTS+OnTDMA0GCSqGSIb3DQEBCwUAMBgxFjAUBgNV +BAMMDXJvb3RhdXRob3JpdHkwHhcNMTUwNTE4MDkwMDQwWhcNMjMwODA0MDkwMDQw +WjAXMRUwEwYDVQQDDAxpbnRlcm1lZGlhdGUwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQDSrAO1CoPvUllgLOzDm5nG0skDF7vh1DUgAIDVGz0ecD0JFbQx +EF79pju/6MbtpTW2FYvRp11t/G7rGtX923ybOHY/1MNFQrdIvPlO1VV7IGKjoMwP +DNeb0fIGjHoE9QxaDxR8NX8xQbItpsw+TUtRfc9SLkR+jaYJfVRoM21BOncZbSHE +YKiZlEbpecB/+EtwVpgvl+8mPD5U07Fi4fp/lza3WXInXQPyiTVllIEJCt4PKmlu +MocNaJOW38bysL7i0PzDpVZtOxLHOTaW68yF3FckIHNCaA7k1ABEEEegjFMmIao7 +B9w7A0jvr4jZVvNmui5Djjn+oJxwEVVgyf8LAgMBAAGjUDBOMB0GA1UdDgQWBBQd +IdRL7GsNr365kmLqAk5guBBOmTAfBgNVHSMEGDAWgBRk81s9d0ZbiZhh44KckwPb +oTc0XzAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBZQTK0plfdB5PC +cC5icut4EmrByJa1RbU7ayuEE70e7hla6KVmVjVdCBGltI4jBYwfhKbRItHiAJ/8 +x+XZKBG8DLPFuDb7lAa1ObhAYF7YThUFPQYaBhfzKcWrdmWDBFpvNv6E0Mm364dZ +e7Yxmbe5S4agkYPoxEzgEYmcUk9jbjdR6eTbs8laG169ljrECXfEU9RiAcqz5iSX +NLSewqB47hn3B9qgKcQn+PsgO2j7M+rfklhNgeGJeWmy7j6clSOuCsIjWHU0RLQ4 +0W3SB/rpEAJ7fgQbYUPTIUNALSOWi/o1tDX2mXPRjBoxqAv7I+vYk1lZPmSzkyRh +FKvRDxsW +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDAzCCAeugAwIBAgIJAJ0MomS4Ck+8MA0GCSqGSIb3DQEBCwUAMBgxFjAUBgNV +BAMMDXJvb3RhdXRob3JpdHkwHhcNMTUwNTE4MDkwMDQwWhcNMjMwODA0MDkwMDQw +WjAYMRYwFAYDVQQDDA1yb290YXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEAts1ijtBV92S2cOvpUMOSTp9c6A34nIGr0T5Nhz6XiqRVT+gv +dQgmkdKJQjbvR60y6jzltYFsI2MpGVXY8h/oAL81D/k7PDB2aREgyBfTPAhBHyGw +siR+2xYt5b/Zs99q5RdRqQNzNpLPJriIKvUsRyQWy1UiG2s7pRXQeA8qB0XtJdCj +kFIi+G2bDsaffspGeDOCqt7t+yqvRXfSES0c/l7DIHaiMbbp4//ZNML3RNgAjPz2 +hCezZ+wOYajOIyoSPK8IgICrhYFYxvgWxwbLDBEfC5B3jOQsySe10GoRAKZz1gBV +DmgReu81tYJmdgkc9zknnQtIFdA0ex+GvZlfWQIDAQABo1AwTjAdBgNVHQ4EFgQU +ZPNbPXdGW4mYYeOCnJMD26E3NF8wHwYDVR0jBBgwFoAUZPNbPXdGW4mYYeOCnJMD +26E3NF8wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEATzkZ97K777uZ +lQcduNX3ey4IbCiEzFA2zO5Blj+ilfIwNbZXNOgm/lqNvVGDYs6J1apJJe30vL3X +J+t2zsZWzzQzb9uIU37zYemt6m0fHrSrx/iy5lGNqt3HMfqEcOqSCOIK3PCTMz2/ +uyGe1iw33PVeWsm1JUybQ9IrU/huJjbgOHU4wab+8SJCM49ipArp68Fr6j4lcEaE +4rfRg1ZsvxiOyUB3qPn6wyL/JB8kOJ+QCBe498376eaem8AEFk0kQRh6hDaWtq/k +t6IIXQLjx+EBDVP/veK0UnVhKRP8YTOoV8ZiG1NcdlJmX/Uk7iAfevP7CkBfSN8W +r6AL284qtw== +-----END CERTIFICATE----- diff --git a/tests/standalone/io/certificates/server_key.pem b/tests/standalone/io/certificates/server_key.pem new file mode 100644 index 00000000000..1fd2324045b --- /dev/null +++ b/tests/standalone/io/certificates/server_key.pem @@ -0,0 +1,29 @@ +-----BEGIN ENCRYPTED PRIVATE KEY----- +MIIE5DAcBgoqhkiG9w0BDAEBMA4ECL7L6rj6uEHGAgIIAASCBMLbucyfqAkgCbhP +xNSHYllPMAv/dsIjtnsBwepCXPGkCBCuOAw/2FaCHjN9hBqL5V7fkrKeaemhm2YE +ycPtlHJYPDf3kEkyMjdZ9rIY6kePGfQizs2uJPcXj4YPyQ4HsfVXpOicKfQrouf5 +Mze9bGzeMN065q3iP4dYUMwHAyZYteXCsanQNHlqvsWli0W+H8St8fdsXefZhnv1 +qVatKWdNdWQ9t5MuljgNU2Vv56sHKEYXI0yLxk2QUMk8KlJfnmt8foYUsnPUXHmc +gIjLKwwVkpdololnEHSNu0cEOUPowjgJru+uMpn7vdNl7TPEQ9jbEgdNg4JwoYzU +0nao8WzjaSp7kzvZz0VFwKnk5AjstGvvuAWckADdq23QElbn/mF7AG1m/TBpYxzF +gTt37UdndS/AcvVznWVVrRP5iTSIawdIwvqI4s7rqsoE0GCcak+RhchgAz2gWKkS +oODUo0JL6pPVbJ3l4ebbaO6c99nDVc8dViPtc1EkStJEJ2O4kI4xgLSCr4Y9ahKn +oAaoSkX7Xxq3aQm+BzqSpLjdGL8atsqR/YVOIHYIl3gThvP0NfZGx1xHyvO5mCdZ +kHxSA7tKWxauZ3eQ2clbnzeRsl4El0WMHy/5K1ovene4v7sunmoXVtghBC8hK6eh +zMO9orex2PNQ/VQC7HCvtytunOVx1lkSBoNo7hR70igg6rW9H7UyoAoBOwMpT1xa +J6V62nqruTKOqFNfur7aHJGpHGtDb5/ickHeYCyPTvmGp67u4wChzKReeg02oECe +d1E5FKAcIa8s9TVOB6Z+HvTRNQZu2PsI6TJnjQRowvY9DAHiWTlJZBBY/pko3hxX +TsIeybpvRdEHpDWv86/iqtw1hv9CUxS/8ZTWUgBo+osShHW79FeDASr9FC4/Zn76 +ZDERTgV4YWlW/klVWcG2lFo7jix+OPXAB+ZQavLhlN1xdWBcIz1AUWjAM4hdPylW +HCX4PB9CQIPl2E7F+Y2p6nMcMWSJVBi5UIH7E9LfaBguXSzMmTk2Fw5p1aOQ6wfN +goVAMVwi8ppAVs741PfHdZ295xMmK/1LCxz5DeAdD/tsA/SYfT753GotioDuC7im +EyJ5JyvTr5I6RFFBuqt3NlUb3Hp16wP3B2x9DZiB6jxr0l341/NHgsyeBXkuIy9j +ON2mvpBPCJhS8kgWo3G0UyyKnx64tcgpGuSvZhGwPz843B6AbYyE6pMRfSWRMkMS +YZYa+VNKhR4ixdj07ocFZEWLVjCH7kxkE8JZXKt8jKYmkWd0lS1QVjgaKlO6lRa3 +q6SPJkhW6pvqobvcqVNXwi1XuzpZeEbuh0B7OTekFTTxx5g9XeDl56M8SVQ1KEhT +Q1t7H2Nba18WCB7cf+6PN0F0K0Jz1Kq7ZWaqEI/grX1m4RQuvNF5807sB/QKMO/Z +Gz3NXvHg5xTJRd/567lxPGkor0cE7qD1EZfmJ2HrBYXQ91bhgA7LToBuMZo6ZRXH +QfsanjbP4FPLMiGdQigLjj3A35L/f4sQOOVac/sRaFnm7pzcxsMvyVU/YtvGcjYE +xaOOVnamg661Wo0wksXoDjeSz/JIyyKO3Gwp1FSm2wGLjjy/Ehmqcqy8rvHuf07w +AUukhVtTNn4= +-----END ENCRYPTED PRIVATE KEY----- diff --git a/tests/standalone/io/certificates/trusted_certs.pem b/tests/standalone/io/certificates/trusted_certs.pem new file mode 100644 index 00000000000..a4747554ab4 --- /dev/null +++ b/tests/standalone/io/certificates/trusted_certs.pem @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDAzCCAeugAwIBAgIJAJ0MomS4Ck+8MA0GCSqGSIb3DQEBCwUAMBgxFjAUBgNV +BAMMDXJvb3RhdXRob3JpdHkwHhcNMTUwNTE4MDkwMDQwWhcNMjMwODA0MDkwMDQw +WjAYMRYwFAYDVQQDDA1yb290YXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEAts1ijtBV92S2cOvpUMOSTp9c6A34nIGr0T5Nhz6XiqRVT+gv +dQgmkdKJQjbvR60y6jzltYFsI2MpGVXY8h/oAL81D/k7PDB2aREgyBfTPAhBHyGw +siR+2xYt5b/Zs99q5RdRqQNzNpLPJriIKvUsRyQWy1UiG2s7pRXQeA8qB0XtJdCj +kFIi+G2bDsaffspGeDOCqt7t+yqvRXfSES0c/l7DIHaiMbbp4//ZNML3RNgAjPz2 +hCezZ+wOYajOIyoSPK8IgICrhYFYxvgWxwbLDBEfC5B3jOQsySe10GoRAKZz1gBV +DmgReu81tYJmdgkc9zknnQtIFdA0ex+GvZlfWQIDAQABo1AwTjAdBgNVHQ4EFgQU +ZPNbPXdGW4mYYeOCnJMD26E3NF8wHwYDVR0jBBgwFoAUZPNbPXdGW4mYYeOCnJMD +26E3NF8wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEATzkZ97K777uZ +lQcduNX3ey4IbCiEzFA2zO5Blj+ilfIwNbZXNOgm/lqNvVGDYs6J1apJJe30vL3X +J+t2zsZWzzQzb9uIU37zYemt6m0fHrSrx/iy5lGNqt3HMfqEcOqSCOIK3PCTMz2/ +uyGe1iw33PVeWsm1JUybQ9IrU/huJjbgOHU4wab+8SJCM49ipArp68Fr6j4lcEaE +4rfRg1ZsvxiOyUB3qPn6wyL/JB8kOJ+QCBe498376eaem8AEFk0kQRh6hDaWtq/k +t6IIXQLjx+EBDVP/veK0UnVhKRP8YTOoV8ZiG1NcdlJmX/Uk7iAfevP7CkBfSN8W +r6AL284qtw== +-----END CERTIFICATE----- diff --git a/tests/standalone/io/certificates/untrusted_server_chain.pem b/tests/standalone/io/certificates/untrusted_server_chain.pem new file mode 100644 index 00000000000..7d30df0a9c6 --- /dev/null +++ b/tests/standalone/io/certificates/untrusted_server_chain.pem @@ -0,0 +1,57 @@ +-----BEGIN CERTIFICATE----- +MIIDKTCCAhGgAwIBAgIJAOLkVK1iIzcgMA0GCSqGSIb3DQEBCwUAMBcxFTATBgNV +BAMMDGludGVybWVkaWF0ZTAeFw0xNTA1MjgxNTAyMTlaFw0yMzA4MTQxNTAyMTla +MBQxEjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC +AQoCggEBAJTI9uVvjQ5jsHB+UcqhXOKXmt8QlK3gOZKYs9lCFoQYWVug3daDCYp6 +p1N7/lM+V0sDhYfALBjT9aetviTck8ZJ5rCJ1xzyY16/LnvTI9wjBeizyar5K57A +vRwX+PB4OQi4nKszWXB++jYu7AYJsIwQwkNAk5kkpaVRlWqYkDm6JqtD6hAPxQV4 +0rrqQAXBY+SP0C8buhVNDmnkshmJnHnb/yodNmwdK0CkSN9TbVZi3UpsDRL1Tbo/ +i7fRrfVK5aTjdjT7yx/5Wh2jQ4nBdm7l5BFc2owHk0FGB9h5GMgs19RzHf+O+JCc +SabuVSehtagIouiwHOJ5iFtiq6enF58CAwEAAaN7MHkwCQYDVR0TBAIwADAsBglg +hkgBhvhCAQ0EHxYdT3BlblNTTCBHZW5lcmF0ZWQgQ2VydGlmaWNhdGUwHQYDVR0O +BBYEFKpMad7rFfjy9iTqyCBJSWORnxqoMB8GA1UdIwQYMBaAFDqe3VrQlMZ2ABqb +w16kdoDbumRuMA0GCSqGSIb3DQEBCwUAA4IBAQC9rRoBEcCZBVCBVZdckq0cWVYB +IoMnpGk1ODv5Yp2b36LR3wwr2g+2u61s767L0+2ZGbGACkzWi3rJFSWIiH2j5zmi +e+TvuOXFbw0r2PtiYDaoWIExzkC3rxnCSlUdbRPw1LI8bEd3+PYeZcEW0zs+xaId +lctsWe8QC7EW3trLUFc8ASjED4uO83+2toqljkrD4SDnVz8t7O6bNrKrEDkNrxDW +NHrbSP8m666U/A4MpH55BArfHNlUbzOZ6/jOkvcqhbL1pccJ2U1Ov3GnQGk7Vg+D +JD1ptD6AHnjA8PCSkDOPibG4w9lRFWaUeAjtuCq1xJ3RFe5aUcUqgcxyF38B +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDAjCCAeqgAwIBAgIJAOLkVK1iIzcfMA0GCSqGSIb3DQEBCwUAMBgxFjAUBgNV +BAMMDXJvb3RhdXRob3JpdHkwHhcNMTUwNTI4MTUwMjE5WhcNMjMwODE0MTUwMjE5 +WjAXMRUwEwYDVQQDDAxpbnRlcm1lZGlhdGUwggEiMA0GCSqGSIb3DQEBAQUAA4IB +DwAwggEKAoIBAQDS0BrhfsYaZcm68pdZ2j+TWXa/Ba5s3sKq9+EFcI5ZSq8/Atn+ +V17OgyjUYQDS//8lk0UydvHQmI6lTSAlLbIDfHMQZ1qxKGV75IgLmHO2RCoJ0W2p +V74m6EZ3cMMOAFJ1WO292ee1H3ZynZSJcj5jxzLzJwOzaz9IjVVLpRdvm2xkhuIa +M10aYn3gohGLaq8fe8sQx4ezx6nlqAFK1rZrMKgmPoDaV/Vsin71QywAjOApMp/c +GkQjRzmUQf12NfXuvQU6ZP0M16e6Od0oAUFM+t0OjFUCcfhwRfGsoSesLof3Gc8+ +eJ7LbOFxWMvsFwsXkmeDvE6yuihBlPm38Va3AgMBAAGjUDBOMB0GA1UdDgQWBBQ6 +nt1a0JTGdgAam8NepHaA27pkbjAfBgNVHSMEGDAWgBSjeMkHbMP4BXJRnx4iJOhN +ir7L+DAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAyQYc8ZrQZAlGw +I0AFYaG/T3waQFr48j3evBEO9RT/3E8+rNbW4QKyzovJhBiNwkqJhn+WNW7B43zT +jG3MqH3FTcQfjcXGxFUPh1HntMz7RbXs3Pv5G1f5knk6Hy63vG432kAEUZbrAdKV +yBnR3/VqZd8cX1nzZkhEh9ePz6XTsKctXaHfnenQZJ3YwwNAwlNiNeXB/ykJliyh +8AJxBQdx76TudXB0pl92bK0nsNVczGXg4UZ80GbCHGoYV9ZjQCX/G5ocquMB11Ou +2n9iHTPHMdkahDvWwHggLXrCZK2to4N5zYz33dMhnRiR5hVZKaxuql1j9O2kwLCD +Yb7VgstA +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDAzCCAeugAwIBAgIJALj/jZ2qE13WMA0GCSqGSIb3DQEBCwUAMBgxFjAUBgNV +BAMMDXJvb3RhdXRob3JpdHkwHhcNMTUwNTI4MTUwMjE5WhcNMjMwODE0MTUwMjE5 +WjAYMRYwFAYDVQQDDA1yb290YXV0aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEAp3u9wYL5IexhJAzuCDI1nL/JFT5itluFcGA2UjKF12hDUG5b +ipvG8Oxi7oOo9Nk32sw5hqNVyCXp35znOZBnnQn6kkLNENRZ0JlprBYZVG+2jTZI +jxSq8kYWMkeP21BmwfgPuorpfM8ClDCHbrOJuHFeP1U/rqj7A2Hw1VjuPD+5VmWK +N8EeZsis1vOKWtrQfwEukCUfu6gScsIFa9Pie7r5YufYYWMUGPktI0r1r0S9iTY2 +femCqWIL4jfYKQ8d0ibYE7obM/fhIYUyhSZyCes3ffDpP22/puwkwZMopqIXaB17 +At6q4YXv9RjJQm/CXGoMBzddAiQbpStKA3nAxwIDAQABo1AwTjAdBgNVHQ4EFgQU +o3jJB2zD+AVyUZ8eIiToTYq+y/gwHwYDVR0jBBgwFoAUo3jJB2zD+AVyUZ8eIiTo +TYq+y/gwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAjOWWBiCBmzfu +pvct3CMMDAgLB+N51SrDeowG++Zyv8mtEzaC2nSD2rwztrHKbRInflH5FclFFFKr +EY7wfL86MZUECpA7QjRpwhaqe7d2uUmi+kOSg3I9AL0KVdM4jOIcNTrzrDjaUpsF +v9U4ounXQyRI9x8H/v8rzMMQmEeq3DyKqOr6y46U6SspDDlLcGGxQz1olmNYsxw5 +ftY1/Q58HGDsUEMUC2hOCsKw8XxSguLln0inMXpws+rWOZLETN4+WMxZojYmLqz+ +DksVeUKBx1IJx3WLNkj/qk55w/4dJPBfddNJ76KFKxxYwzZ851Y9f6+OndRTGW1n +XJOiJnRvlQ== +-----END CERTIFICATE----- diff --git a/tests/standalone/io/certificates/untrusted_server_key.pem b/tests/standalone/io/certificates/untrusted_server_key.pem new file mode 100644 index 00000000000..ecae29bdb00 --- /dev/null +++ b/tests/standalone/io/certificates/untrusted_server_key.pem @@ -0,0 +1,29 @@ +-----BEGIN ENCRYPTED PRIVATE KEY----- +MIIE5DAcBgoqhkiG9w0BDAEBMA4ECPM4xVvIwo+2AgIIAASCBMJ/88x+fp/JrzbU +TCCt4ic7v+w7nVPOpxtvbQAQ6/kWQcKyjNdeqqktlW/5NvhPOXYWzRZIFkts9BSh +kyTHqxRa8QFn/PcoqKF6rRUCfoKfROQNnoNP/E//Z7dxg9sUPpHxs7tlwuoBr2dv +Cnmh/WiJaeOvIEnFOa/ZwTljWjIEfmUHhVK351c6JyRVPDAPKsg2+hWNfQpdqPIz +ptoDT+gGDY6rMp/eMbpu0g7Dal+q1sC/FQe5hGXWtIz27hHuJxY9V0yOwpW9Q5k9 +dg4KOqrJLW8O3ZF0l/oXOG+V5i9rMJWJyq9GR2LdxmdgL3nKibE6DCZXG/cMcY2O +ApdCaUzyREM2Qidcp45rWNkVO+5enVh0tiYQY12vvUnDCq5ysKW3FmEXH+uBgy0l +jh2Wkwlag3wwKis6YeSTQBYO5i+M5vqXXi/yZ5b45Jnjs6rhX5d7scmzaFeLitBz +GQsNTvrQCNDmbcNVex5k10cxv9eAlsepTIlknwnFItqAmo2idf5Mh51hEvwb9jEQ +BRLiQIEgmkba2YSU5Vg1SkAEzAm1FCcbc0U8UF90A/Uv/un0VNDii+PxCp8xskvr +5PmKssF1q1GmuzyWpQ97HKKTP87r/a2OZhxDr876UQnBM6R6mrFF9efyARmc2a7l +fkuBgpP0ExP69Br8KdWctuyCtIJtY/4CnCJZvEa0FTyurxqh+nrce/YBXZxK7f+2 +NWqEAIwTdmsIRvm6jAr7m/ySsXFP+wjcFVgJ3JwSHc+NO1/NhSSJFXD4dUH7+Xxw +kUeH7jxrbcFDLA0zXLtVAMuyoX2HTp/AA+COsAXLLtSzifY8RPDloioeqFW2FZ3d +mvsmjoCVyQK7sJ5+ZwlGqnwOtp2iK2VHjk/Y+J4HUBpcYkUjFFlqa8Bkc7w8BCkI +nV+hWEQn3nAJiEXtK+PlJfWz4YKUTyOwCp1xvNOrBByXgG5w5bv9RtqpolyN6rh2 +WUcNtt36eMnzj6bBXib9VoOWD2ls0AIIl8EON4KIizI+inY21Lb9f15066UvRm6s +5Rudi9v7iS+FcOG9bP2pg0sCoooLHqSVE2me1tcI3vDjZH8O3sjo4/YO7W0Vbc2+ +atxzq9cWGWZvpa7grafdqDhcRNqbpBGsUdq0Tps6pMnymjWWHK7x+m61R6wXafBm +lyT9lyh7y3A9oTtO91QO1VhNY8E0bpF9EkGBIO+VCK7XNvRO7ITZdfsmCgjNQ14u +iimruX0scOggq3xZ2VjPCrZXbWGO6GD1bJVFqBZmXLDq/YHHC0UbM2eQFcTWXKV3 +S2+pGmrBHlwPPQmvrLBQr8TMzRX/HOLCpUyuNU6bXrkyXIrBKBCj1XOmNtNUBe6O +PcMQ7BoUf/zsR8XPFbQV+4pidsMNmm9j4g40CBFwAQv2INC385wJIIsjKxewPy+6 +cYmyTtQX+1XPACpQA8BLq9qNkvbJTeXAeAcoA21WtlJjREzpqRA6j0j7Em7oHJNe +tH7EsBz+KdiYWk/xFRxHICn7TvLTpf5+Xw1pfZfV7Lxpk6ae2fcOr9MjkGqm5YqZ +VQFdnJ8U+RNC7/yPxDoFqndSI3RsAIXFMvMDOMmToCGwDCt2fFWPgwZeGYj0Qh1c +7shiDBng3TE= +-----END ENCRYPTED PRIVATE KEY----- diff --git a/tests/standalone/io/http_cookie_date_test.dart b/tests/standalone/io/http_cookie_date_test.dart index 51e3e1ffe31..85385b41066 100644 --- a/tests/standalone/io/http_cookie_date_test.dart +++ b/tests/standalone/io/http_cookie_date_test.dart @@ -35,6 +35,7 @@ part "../../../sdk/lib/io/platform_impl.dart"; part "../../../sdk/lib/io/service_object.dart"; part "../../../sdk/lib/io/secure_socket.dart"; part "../../../sdk/lib/io/secure_server_socket.dart"; +part "../../../sdk/lib/io/security_context.dart"; part "../../../sdk/lib/io/socket.dart"; void testParseHttpCookieDate() { diff --git a/tests/standalone/io/http_headers_test.dart b/tests/standalone/io/http_headers_test.dart index 6d2b92d3e41..6e2a4ef8d46 100644 --- a/tests/standalone/io/http_headers_test.dart +++ b/tests/standalone/io/http_headers_test.dart @@ -35,6 +35,7 @@ part "../../../sdk/lib/io/platform_impl.dart"; part "../../../sdk/lib/io/service_object.dart"; part "../../../sdk/lib/io/secure_socket.dart"; part "../../../sdk/lib/io/secure_server_socket.dart"; +part "../../../sdk/lib/io/security_context.dart"; part "../../../sdk/lib/io/socket.dart"; void testMultiValue() { diff --git a/tests/standalone/io/http_parser_test.dart b/tests/standalone/io/http_parser_test.dart index 6302dc66387..9d8e014f2c6 100644 --- a/tests/standalone/io/http_parser_test.dart +++ b/tests/standalone/io/http_parser_test.dart @@ -35,6 +35,7 @@ part "../../../sdk/lib/io/platform_impl.dart"; part "../../../sdk/lib/io/service_object.dart"; part "../../../sdk/lib/io/secure_socket.dart"; part "../../../sdk/lib/io/secure_server_socket.dart"; +part "../../../sdk/lib/io/security_context.dart"; part "../../../sdk/lib/io/socket.dart"; class HttpParserTest { diff --git a/tests/standalone/io/http_proxy_test.dart b/tests/standalone/io/http_proxy_test.dart index 63a125fc05a..9b8803b7c91 100644 --- a/tests/standalone/io/http_proxy_test.dart +++ b/tests/standalone/io/http_proxy_test.dart @@ -9,6 +9,16 @@ import "dart:async"; import "dart:io"; import 'dart:convert'; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + class Server { HttpServer server; bool secure; @@ -19,49 +29,47 @@ class Server { Server(this.proxyHops, this.directRequestPaths, this.secure); Future start() { - var x = new Completer(); - Future f = secure - ? HttpServer.bindSecure( - "localhost", 0, certificateName: 'localhost_cert') - : HttpServer.bind("localhost", 0); - return f.then((s) { + return (secure ? + HttpServer.bindSecure("localhost", 0, serverContext) : + HttpServer.bind("localhost", 0)) + .then((s) { server = s; - x.complete(this); - server.listen((request) { - var response = request.response; - requestCount++; - // Check whether a proxy or direct connection is expected. - bool direct = directRequestPaths.fold( - false, - (prev, path) => prev ? prev : path == request.uri.path); - if (!secure && !direct && proxyHops > 0) { - Expect.isNotNull(request.headers[HttpHeaders.VIA]); - Expect.equals(1, request.headers[HttpHeaders.VIA].length); - Expect.equals( - proxyHops, - request.headers[HttpHeaders.VIA][0].split(",").length); - } else { - Expect.isNull(request.headers[HttpHeaders.VIA]); - } - var body = new StringBuffer(); - request.listen( - (data) { - body.write(new String.fromCharCodes(data)); - }, - onDone: () { - String path = request.uri.path.substring(1); - if (path != "A") { - String content = "$path$path$path"; - Expect.equals(content, body.toString()); - } - response.write(request.uri.path); - response.close(); - }); - }); - return x.future; + server.listen(requestHandler); + return this; }); } + void requestHandler(HttpRequest request) { + var response = request.response; + requestCount++; + // Check whether a proxy or direct connection is expected. + bool direct = directRequestPaths.fold( + false, + (prev, path) => prev ? prev : path == request.uri.path); + if (!secure && !direct && proxyHops > 0) { + Expect.isNotNull(request.headers[HttpHeaders.VIA]); + Expect.equals(1, request.headers[HttpHeaders.VIA].length); + Expect.equals( + proxyHops, + request.headers[HttpHeaders.VIA][0].split(",").length); + } else { + Expect.isNull(request.headers[HttpHeaders.VIA]); + } + var body = new StringBuffer(); + onRequestComplete() { + String path = request.uri.path.substring(1); + if (path != "A") { + String content = "$path$path$path"; + Expect.equals(content, body.toString()); + } + response.write(request.uri.path); + response.close(); + } + request.listen((data) { + body.write(new String.fromCharCodes(data)); + }, onDone: onRequestComplete); + } + void shutdown() { server.close(); } @@ -336,7 +344,7 @@ void testProxy() { setupProxyServer().then((proxyServer) { setupServer(1, directRequestPaths: ["/4"]).then((server) { setupServer(1, directRequestPaths: ["/4"], secure: true).then((secureServer) { - HttpClient client = new HttpClient(); + HttpClient client = new HttpClient(context: clientContext); List proxy; if (Platform.operatingSystem == "windows") { @@ -796,26 +804,22 @@ void testRealProxyAuth() { }); } -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - main() { - InitializeSSL(); testInvalidProxy(); testDirectProxy(); testProxy(); - testProxyIPV6(); + // testProxyIPV6(); // TODO(24074): Move failing tests to separate files. testProxyChain(); - testProxyFromEnviroment(); + // TODO(24074): Move failing tests to separate files. + // testProxyFromEnviroment(); // The two invocations of uses the same global variable for state - // run one after the other. - testProxyAuthenticate(false) - .then((_) => testProxyAuthenticate(true)); + // TODO(24074): Move failing tests to separate files. + // testProxyAuthenticate(false) + // .then((_) => testProxyAuthenticate(true)); + // This test is not normally run. It can be used for locally testing // with a real proxy server (e.g. Apache). - //testRealProxy(); - //testRealProxyAuth(); + // testRealProxy(); + // testRealProxyAuth(); } diff --git a/tests/standalone/io/https_bad_certificate_client.dart b/tests/standalone/io/https_bad_certificate_client.dart deleted file mode 100644 index c2f79cdc6f7..00000000000 --- a/tests/standalone/io/https_bad_certificate_client.dart +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -// Client for https_bad_certificate_test, that runs in a subprocess. -// It verifies that the client bad certificate callback works in HttpClient. - -import "dart:async"; -import "dart:io"; - -class ExpectException implements Exception { - ExpectException(this.message); - String toString() => "ExpectException: $message"; - String message; -} - -void expect(condition) { - if (!condition) { - throw new ExpectException(''); - } -} - -const HOST_NAME = "localhost"; - -Future runHttpClient(int port, result) async { - bool badCertificateCallback(X509Certificate certificate, - String host, - int callbackPort) { - expect(HOST_NAME == host); - expect(callbackPort == port); - expect('CN=localhost' == certificate.subject); - expect('CN=myauthority' == certificate.issuer); - expect(result != 'exception'); // Throw exception if one is requested. - if (result == 'true') return true; - if (result == 'false') return false; - return result; - } - - HttpClient client = new HttpClient(); - - await client.getUrl(Uri.parse('https://$HOST_NAME:$port/$result')) - .then((HttpClientRequest request) { - expect(result == 'true'); // The session cache may keep the session. - return request.close(); - }, onError: (e) { - expect(e is HandshakeException || e is SocketException); - }); - - client.badCertificateCallback = badCertificateCallback; - await client.getUrl(Uri.parse('https://$HOST_NAME:$port/$result')) - .then((HttpClientRequest request) { - expect(result == 'true'); - return request.close(); - }, onError: (e) { - if (result == 'false') expect (e is HandshakeException || - e is SocketException); - else if (result == 'exception') expect (e is ExpectException || - e is SocketException); - else { - expect (e is ArgumentError || e is SocketException); - } - }); - - client.badCertificateCallback = null; - await client.getUrl(Uri.parse('https://$HOST_NAME:$port/$result')) - .then((HttpClientRequest request) { - expect(result == 'true'); // The session cache may keep the session. - return request.close(); - }, onError: (e) { - expect(e is HandshakeException || e is SocketException); - }); - - client.close(); -} - -void main(List args) { - SecureSocket.initialize(); - int port = int.parse(args[0]); - runHttpClient(port, args[1]) - .then((_) => print('SUCCESS')); -} diff --git a/tests/standalone/io/https_bad_certificate_test.dart b/tests/standalone/io/https_bad_certificate_test.dart index d07961a80a0..fdd83e316d9 100644 --- a/tests/standalone/io/https_bad_certificate_test.dart +++ b/tests/standalone/io/https_bad_certificate_test.dart @@ -4,52 +4,78 @@ // This test verifies that the bad certificate callback works in HttpClient. -import "package:expect/expect.dart"; -import "package:path/path.dart"; import "dart:async"; import "dart:io"; -const HOST_NAME = "localhost"; -const CERTIFICATE = "localhost_cert"; +import "package:expect/expect.dart"; -Future runServer() { - SecureSocket.initialize( - database: Platform.script.resolve('pkcert').toFilePath(), +final HOST_NAME = 'localhost'; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), password: 'dartdart'); - return HttpServer.bindSecure( - HOST_NAME, 0, backlog: 5, certificateName: 'localhost_cert') - .then((server) { - server.listen((HttpRequest request) { - request.listen((_) { }, onDone: () { request.response.close(); }); - }, onError: (e) { if (e is! HandshakeException) throw e; }); - return server; - }); -} +class CustomException {} main() async { - var clientScript = Platform.script - .resolve('https_bad_certificate_client.dart') - .toFilePath(); - Future clientProcess(int port, String acceptCertificate) { - return Process.run(Platform.executable, - [clientScript, port.toString(), acceptCertificate]) - .then((ProcessResult result) { - if (result.exitCode != 0 || !result.stdout.contains('SUCCESS')) { - print("Client failed, acceptCertificate: $acceptCertificate"); - print(" stdout:"); - print(result.stdout); - print(" stderr:"); - print(result.stderr); - Expect.fail('Client subprocess exit code: ${result.exitCode}'); - } + var HOST = (await InternetAddress.lookup(HOST_NAME)).first; + var server = await HttpServer.bindSecure(HOST, 0, serverContext, backlog: 5); + server.listen((request) { + request.listen((_) { + }, onDone: () { + request.response.close(); }); - } + }); - var server = await runServer(); - await clientProcess(server.port, 'true'); - await clientProcess(server.port, 'false'); - await clientProcess(server.port, 'fisk'); - await clientProcess(server.port, 'exception'); + SecurityContext goodContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + SecurityContext badContext = new SecurityContext(); + SecurityContext defaultContext = SecurityContext.defaultContext; + + await runClient(server.port, goodContext, true, 'pass'); + await runClient(server.port, goodContext, false, 'pass'); + await runClient(server.port, goodContext, 'fisk', 'pass'); + await runClient(server.port, goodContext, 'exception', 'pass'); + await runClient(server.port, badContext, true, 'pass'); + await runClient(server.port, badContext, false, 'fail'); + await runClient(server.port, badContext, 'fisk', 'fail'); + await runClient(server.port, badContext, 'exception', 'throw'); + await runClient(server.port, defaultContext, true, 'pass'); + await runClient(server.port, defaultContext, false, 'fail'); + await runClient(server.port, defaultContext, 'fisk', 'fail'); + await runClient(server.port, defaultContext, 'exception', 'throw'); server.close(); } + + +Future runClient(int port, + SecurityContext context, + callbackReturns, + result) async { + HttpClient client = new HttpClient(context: context); + client.badCertificateCallback = (X509Certificate certificate, host, port) { + Expect.equals('/CN=rootauthority', certificate.subject); + Expect.equals('/CN=rootauthority', certificate.issuer); + // Throw exception if one is requested. + if (callbackReturns == 'exception') throw new CustomException(); + return callbackReturns; + }; + + try { + var request = await client.getUrl(Uri.parse('https://$HOST_NAME:$port/')); + Expect.equals('pass', result); + await request.close(); + } catch (error) { + Expect.notEquals(result, 'pass'); + if (result == 'fail') { + Expect.isTrue(error is HandshakeException); + } else if (result == 'throw') { + Expect.isTrue(error is CustomException); + } else { + Expect.fail('Unknown expectation $result'); + } + } +} diff --git a/tests/standalone/io/https_client_certificate_test.dart b/tests/standalone/io/https_client_certificate_test.dart index f63d383cc0e..24d397848d4 100644 --- a/tests/standalone/io/https_client_certificate_test.dart +++ b/tests/standalone/io/https_client_certificate_test.dart @@ -10,14 +10,27 @@ import "package:expect/expect.dart"; import "package:path/path.dart"; const HOST_NAME = "localhost"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); +// TODO: Specify which client certificate roots to trust. -Function test() { +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')) +// TODO: Set a client certificate here. + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +void main() { asyncStart(); HttpServer.bindSecure(HOST_NAME, 0, + serverContext, backlog: 5, - certificateName: 'localhost_cert', requestClientCertificate: true).then((server) { server.listen((HttpRequest request) { Expect.isNotNull(request.certificate); @@ -26,7 +39,7 @@ Function test() { request.response.close(); }); - HttpClient client = new HttpClient(); + HttpClient client = new HttpClient(context: clientContext); client.getUrl(Uri.parse("https://$HOST_NAME:${server.port}/")) .then((request) => request.close()) .then((response) { @@ -44,14 +57,3 @@ Function test() { }); }); } - -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - -void main() { - InitializeSSL(); - test(); -} diff --git a/tests/standalone/io/https_server_test.dart b/tests/standalone/io/https_server_test.dart index cdba0642167..88a8b2a9214 100644 --- a/tests/standalone/io/https_server_test.dart +++ b/tests/standalone/io/https_server_test.dart @@ -9,14 +9,23 @@ import "dart:isolate"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); void testListenOn() { void test(void onDone()) { HttpServer.bindSecure(HOST, 0, - backlog: 5, - certificateName: CERTIFICATE).then((server) { + serverContext, + backlog: 5).then((server) { ReceivePort serverPort = new ReceivePort(); server.listen((HttpRequest request) { request.listen( @@ -27,7 +36,7 @@ void testListenOn() { }); }); - HttpClient client = new HttpClient(); + HttpClient client = new HttpClient(context: clientContext); ReceivePort clientPort = new ReceivePort(); client.getUrl(Uri.parse("https://${HOST.host}:${server.port}/")) .then((HttpClientRequest request) { @@ -58,16 +67,10 @@ void testListenOn() { }); } -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - void testEarlyClientClose() { HttpServer.bindSecure(HOST, 0, - certificateName: 'localhost_cert').then((server) { + serverContext).then((server) { server.listen( (request) { String name = Platform.script.toFilePath(); @@ -96,7 +99,6 @@ void testEarlyClientClose() { } void main() { - InitializeSSL(); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; testListenOn(); diff --git a/tests/standalone/io/https_unauthorized_client.dart b/tests/standalone/io/https_unauthorized_client.dart index 925ccba9b3c..673fdf041a5 100644 --- a/tests/standalone/io/https_unauthorized_client.dart +++ b/tests/standalone/io/https_unauthorized_client.dart @@ -15,9 +15,9 @@ class ExpectException implements Exception { String message; } -void expect(condition) { +void expect(condition, message) { if (!condition) { - throw new ExpectException(''); + throw new ExpectException(message); } } @@ -31,16 +31,19 @@ Future runClients(int port) { testFutures.add( client.getUrl(Uri.parse('https://$HOST_NAME:$port/')) .then((HttpClientRequest request) { - expect(false); + expect(false, "Request succeeded"); }, onError: (e) { - expect(e is HandshakeException || e is SocketException); + // Remove ArgumentError once null default context is supported. + expect(e is HandshakeException || + e is SocketException || + e is ArgumentError, + "Error is wrong type: $e"); })); } return Future.wait(testFutures); } void main(List args) { - SecureSocket.initialize(); runClients(int.parse(args[0])) .then((_) => print('SUCCESS')); } diff --git a/tests/standalone/io/https_unauthorized_test.dart b/tests/standalone/io/https_unauthorized_test.dart index 0bb6c897ff6..43f3e53b741 100644 --- a/tests/standalone/io/https_unauthorized_test.dart +++ b/tests/standalone/io/https_unauthorized_test.dart @@ -13,13 +13,19 @@ import "dart:io"; const HOST_NAME = "localhost"; const CERTIFICATE = "localhost_cert"; -Future runServer() { - SecureSocket.initialize( - database: Platform.script.resolve('pkcert').toFilePath(), - password: 'dartdart'); +String localFile(path) => Platform.script.resolve(path).toFilePath(); +SecurityContext untrustedServerContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/untrusted_server_chain.pem')) + ..usePrivateKey(localFile('certificates/untrusted_server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + +Future runServer() { return HttpServer.bindSecure( - HOST_NAME, 0, backlog: 5, certificateName: 'localhost_cert') + HOST_NAME, 0, untrustedServerContext, backlog: 5) .then((server) { server.listen((HttpRequest request) { request.listen((_) { }, onDone: () { request.response.close(); }); diff --git a/tests/standalone/io/pkcert/README b/tests/standalone/io/pkcert/README deleted file mode 100644 index fe764a9cc34..00000000000 --- a/tests/standalone/io/pkcert/README +++ /dev/null @@ -1,16 +0,0 @@ -This is a certificate database used by Dart for testing purposes. - -It is created as a certificate database by NSS (Network Security Services), -a library from Mozilla, using the certutil tool. It uses a cert9.db file, -rather than a cert8.db file, so the database directory must be specified with -"sql:" in front of the directory path, or the environment variable -NSS_DEFAULT_DB_TYPE must be set to "sql". - -The password for the key database is "dartdart". - -The database contains a root certificate from Equifax, used to verify the -client https connection to www.google.dk. It contains a self-signed -certificate for a local certificate authority myauthority_cert, and a -server certificate for localhost called localhost_cert, signed by -myauthority_cert. It contains the key for localhost_cert, but -not the key for myauthority_cert. diff --git a/tests/standalone/io/pkcert/cert9.db b/tests/standalone/io/pkcert/cert9.db deleted file mode 100644 index 497fca668ed..00000000000 Binary files a/tests/standalone/io/pkcert/cert9.db and /dev/null differ diff --git a/tests/standalone/io/pkcert/key4.db b/tests/standalone/io/pkcert/key4.db deleted file mode 100644 index fc0643288b6..00000000000 Binary files a/tests/standalone/io/pkcert/key4.db and /dev/null differ diff --git a/tests/standalone/io/raw_secure_server_closing_test.dart b/tests/standalone/io/raw_secure_server_closing_test.dart index 16fffe34e13..63e9cd44617 100644 --- a/tests/standalone/io/raw_secure_server_closing_test.dart +++ b/tests/standalone/io/raw_secure_server_closing_test.dart @@ -14,7 +14,15 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); void testCloseOneEnd(String toClose) { asyncStart(); @@ -25,7 +33,7 @@ void testCloseOneEnd(String toClose) { .then((_) { asyncEnd(); }); - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + RawSecureServerSocket.bind(HOST, 0, serverContext).then((server) { server.listen((serverConnection) { serverConnection.listen((event) { if (toClose == "server" || event == RawSocketEvent.READ_CLOSED) { @@ -39,7 +47,8 @@ void testCloseOneEnd(String toClose) { onDone: () { serverDone.complete(null); }); - RawSecureSocket.connect(HOST, server.port).then((clientConnection) { + RawSecureSocket.connect(HOST, server.port, context: clientContext) + .then((clientConnection) { clientConnection.listen((event){ if (toClose == "client" || event == RawSocketEvent.READ_CLOSED) { clientConnection.shutdown(SocketDirection.SEND); @@ -55,8 +64,9 @@ void testCloseOneEnd(String toClose) { void testCloseBothEnds() { asyncStart(); - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { - var clientEndFuture = RawSecureSocket.connect(HOST, server.port); + RawSecureServerSocket.bind(HOST, 0, serverContext).then((server) { + var clientEndFuture = + RawSecureSocket.connect(HOST, server.port, context: clientContext); server.listen((serverEnd) { clientEndFuture.then((clientEnd) { clientEnd.close(); @@ -77,7 +87,7 @@ testPauseServerSocket() { RawSecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, backlog: 2 * socketCount).then((server) { Expect.isTrue(server.port > 0); var subscription; @@ -96,7 +106,8 @@ testPauseServerSocket() { subscription.pause(); var connectCount = 0; for (int i = 0; i < socketCount; i++) { - RawSecureSocket.connect(HOST, server.port).then((connection) { + RawSecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { connection.shutdown(SocketDirection.SEND); }); } @@ -104,7 +115,8 @@ testPauseServerSocket() { subscription.resume(); resumed = true; for (int i = 0; i < socketCount; i++) { - RawSecureSocket.connect(HOST, server.port).then((connection) { + RawSecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { connection.shutdown(SocketDirection.SEND); }); } @@ -117,7 +129,7 @@ testCloseServer() { asyncStart(); List ends = []; - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + RawSecureServerSocket.bind(HOST, 0, serverContext).then((server) { Expect.isTrue(server.port > 0); void checkDone() { if (ends.length < 2 * socketCount) return; @@ -134,7 +146,8 @@ testCloseServer() { }); for (int i = 0; i < socketCount; i++) { - RawSecureSocket.connect(HOST, server.port).then((connection) { + RawSecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { ends.add(connection); checkDone(); }); @@ -145,10 +158,6 @@ testCloseServer() { main() { asyncStart(); - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; runTests(); diff --git a/tests/standalone/io/raw_secure_server_socket_argument_test.dart b/tests/standalone/io/raw_secure_server_socket_argument_test.dart index 2513f6d89a1..029b75061ab 100644 --- a/tests/standalone/io/raw_secure_server_socket_argument_test.dart +++ b/tests/standalone/io/raw_secure_server_socket_argument_test.dart @@ -20,17 +20,13 @@ const CERTIFICATE = "localhost_cert"; void testArguments() { bool isArgOrTypeError(e) => e is ArgumentError || e is TypeError; Expect.throws(() => - RawSecureServerSocket.bind(SERVER_ADDRESS, 65536, CERTIFICATE), + RawSecureServerSocket.bind(SERVER_ADDRESS, 65536, null), isArgOrTypeError); Expect.throws(() => - RawSecureServerSocket.bind(SERVER_ADDRESS, -1, CERTIFICATE), + RawSecureServerSocket.bind(SERVER_ADDRESS, -1, null), isArgOrTypeError); Expect.throws(() => RawSecureServerSocket.bind(SERVER_ADDRESS, 0, - CERTIFICATE, backlog: -1), - isArgOrTypeError); - Expect.throws(() => RawSecureSocket.connect(SERVER_ADDRESS, 3456, - sendClientCertificate: true, - certificateName: 12.3), + null, backlog: -1), isArgOrTypeError); Expect.throws(() => RawSecureSocket.connect(SERVER_ADDRESS, null), isArgOrTypeError); @@ -42,12 +38,6 @@ void testArguments() { isArgOrTypeError); Expect.throws(() => RawSecureSocket.connect(null, 0), isArgOrTypeError); - Expect.throws(() => RawSecureSocket.connect(SERVER_ADDRESS, 0, - certificateName: 77), - isArgOrTypeError); - Expect.throws(() => RawSecureSocket.connect(SERVER_ADDRESS, 0, - sendClientCertificate: 'fisk'), - isArgOrTypeError); Expect.throws(() => RawSecureSocket.connect(SERVER_ADDRESS, 0, onBadCertificate: 'hund'), isArgOrTypeError); @@ -55,9 +45,5 @@ void testArguments() { main() { - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); testArguments(); } diff --git a/tests/standalone/io/raw_secure_server_socket_test.dart b/tests/standalone/io/raw_secure_server_socket_test.dart index 33fc191de87..560722f7a58 100644 --- a/tests/standalone/io/raw_secure_server_socket_test.dart +++ b/tests/standalone/io/raw_secure_server_socket_test.dart @@ -14,11 +14,19 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); void testSimpleBind() { asyncStart(); - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((s) { + RawSecureServerSocket.bind(HOST, 0, serverContext).then((s) { Expect.isTrue(s.port > 0); s.close(); asyncEnd(); @@ -30,7 +38,7 @@ void testInvalidBind() { // Bind to a unknown DNS name. asyncStart(); - RawSecureServerSocket.bind("ko.faar.__hest__", 0, CERTIFICATE).then((_) { + RawSecureServerSocket.bind("ko.faar.__hest__", 0, serverContext).then((_) { Expect.fail("Failure expected"); }).catchError((error) { Expect.isTrue(error is SocketException); @@ -39,7 +47,7 @@ void testInvalidBind() { // Bind to an unavaliable IP-address. asyncStart(); - RawSecureServerSocket.bind("8.8.8.8", 0, CERTIFICATE).then((_) { + RawSecureServerSocket.bind("8.8.8.8", 0, serverContext).then((_) { Expect.fail("Failure expected"); }).catchError((error) { Expect.isTrue(error is SocketException); @@ -48,10 +56,10 @@ void testInvalidBind() { // Bind to a port already in use. asyncStart(); - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((s) { + RawSecureServerSocket.bind(HOST, 0, serverContext).then((s) { RawSecureServerSocket.bind(HOST, s.port, - CERTIFICATE).then((t) { + serverContext).then((t) { s.close(); t.close(); Expect.fail("Multiple listens on same port"); @@ -64,12 +72,14 @@ void testInvalidBind() { }); } -void testSimpleConnect(String certificate) { +void testSimpleConnect() { asyncStart(); - RawSecureServerSocket.bind(HOST, 0, certificate).then((server) { - var clientEndFuture = RawSecureSocket.connect(HOST, server.port); + RawSecureServerSocket.bind(HOST, 0, serverContext).then((server) { + var clientEndFuture = + RawSecureSocket.connect(HOST, server.port, context: clientContext); server.listen((serverEnd) { clientEndFuture.then((clientEnd) { + // TODO(whesse): Shutdown(SEND) not supported on secure sockets. clientEnd.shutdown(SocketDirection.SEND); serverEnd.shutdown(SocketDirection.SEND); server.close(); @@ -79,10 +89,11 @@ void testSimpleConnect(String certificate) { }); } -void testSimpleConnectFail(String certificate, bool cancelOnError) { +void testSimpleConnectFail(SecurityContext context, bool cancelOnError) { asyncStart(); - RawSecureServerSocket.bind(HOST, 0, certificate).then((server) { - var clientEndFuture = RawSecureSocket.connect(HOST, server.port) + RawSecureServerSocket.bind(HOST, 0, context).then((server) { + var clientEndFuture = + RawSecureSocket.connect(HOST, server.port, context: clientContext) .then((clientEnd) { Expect.fail("No client connection expected."); }) @@ -94,7 +105,7 @@ void testSimpleConnectFail(String certificate, bool cancelOnError) { Expect.fail("No server connection expected."); }, onError: (error) { - Expect.isTrue(error is CertificateException); + Expect.isTrue(error is HandshakeException); clientEndFuture.then((_) { if (!cancelOnError) server.close(); asyncEnd(); @@ -106,9 +117,10 @@ void testSimpleConnectFail(String certificate, bool cancelOnError) { void testServerListenAfterConnect() { asyncStart(); - RawSecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + RawSecureServerSocket.bind(HOST, 0, serverContext).then((server) { Expect.isTrue(server.port > 0); - var clientEndFuture = RawSecureSocket.connect(HOST, server.port); + var clientEndFuture = + RawSecureSocket.connect(HOST, server.port, context: clientContext); new Timer(const Duration(milliseconds: 500), () { server.listen((serverEnd) { clientEndFuture.then((clientEnd) { @@ -422,15 +434,17 @@ void testSimpleReadWrite({bool listenSecure, Future connectClient(int port) { if (connectSecure) { - return RawSecureSocket.connect(HOST, port); + return RawSecureSocket.connect(HOST, port, context: clientContext); } else if (!handshakeBeforeSecure) { return RawSocket.connect(HOST, port).then((socket) { - return RawSecureSocket.secure(socket); + return RawSecureSocket.secure(socket, context: clientContext); }); } else { return RawSocket.connect(HOST, port).then((socket) { return runClientHandshake(socket).then((subscription) { - return RawSecureSocket.secure(socket, subscription: subscription); + return RawSecureSocket.secure(socket, + context: clientContext, + subscription: subscription); }); }); } @@ -441,14 +455,14 @@ void testSimpleReadWrite({bool listenSecure, if (listenSecure) { runServer(client).then((_) => server.close()); } else if (!handshakeBeforeSecure) { - RawSecureSocket.secureServer(client, CERTIFICATE).then((client) { + RawSecureSocket.secureServer(client, serverContext).then((client) { runServer(client).then((_) => server.close()); }); } else { runServerHandshake(client).then((secure) { RawSecureSocket.secureServer( client, - CERTIFICATE, + serverContext, subscription: secure[0], bufferedData: secure[1]).then((client) { runServer(client).then((_) => server.close()); @@ -465,7 +479,7 @@ void testSimpleReadWrite({bool listenSecure, if (listenSecure) { RawSecureServerSocket.bind( - HOST, 0, CERTIFICATE).then(serverReady); + HOST, 0, serverContext).then(serverReady); } else { RawServerSocket.bind(HOST, 0).then(serverReady); } @@ -490,7 +504,7 @@ testPausedSecuringSubscription(bool pausedServer, bool pausedClient) { } try { RawSecureSocket.secureServer( - client, CERTIFICATE, subscription: subscription) + client, serverContext, subscription: subscription) .catchError((_) {}) .whenComplete(() { if (pausedServer) { @@ -543,10 +557,6 @@ testPausedSecuringSubscription(bool pausedServer, bool pausedClient) { main() { asyncStart(); - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; runTests(); @@ -557,12 +567,22 @@ main() { runTests() { testSimpleBind(); testInvalidBind(); - testSimpleConnect(CERTIFICATE); - testSimpleConnect("CN=localhost"); - testSimpleConnectFail("not_a_nickname", false); - testSimpleConnectFail("CN=notARealDistinguishedName", false); - testSimpleConnectFail("not_a_nickname", true); - testSimpleConnectFail("CN=notARealDistinguishedName", true); + testSimpleConnect(); + SecurityContext context = new SecurityContext(); + testSimpleConnectFail(context, false); + testSimpleConnectFail(context, true); + var chain = + Platform.script.resolve('certificates/untrusted_server_chain.pem') + .toFilePath(); + context.useCertificateChain(chain); + testSimpleConnectFail(context, false); + testSimpleConnectFail(context, true); + var key = + Platform.script.resolve('certificates/untrusted_server_key.pem') + .toFilePath(); + context.usePrivateKey(key, password: 'dartdart'); + testSimpleConnectFail(context, false); + testSimpleConnectFail(context, true); testServerListenAfterConnect(); testSimpleReadWrite(listenSecure: true, @@ -575,11 +595,13 @@ runTests() { handshakeBeforeSecure: false, postponeSecure: false, dropReads: false); + testSimpleReadWrite(listenSecure: false, connectSecure: true, handshakeBeforeSecure: false, postponeSecure: false, dropReads: false); + testSimpleReadWrite(listenSecure: false, connectSecure: false, handshakeBeforeSecure: false, diff --git a/tests/standalone/io/raw_secure_socket_pause_test.dart b/tests/standalone/io/raw_secure_socket_pause_test.dart index 3ca26cbc180..c3bfc435495 100644 --- a/tests/standalone/io/raw_secure_socket_pause_test.dart +++ b/tests/standalone/io/raw_secure_socket_pause_test.dart @@ -13,12 +13,22 @@ import "dart:async"; import "dart:io"; import "dart:isolate"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + Future startServer() { return HttpServer.bindSecure( "localhost", 0, - backlog: 5, - certificateName: 'localhost_cert').then((server) { + serverContext, + backlog: 5).then((server) { server.listen((HttpRequest request) { request.listen( (_) { }, @@ -34,77 +44,72 @@ Future startServer() { }); } -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - -void main() { +main() async { List message = "GET / HTTP/1.0\r\nHost: localhost\r\n\r\n".codeUnits; int written = 0; List body = []; - InitializeSSL(); - startServer().then((server) { - RawSecureSocket.connect("localhost", server.port).then((socket) { - StreamSubscription subscription; - bool paused = false; - bool readEventsTested = false; - bool readEventsPaused = false; + var server = await startServer(); + var socket = await RawSecureSocket.connect("localhost", + server.port, + context: clientContext); + StreamSubscription subscription; + bool paused = false; + bool readEventsTested = false; + bool readEventsPaused = false; - void runPauseTest() { - subscription.pause(); - paused = true; - new Timer(const Duration(milliseconds: 500), () { - paused = false; - subscription.resume(); - }); - } - - void runReadEventTest() { - if (readEventsTested) return; - readEventsTested = true; - socket.readEventsEnabled = false; - readEventsPaused = true; - new Timer(const Duration(milliseconds: 500), () { - readEventsPaused = false; - socket.readEventsEnabled = true; - }); - } - - subscription = socket.listen( - (RawSocketEvent event) { - Expect.isFalse(paused); - switch (event) { - case RawSocketEvent.READ: - Expect.isFalse(readEventsPaused); - runReadEventTest(); - body.addAll(socket.read()); - break; - case RawSocketEvent.WRITE: - written += - socket.write(message, written, message.length - written); - if (written < message.length) { - socket.writeEventsEnabled = true; - } else { - socket.shutdown(SocketDirection.SEND); - runPauseTest(); - } - break; - case RawSocketEvent.READ_CLOSED: - Expect.isTrue(body.length > 100); - Expect.equals(72, body[0]); - Expect.equals(9, body[body.length - 1]); - server.close(); - break; - default: throw "Unexpected event $event"; - } - }, - onError: (e, trace) { - String msg = "onError handler of RawSecureSocket stream hit: $e"; - if (trace != null) msg += "\nStackTrace: $trace"; - Expect.fail(msg); - }); + void runPauseTest() { + subscription.pause(); + paused = true; + new Timer(const Duration(milliseconds: 500), () { + paused = false; + subscription.resume(); }); - }); + } + + void runReadEventTest() { + if (readEventsTested) return; + readEventsTested = true; + socket.readEventsEnabled = false; + readEventsPaused = true; + new Timer(const Duration(milliseconds: 500), () { + readEventsPaused = false; + socket.readEventsEnabled = true; + }); + } + + void handleRawEvent(RawSocketEvent event) { + Expect.isFalse(paused); + switch (event) { + case RawSocketEvent.READ: + Expect.isFalse(readEventsPaused); + runReadEventTest(); + body.addAll(socket.read()); + break; + case RawSocketEvent.WRITE: + written += + socket.write(message, written, message.length - written); + if (written < message.length) { + socket.writeEventsEnabled = true; + } else { + socket.shutdown(SocketDirection.SEND); + runPauseTest(); + } + break; + case RawSocketEvent.READ_CLOSED: + Expect.isTrue(body.length > 100); + Expect.equals(72, body.first); + Expect.equals(9, body.last); + server.close(); + break; + default: throw "Unexpected event $event"; + } + } + + subscription = socket.listen( + handleRawEvent, + onError: (e, trace) { + String msg = "onError handler of RawSecureSocket stream hit: $e"; + if (trace != null) msg += "\nStackTrace: $trace"; + Expect.fail(msg); + }); } diff --git a/tests/standalone/io/raw_secure_socket_test.dart b/tests/standalone/io/raw_secure_socket_test.dart index cd327ec5a13..9cd54a1aa26 100644 --- a/tests/standalone/io/raw_secure_socket_test.dart +++ b/tests/standalone/io/raw_secure_socket_test.dart @@ -13,69 +13,61 @@ import "dart:async"; import "dart:io"; import "dart:isolate"; -Future startServer() { - return HttpServer.bindSecure( - "localhost", - 0, - backlog: 5, - certificateName: 'localhost_cert').then((server) { - server.listen((HttpRequest request) { - request.listen( - (_) { }, - onDone: () { - request.response.contentLength = 100; - for (int i = 0; i < 10; i++) { - request.response.add([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - } - request.response.close(); - }); - }); - return server; - }); -} +String localFile(path) => Platform.script.resolve(path).toFilePath(); -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); -void main() { +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + +main() async { List message = "GET / HTTP/1.0\r\nHost: localhost\r\n\r\n".codeUnits; int written = 0; List body = []; - InitializeSSL(); - startServer().then((server) { - RawSecureSocket.connect("localhost", server.port).then((socket) { - socket.listen( - (RawSocketEvent event) { - switch (event) { - case RawSocketEvent.READ: - body.addAll(socket.read()); - break; - case RawSocketEvent.WRITE: - written += - socket.write(message, written, message.length - written); - if (written < message.length) { - socket.writeEventsEnabled = true; - } else { - socket.shutdown(SocketDirection.SEND); - } - break; - case RawSocketEvent.READ_CLOSED: - Expect.isTrue(body.length > 100, "$body\n${body.length}"); - Expect.equals(72, body[0]); - Expect.equals(9, body[body.length - 1]); - server.close(); - break; - default: throw "Unexpected event $event"; - } - }, - onError: (e, trace) { - String msg = "onError handler of RawSecureSocket stream hit $e"; - if (trace != null) msg += "\nStackTrace: $trace"; - Expect.fail(msg); - }); - }); + var server = await HttpServer.bindSecure( + "localhost", + 0, + serverContext, + backlog: 5); + server.listen((HttpRequest request) async { + await request.drain(); + request.response.contentLength = 100; + for (int i = 0; i < 10; i++) { + request.response.add([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + } + request.response.close(); + }); + var socket = await RawSecureSocket.connect("localhost", + server.port, + context: clientContext); + socket.listen((RawSocketEvent event) { + switch (event) { + case RawSocketEvent.READ: + body.addAll(socket.read()); + break; + case RawSocketEvent.WRITE: + written += + socket.write(message, written, message.length - written); + if (written < message.length) { + socket.writeEventsEnabled = true; + } else { + socket.shutdown(SocketDirection.SEND); + } + break; + case RawSocketEvent.READ_CLOSED: + Expect.isTrue(body.length > 100, "$body\n${body.length}"); + Expect.equals(72, body[0]); + Expect.equals(9, body[body.length - 1]); + server.close(); + break; + default: throw "Unexpected event $event"; + } + }, onError: (e, trace) { + String msg = "onError handler of RawSecureSocket stream hit $e"; + if (trace != null) msg += "\nStackTrace: $trace"; + Expect.fail(msg); }); } diff --git a/tests/standalone/io/regress_21160_test.dart b/tests/standalone/io/regress_21160_test.dart index 69cbcf19df9..95ccfcaf112 100644 --- a/tests/standalone/io/regress_21160_test.dart +++ b/tests/standalone/io/regress_21160_test.dart @@ -10,6 +10,16 @@ import "dart:async"; import "dart:io"; import "dart:typed_data"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + // 10 KiB of i%256 data. Uint8List DATA = new Uint8List.fromList( new List.generate(10 * 1024, (i) => i % 256)); @@ -17,63 +27,54 @@ Uint8List DATA = new Uint8List.fromList( Future startServer() { return SecureServerSocket.bind("localhost", 0, - 'localhost_cert').then((server) { - server.listen((SecureSocket request) { - request.drain().then((_) { - request - ..add(DATA) - ..close(); - }); + serverContext).then((server) { + server.listen((SecureSocket request) async { + await request.drain(); + request..add(DATA)..close(); }); return server; }); } -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - -void main() { - InitializeSSL(); - +main() async { asyncStart(); - startServer().then((SecureServerSocket server) { - RawSecureSocket.connect("localhost", server.port).then((socket) { - List body = []; + var server = await SecureServerSocket.bind("localhost", 0, serverContext); + server.listen((SecureSocket request) async { + await request.drain(); + request..add(DATA)..close(); + }); - // Close our end, since we're not sending data. - socket.shutdown(SocketDirection.SEND); + var socket = await RawSecureSocket.connect("localhost", + server.port, + context: clientContext); + List body = []; + // Close our end, since we're not sending data. + socket.shutdown(SocketDirection.SEND); - socket.listen((RawSocketEvent event) { - switch (event) { - case RawSocketEvent.READ: - // NOTE: We have a very low prime number here. The internal - // ring buffers will not have a size of 3. This means that - // we'll reach the point where we would like to read 1/2 bytes - // at the end and then wrap around and read the next 2/1 bytes. - // [This will ensure we trigger the bug.] - body.addAll(socket.read(3)); - break; - case RawSocketEvent.WRITE: - break; - case RawSocketEvent.READ_CLOSED: - break; - default: throw "Unexpected event $event"; - } - }, - onError: (e, _) { - Expect.fail('Unexpected error: $e'); - }, - onDone: () { - Expect.equals(body.length, DATA.length); - for (int i = 0; i < body.length; i++) { - Expect.equals(body[i], DATA[i]); - } - server.close(); - asyncEnd(); - }); - }); + socket.listen((RawSocketEvent event) { + switch (event) { + case RawSocketEvent.READ: + // NOTE: We have a very low prime number here. The internal + // ring buffers will not have a size of 3. This means that + // we'll reach the point where we would like to read 1/2 bytes + // at the end and then wrap around and read the next 2/1 bytes. + // [This will ensure we trigger the bug.] + body.addAll(socket.read(3)); + break; + case RawSocketEvent.WRITE: + break; + case RawSocketEvent.READ_CLOSED: + break; + default: throw "Unexpected event $event"; + } + }, onError: (e, _) { + Expect.fail('Unexpected error: $e'); + }, onDone: () { + Expect.equals(body.length, DATA.length); + for (int i = 0; i < body.length; i++) { + Expect.equals(body[i], DATA[i]); + } + server.close(); + asyncEnd(); }); } diff --git a/tests/standalone/io/secure_bad_certificate_client.dart b/tests/standalone/io/secure_bad_certificate_client.dart deleted file mode 100644 index c3922aeac3f..00000000000 --- a/tests/standalone/io/secure_bad_certificate_client.dart +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file -// for details. All rights reserved. Use of this source code is governed by a -// BSD-style license that can be found in the LICENSE file. - -// Client for secure_bad_certificate_test, that runs in a subprocess. -// The test verifies that the client bad certificate callback works. - -import "dart:async"; -import "dart:io"; - -class ExpectException implements Exception { - ExpectException(this.message); - String toString() => "ExpectException: $message"; - String message; -} - -void expect(condition) { - if (!condition) { - throw new ExpectException(''); - } -} - -const HOST_NAME = "localhost"; - -void runClient(int port, result) { - bool badCertificateCallback(X509Certificate certificate) { - expect('CN=localhost' == certificate.subject); - expect('CN=myauthority' == certificate.issuer); - expect(result != 'exception'); // Throw exception if one is requested. - if (result == 'true') result = true; - if (result == 'false') result = false; - return result; - } - - SecureSocket.connect(HOST_NAME, - port, - onBadCertificate: badCertificateCallback) - .then((SecureSocket socket) { - expect(result); - socket.close(); - }, - onError: (error) { - expect(result != true); - if (result == false) { - expect(error is HandshakeException); - } else if (result == 'exception') { - expect(error is ExpectException); - } else { - expect(error is ArgumentError); - } - }); -} - - -void main(List args) { - SecureSocket.initialize(); - runClient(int.parse(args[0]), args[1]); -} diff --git a/tests/standalone/io/secure_bad_certificate_test.dart b/tests/standalone/io/secure_bad_certificate_test.dart index 3388a8d4894..6a7e93a7f07 100644 --- a/tests/standalone/io/secure_bad_certificate_test.dart +++ b/tests/standalone/io/secure_bad_certificate_test.dart @@ -4,59 +4,81 @@ // This test verifies that the bad certificate callback works. -import "package:expect/expect.dart"; -import "package:path/path.dart"; import "dart:async"; import "dart:io"; -const HOST_NAME = "localhost"; -const CERTIFICATE = "localhost_cert"; +import "package:expect/expect.dart"; +final HOST_NAME = 'localhost'; -String certificateDatabase() => Platform.script.resolve('pkcert').toFilePath(); +String localFile(path) => Platform.script.resolve(path).toFilePath(); -Future runServer() { - SecureSocket.initialize(database: certificateDatabase(), - password: 'dartdart'); +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); - return SecureServerSocket.bind(HOST_NAME, 0, CERTIFICATE) - .then((SecureServerSocket server) { - server.listen((SecureSocket socket) { - socket.listen((_) { }, - onDone: () { - socket.close(); - }); - }, onError: (e) => Expect.isTrue(e is HandshakeException)); - return server; - }); +class CustomException {} + +main() async { + var HOST = (await InternetAddress.lookup(HOST_NAME)).first; + var server = await SecureServerSocket.bind(HOST_NAME, 0, serverContext); + server.listen((SecureSocket socket) { + socket.listen((_) {}, onDone: () { + socket.close(); + }); + }, onError: (e) { if (e is! HandshakeException) throw e; }); + + SecurityContext goodContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + SecurityContext badContext = new SecurityContext(); + SecurityContext defaultContext = SecurityContext.defaultContext; + + await runClient(server.port, goodContext, true, 'pass'); + await runClient(server.port, goodContext, false, 'pass'); + await runClient(server.port, goodContext, 'fisk', 'pass'); + await runClient(server.port, goodContext, 'exception', 'pass'); + await runClient(server.port, badContext, true, 'pass'); + await runClient(server.port, badContext, false, 'fail'); + await runClient(server.port, badContext, 'fisk', 'fail'); + await runClient(server.port, badContext, 'exception', 'throw'); + await runClient(server.port, defaultContext, true, 'pass'); + await runClient(server.port, defaultContext, false, 'fail'); + await runClient(server.port, defaultContext, 'fisk', 'fail'); + await runClient(server.port, defaultContext, 'exception', 'throw'); + server.close(); } -void main() { - var clientScript = Platform.script - .resolve('secure_bad_certificate_client.dart') - .toFilePath(); - - Future clientProcess(int port, String acceptCertificate) { - return Process.run(Platform.executable, - [clientScript, port.toString(), acceptCertificate]) - .then((ProcessResult result) { - if (result.exitCode != 0) { - print("Client failed, stdout:"); - print(result.stdout); - print(" stderr:"); - print(result.stderr); - Expect.fail('Client subprocess exit code: ${result.exitCode}'); - } - }); +Future runClient(int port, + SecurityContext context, + callbackReturns, + result) async { + badCertificateCallback(X509Certificate certificate) { + Expect.equals('/CN=rootauthority', certificate.subject); + Expect.equals('/CN=rootauthority', certificate.issuer); + // Throw exception if one is requested. + if (callbackReturns == 'exception') throw new CustomException(); + return callbackReturns; } - runServer().then((server) { - Future.wait([clientProcess(server.port, 'true'), - clientProcess(server.port, 'false'), - clientProcess(server.port, 'fisk'), - clientProcess(server.port, 'exception')]).then((_) { - server.close(); - }); - }); -} + try { + var socket = await SecureSocket.connect( + HOST_NAME, + port, + context: context, + onBadCertificate: badCertificateCallback); + Expect.equals('pass', result); // Is rethrown below + await socket.close(); + } catch (error) { + if (error is ExpectException) rethrow; + Expect.notEquals(result, 'pass'); + if (result == 'fail') { + Expect.isTrue(error is HandshakeException || error is ArgumentError); + } else if (result == 'throw') { + Expect.isTrue(error is CustomException); + } else { + Expect.fail('Unknown expectation $result'); + } + } +} \ No newline at end of file diff --git a/tests/standalone/io/secure_builtin_roots_test.dart b/tests/standalone/io/secure_builtin_roots_test.dart index 875ea09e7c5..6cb85d655cf 100644 --- a/tests/standalone/io/secure_builtin_roots_test.dart +++ b/tests/standalone/io/secure_builtin_roots_test.dart @@ -7,86 +7,33 @@ import "dart:async"; import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; -import "package:path/path.dart"; -void main(List args) { - if (!args.contains('--child')) { - runAllTestsInChildProcesses(); - } else { - InitializeSSL(useDatabase: args.contains('--database'), - useBuiltinRoots: args.contains('--builtin-roots')); - testGoogleUrl(args.contains('--builtin-roots')); - } -} - -void InitializeSSL({bool useDatabase, bool useBuiltinRoots}) { - // If the built-in root certificates aren't loaded, the connection - // should signal an error. Even when an external database is loaded, - // they should not be loaded. - if (useDatabase) { - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: useBuiltinRoots); - } else { - SecureSocket.initialize(useBuiltinRoots: useBuiltinRoots); - } -} - -void testGoogleUrl(bool expectSuccess) { +Future testGoogleUrl(SecurityContext context, String outcome) async { + var client = new HttpClient(context: context); // We need to use an external server that is backed by a // built-in root certificate authority. - - // First, check if the lookup fails. If not then run the test. - InternetAddress.lookup('www.google.com').then((_) { - HttpClient client = new HttpClient(); - client.getUrl(Uri.parse('https://www.google.com')) - .then((request) { - request.followRedirects = false; - return request.close(); - }) - .then((response) { - Expect.isTrue(expectSuccess, "Unexpected successful connection"); - print('SUCCESS'); - return response.drain().catchError((_) {}); - }) - .catchError((error) { - // Allow SocketExceptions if www.google.com is unreachable or down. - Expect.isTrue((!expectSuccess && error is HandshakeException) || - error is SocketException); - print('SUCCESS'); - }) - .whenComplete(client.close); - }, - onError: (e) { - // Lookup failed. - Expect.isTrue(e is SocketException); - print('SUCCESS'); - }); + try { + // First, check if the lookup works. + await InternetAddress.lookup('www.google.com'); + var request = await client.getUrl(Uri.parse('https://www.google.com')); + request.followRedirects = false; + var response = await request.close(); + Expect.equals('pass', outcome, 'Unexpected successful connection'); + try { await response.drain(); } catch (e) { } + } on HandshakeException { + Expect.equals('fail', outcome, 'Unexpected failed connection'); + } on SocketException { + // Lookup failed or connection failed. Don't report a failure. + } finally { + client.close(); + } } -void runAllTestsInChildProcesses() { - Future runChild(List scriptArguments) { - return Process.run(Platform.executable, - []..addAll(Platform.executableArguments) - ..add(Platform.script.toFilePath()) - ..addAll(scriptArguments)) - .then((ProcessResult result) { - if (result.exitCode != 0 || !result.stdout.contains('SUCCESS')) { - print("Client failed"); - print(" stdout:"); - print(result.stdout); - print(" stderr:"); - print(result.stderr); - Expect.fail('Client subprocess exit code: ${result.exitCode}'); - } - }); - } +main() async { asyncStart(); - Future.wait([runChild(['--child']), - runChild(['--child', '--database']), - runChild(['--child', '--builtin-roots']), - runChild(['--child', '--builtin-roots', '--database'])]) - .then((_) => asyncEnd()); - } + await testGoogleUrl(null, "pass"); + await testGoogleUrl(SecurityContext.defaultContext, "pass"); + await testGoogleUrl(new SecurityContext(), "fail"); + asyncEnd(); +} diff --git a/tests/standalone/io/secure_client_raw_server_test.dart b/tests/standalone/io/secure_client_raw_server_test.dart index e380f753d04..318edebf570 100644 --- a/tests/standalone/io/secure_client_raw_server_test.dart +++ b/tests/standalone/io/secure_client_raw_server_test.dart @@ -13,12 +13,21 @@ import "dart:io"; import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; Future startEchoServer() { return RawSecureServerSocket.bind(HOST, 0, - CERTIFICATE).then((server) { + serverContext).then((server) { server.listen((RawSecureSocket client) { List> readChunks = >[]; List dataToWrite = null; @@ -60,7 +69,8 @@ Future startEchoServer() { Future testClient(server) { Completer success = new Completer(); List chunks = []; - SecureSocket.connect(HOST, server.port).then((socket) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((socket) { socket.write("Hello server."); socket.close(); socket.listen( @@ -79,9 +89,6 @@ Future testClient(server) { void main() { asyncStart(); - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart'); InternetAddress.lookup("localhost").then((hosts) => HOST = hosts.first) .then((_) => startEchoServer()) .then(testClient) diff --git a/tests/standalone/io/secure_client_server_test.dart b/tests/standalone/io/secure_client_server_test.dart index 3c412668f5d..b4f3db14744 100644 --- a/tests/standalone/io/secure_client_server_test.dart +++ b/tests/standalone/io/secure_client_server_test.dart @@ -14,11 +14,22 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + + Future startEchoServer() { return SecureServerSocket.bind(HOST, 0, - CERTIFICATE).then((server) { + serverContext).then((server) { server.listen((SecureSocket client) { client.fold([], (message, data) => message..addAll(data)) .then((message) { @@ -31,7 +42,8 @@ Future startEchoServer() { } Future testClient(server) { - return SecureSocket.connect(HOST, server.port).then((socket) { + return SecureSocket.connect(HOST, server.port, context: clientContext) + .then((socket) { socket.write("Hello server."); socket.close(); return socket.fold([], (message, data) => message..addAll(data)) @@ -44,9 +56,6 @@ Future testClient(server) { void main() { asyncStart(); - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart'); InternetAddress.lookup("localhost").then((hosts) => HOST = hosts.first ) .then((_) => startEchoServer()) .then(testClient) diff --git a/tests/standalone/io/secure_multiple_client_server_test.dart b/tests/standalone/io/secure_multiple_client_server_test.dart index 2eeeed2cfb7..586105d4a74 100644 --- a/tests/standalone/io/secure_multiple_client_server_test.dart +++ b/tests/standalone/io/secure_multiple_client_server_test.dart @@ -15,10 +15,19 @@ import "package:expect/expect.dart"; InternetAddress HOST; SecureServerSocket SERVER; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); Future startServer() { - return SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + return SecureServerSocket.bind(HOST, 0, serverContext).then((server) { SERVER = server; SERVER.listen((SecureSocket client) { client.fold([], (message, data) => message..addAll(data)) @@ -34,7 +43,8 @@ Future startServer() { } Future testClient(name) { - return SecureSocket.connect(HOST, SERVER.port).then((socket) { + return SecureSocket.connect(HOST, SERVER.port, context: clientContext) + .then((socket) { socket.add("Hello from client $name".codeUnits); socket.close(); return socket.fold([], (message, data) => message..addAll(data)) @@ -46,9 +56,6 @@ Future testClient(name) { void main() { asyncStart(); - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart'); InternetAddress.lookup("localhost").then((hosts) => HOST = hosts.first) .then((_) => startServer()) .then((_) => ['ale', 'bar', 'che', 'den', 'els'].map(testClient)) diff --git a/tests/standalone/io/secure_server_client_certificate_test.dart b/tests/standalone/io/secure_server_client_certificate_test.dart index 34de17d037f..3f34d93042e 100644 --- a/tests/standalone/io/secure_server_client_certificate_test.dart +++ b/tests/standalone/io/secure_server_client_certificate_test.dart @@ -9,16 +9,26 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); Future testClientCertificate() { var completer = new Completer(); SecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, requestClientCertificate: true).then((server) { var clientEndFuture = SecureSocket.connect(HOST, server.port, + context: clientContext, sendClientCertificate: true); server.listen((serverEnd) { X509Certificate certificate = serverEnd.peerCertificate; @@ -44,10 +54,11 @@ Future testRequiredClientCertificate() { var completer = new Completer(); SecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, requireClientCertificate: true).then((server) { var clientEndFuture = SecureSocket.connect(HOST, server.port, + context: clientContext, sendClientCertificate: true); server.listen((serverEnd) { X509Certificate certificate = serverEnd.peerCertificate; @@ -70,11 +81,6 @@ Future testRequiredClientCertificate() { } void main() { - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); - asyncStart(); InternetAddress.lookup("localhost").then((hosts) => HOST = hosts.first) .then((_) => testClientCertificate()) diff --git a/tests/standalone/io/secure_server_client_no_certificate_test.dart b/tests/standalone/io/secure_server_client_no_certificate_test.dart index 888f9888fc2..97a422b5f2c 100644 --- a/tests/standalone/io/secure_server_client_no_certificate_test.dart +++ b/tests/standalone/io/secure_server_client_no_certificate_test.dart @@ -9,16 +9,25 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); Future testNoClientCertificate() { var completer = new Completer(); SecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, requestClientCertificate: true).then((server) { var clientEndFuture = SecureSocket.connect(HOST, - server.port); + server.port, + context: clientContext); server.listen((serverEnd) { X509Certificate certificate = serverEnd.peerCertificate; Expect.isNull(certificate); @@ -38,10 +47,11 @@ Future testNoRequiredClientCertificate() { bool clientError = false; SecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, requireClientCertificate: true).then((server) { - Future clientDone = SecureSocket.connect(HOST, server.port) - .catchError((e) { clientError = true; }); + Future clientDone = + SecureSocket.connect(HOST, server.port, context: clientContext) + .catchError((e) { clientError = true; }); server.listen((serverEnd) { Expect.fail("Got a unverifiable connection"); }, @@ -57,11 +67,6 @@ Future testNoRequiredClientCertificate() { } void main() { - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); - asyncStart(); InternetAddress.lookup("localhost").then((hosts) => HOST = hosts.first) .then((_) => testNoRequiredClientCertificate()) diff --git a/tests/standalone/io/secure_server_closing_test.dart b/tests/standalone/io/secure_server_closing_test.dart index a45877b2503..0677a41c9d5 100644 --- a/tests/standalone/io/secure_server_closing_test.dart +++ b/tests/standalone/io/secure_server_closing_test.dart @@ -14,7 +14,16 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); void testCloseOneEnd(String toClose) { asyncStart(); @@ -25,7 +34,7 @@ void testCloseOneEnd(String toClose) { .then((_) { asyncEnd(); }); - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { server.listen((serverConnection) { serverConnection.listen( (data) { @@ -43,7 +52,8 @@ void testCloseOneEnd(String toClose) { onDone: () { serverDone.complete(null); }); - SecureSocket.connect(HOST, server.port).then((clientConnection) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((clientConnection) { clientConnection.listen( (data) { Expect.fail("No data should be received by client"); @@ -61,8 +71,9 @@ void testCloseOneEnd(String toClose) { void testCloseBothEnds() { asyncStart(); - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { - var clientEndFuture = SecureSocket.connect(HOST, server.port); + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { + var clientEndFuture = + SecureSocket.connect(HOST, server.port, context: clientContext); server.listen((serverEnd) { clientEndFuture.then((clientEnd) { clientEnd.destroy(); @@ -83,7 +94,7 @@ testPauseServerSocket() { SecureServerSocket.bind(HOST, 0, - CERTIFICATE, + serverContext, backlog: 2 * socketCount).then((server) { Expect.isTrue(server.port > 0); var subscription; @@ -97,12 +108,12 @@ testPauseServerSocket() { }); // Pause the server socket subscription and resume it after having - // connected a number client sockets. Then connect more client - // sockets. + // connected a number client sockets. Then connect more client sockets. subscription.pause(); var connectCount = 0; for (int i = 0; i < socketCount; i++) { - SecureSocket.connect(HOST, server.port).then((connection) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { connection.close(); }); } @@ -110,7 +121,8 @@ testPauseServerSocket() { subscription.resume(); resumed = true; for (int i = 0; i < socketCount; i++) { - SecureSocket.connect(HOST, server.port).then((connection) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { connection.close(); }); } @@ -125,7 +137,7 @@ testCloseServer() { asyncStart(); List ends = []; - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { Expect.isTrue(server.port > 0); void checkDone() { if (ends.length < 2 * socketCount) return; @@ -142,7 +154,8 @@ testCloseServer() { }); for (int i = 0; i < socketCount; i++) { - SecureSocket.connect(HOST, server.port).then((connection) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((connection) { ends.add(connection); checkDone(); }); @@ -153,10 +166,6 @@ testCloseServer() { main() { asyncStart(); - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; runTests(); diff --git a/tests/standalone/io/secure_server_socket_test.dart b/tests/standalone/io/secure_server_socket_test.dart index a6a10878507..6ef7f27e82d 100644 --- a/tests/standalone/io/secure_server_socket_test.dart +++ b/tests/standalone/io/secure_server_socket_test.dart @@ -14,11 +14,20 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); void testSimpleBind() { asyncStart(); - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((s) { + SecureServerSocket.bind(HOST, 0, serverContext).then((s) { Expect.isTrue(s.port > 0); s.close(); asyncEnd(); @@ -30,7 +39,7 @@ void testInvalidBind() { // Bind to a unknown DNS name. asyncStart(); - SecureServerSocket.bind("ko.faar.__hest__", 0, CERTIFICATE).then((_) { + SecureServerSocket.bind("ko.faar.__hest__", 0, serverContext).then((_) { Expect.fail("Failure expected"); }).catchError((error) { Expect.isTrue(error is SocketException); @@ -39,7 +48,7 @@ void testInvalidBind() { // Bind to an unavaliable IP-address. asyncStart(); - SecureServerSocket.bind("8.8.8.8", 0, CERTIFICATE).then((_) { + SecureServerSocket.bind("8.8.8.8", 0, serverContext).then((_) { Expect.fail("Failure expected"); }).catchError((error) { Expect.isTrue(error is SocketException); @@ -48,10 +57,10 @@ void testInvalidBind() { // Bind to a port already in use. asyncStart(); - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((s) { + SecureServerSocket.bind(HOST, 0, serverContext).then((s) { SecureServerSocket.bind(HOST, s.port, - CERTIFICATE).then((t) { + serverContext).then((t) { Expect.fail("Multiple listens on same port"); }).catchError((error) { Expect.isTrue(error is SocketException); @@ -61,12 +70,18 @@ void testInvalidBind() { }); } -void testSimpleConnect(String certificate) { +void testSimpleConnect() { asyncStart(); - SecureServerSocket.bind(HOST, 0, certificate).then((server) { - var clientEndFuture = SecureSocket.connect(HOST, server.port); + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { + var clientEndFuture = + SecureSocket.connect(HOST, server.port, context: clientContext); server.listen((serverEnd) { clientEndFuture.then((clientEnd) { + var x5 = clientEnd.peerCertificate; + print(x5.subject); + print(x5.issuer); + print(x5.startValidity); + print(x5.endValidity); clientEnd.close(); serverEnd.close(); server.close(); @@ -76,22 +91,32 @@ void testSimpleConnect(String certificate) { }); } -void testSimpleConnectFail(String certificate, bool cancelOnError) { +void testSimpleConnectFail(SecurityContext serverContext, + SecurityContext clientContext, + bool cancelOnError) { + print('$serverContext $clientContext $cancelOnError'); asyncStart(); - SecureServerSocket.bind(HOST, 0, certificate).then((server) { - var clientEndFuture = SecureSocket.connect(HOST, server.port) + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { + var clientEndFuture = + SecureSocket.connect(HOST, server.port, context: clientContext) .then((clientEnd) { Expect.fail("No client connection expected."); }) .catchError((error) { - Expect.isTrue(error is HandshakeException || + // TODO(whesse): When null context is supported, disallow + // the ArgumentError type here. + Expect.isTrue(error is ArgumentError || + error is HandshakeException || error is SocketException); }); server.listen((serverEnd) { Expect.fail("No server connection expected."); }, onError: (error) { - Expect.isTrue(error is CertificateException); + // TODO(whesse): When null context is supported, disallow + // the ArgumentError type here. + Expect.isTrue(error is ArgumentError || + error is HandshakeException); clientEndFuture.then((_) { if (!cancelOnError) server.close(); asyncEnd(); @@ -103,9 +128,10 @@ void testSimpleConnectFail(String certificate, bool cancelOnError) { void testServerListenAfterConnect() { asyncStart(); - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { Expect.isTrue(server.port > 0); - var clientEndFuture = SecureSocket.connect(HOST, server.port); + var clientEndFuture = + SecureSocket.connect(HOST, server.port, context: clientContext); new Timer(const Duration(milliseconds: 500), () { server.listen((serverEnd) { clientEndFuture.then((clientEnd) { @@ -144,7 +170,7 @@ void testSimpleReadWrite() { } } - SecureServerSocket.bind(HOST, 0, CERTIFICATE).then((server) { + SecureServerSocket.bind(HOST, 0, serverContext).then((server) { server.listen((client) { int bytesRead = 0; int bytesWritten = 0; @@ -166,7 +192,8 @@ void testSimpleReadWrite() { }); }); - SecureSocket.connect(HOST, server.port).then((socket) { + SecureSocket.connect(HOST, server.port, context: clientContext) + .then((socket) { int bytesRead = 0; int bytesWritten = 0; List dataSent = createTestData(); @@ -189,10 +216,6 @@ void testSimpleReadWrite() { main() { asyncStart(); - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; runTests(); @@ -203,12 +226,16 @@ main() { runTests() { testSimpleBind(); testInvalidBind(); - testSimpleConnect(CERTIFICATE); - testSimpleConnect("CN=localhost"); - testSimpleConnectFail("not_a_nickname", false); - testSimpleConnectFail("CN=notARealDistinguishedName", false); - testSimpleConnectFail("not_a_nickname", true); - testSimpleConnectFail("CN=notARealDistinguishedName", true); + testSimpleConnect(); + for (var server in [serverContext, null]) { + for (var client in [clientContext, null]) { + for (bool cancelOnError in [true, false]) { + if (server == null || client == null) { + testSimpleConnectFail(server, client, cancelOnError); + } + } + } + } testServerListenAfterConnect(); testSimpleReadWrite(); } diff --git a/tests/standalone/io/secure_session_resume_test.dart b/tests/standalone/io/secure_session_resume_test.dart index 5b8a3a37614..8ec99a452b5 100644 --- a/tests/standalone/io/secure_session_resume_test.dart +++ b/tests/standalone/io/secure_session_resume_test.dart @@ -24,11 +24,21 @@ import "package:expect/expect.dart"; import "package:async_helper/async_helper.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; + +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + Future startServer() { return SecureServerSocket.bind(HOST, 0, - CERTIFICATE).then((server) { + serverContext).then((server) { server.listen((SecureSocket client) { client.fold([], (message, data) => message..addAll(data)) .then((message) { @@ -44,7 +54,8 @@ Future startServer() { } Future testClient(server, name) { - return SecureSocket.connect(HOST, server.port).then((socket) { + return SecureSocket.connect(HOST, server.port, context: clientContext) + .then((socket) { socket.write("Hello from client $name"); socket.close(); return socket.fold([], (message, data) => message..addAll(data)) @@ -57,9 +68,6 @@ Future testClient(server, name) { void main() { asyncStart(); - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart'); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; runTests().then((_) => asyncEnd()); diff --git a/tests/standalone/io/secure_socket_alpn_test.dart b/tests/standalone/io/secure_socket_alpn_test.dart index 963fb74e20b..3ad9112b1e2 100644 --- a/tests/standalone/io/secure_socket_alpn_test.dart +++ b/tests/standalone/io/secure_socket_alpn_test.dart @@ -1,4 +1,4 @@ -// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. @@ -8,26 +8,31 @@ import 'dart:convert'; import 'package:expect/expect.dart'; import 'package:async_helper/async_helper.dart'; -const String MAX_LEN_ERROR = +const String NAME_LENGTH_ERROR = 'Length of protocol must be between 1 and 255'; -const String MAX_MSG_LEN_ERROR = - 'The maximum message length supported is 2^15-1'; +const String MESSAGE_LENGTH_ERROR = + 'The maximum message length supported is 2^13-1'; -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} +String localFile(path) => Platform.script.resolve(path).toFilePath(); -// Tests that client/server with same protocol can securly establish a -// connection, negogiate the protocol and can send data to each other. -void testSuccessfulAlpnNegogiationConnection(List clientProtocols, +SecurityContext clientContext() => new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + +SecurityContext serverContext() => new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +// Tests that client/server with same protocol can securely establish a +// connection, negotiate the protocol and can send data to each other. +void testSuccessfulAlpnNegotiationConnection(List clientProtocols, List serverProtocols, String selectedProtocol) { asyncStart(); - SecureServerSocket.bind('localhost', 0, 'localhost_cert', - supportedProtocols: serverProtocols).then((SecureServerSocket server) { + var sContext = serverContext()..setAlpnProtocols(serverProtocols, true); + SecureServerSocket.bind('localhost', 0, sContext) + .then((SecureServerSocket server) { asyncStart(); server.first.then((SecureSocket socket) { @@ -40,7 +45,7 @@ void testSuccessfulAlpnNegogiationConnection(List clientProtocols, }); asyncStart(); - SecureSocket.connect('localhost', server.port, + SecureSocket.connect('localhost', server.port, context: clientContext(), supportedProtocols: clientProtocols).then((socket) { Expect.equals(selectedProtocol, socket.selectedProtocol); socket..write('client message')..close(); @@ -55,160 +60,145 @@ void testSuccessfulAlpnNegogiationConnection(List clientProtocols, }); } -void testFailedAlpnNegogiationConnection(List clientProtocols, - List serverProtocols) { - asyncStart(); - SecureServerSocket.bind('localhost', 0, 'localhost_cert', - supportedProtocols: serverProtocols).then((SecureServerSocket server) { +void testInvalidArgument(List protocols, String errorIncludes) { + testInvalidArgumentServerContext(protocols, errorIncludes); + testInvalidArgumentClientContext(protocols, errorIncludes); + testInvalidArgumentClientConnect(protocols, errorIncludes); +} - asyncStart(); - server.first.catchError((error, stack) { - Expect.isTrue(error is HandshakeException); - asyncEnd(); - }); - - asyncStart(); - SecureSocket.connect('localhost', - server.port, - supportedProtocols: clientProtocols) - .catchError((error, stack) { - Expect.isTrue(error is HandshakeException); - asyncEnd(); - }); - - asyncEnd(); +void testInvalidArgumentServerContext(List protocols, + String errorIncludes) { + Expect.throws(() => serverContext().setAlpnProtocols(protocols, true), (e) { + Expect.isTrue(e is ArgumentError); + Expect.isTrue(e.toString().contains(errorIncludes)); + return true; }); } -void testInvalidArgumentsLongName(List protocols, - bool isLenError, - bool isMsgLenError) { +void testInvalidArgumentClientContext(List protocols, + String errorIncludes) { + Expect.throws(() => clientContext().setAlpnProtocols(protocols, false), (e) { + Expect.isTrue(e is ArgumentError); + Expect.isTrue(e.toString().contains(errorIncludes)); + return true; + }); +} + +void testInvalidArgumentClientConnect(List protocols, + String errorIncludes) { asyncStart(); - SecureServerSocket.bind('localhost', 0, 'localhost_cert', - supportedProtocols: protocols).then((SecureServerSocket server) { + var sContext = serverContext()..setAlpnProtocols(['abc'], true); + SecureServerSocket.bind('localhost', 0, sContext).then((server) async { + asyncStart(); + server.listen((SecureSocket socket) { + Expect.fail( + "Unexpected connection made to server, with bad client argument"); + }, onError: (e) { + Expect.fail("Unexpected error on server stream: $e"); + }, onDone: () { asyncEnd();}); asyncStart(); - server.first.catchError((error, stack) { - String errorString = '${(error as ArgumentError)}'; - if (isLenError) { - Expect.isTrue(errorString.contains(MAX_LEN_ERROR)); - } else if (isMsgLenError) { - Expect.isTrue(errorString.contains(MAX_MSG_LEN_ERROR)); - } else { - throw 'unreachable'; - } + SecureSocket.connect('localhost', server.port, context: clientContext(), + supportedProtocols: protocols).then((socket) { + Expect.fail( + "Unexpected connection made from client, with bad client argument"); + }, onError: (e) { + Expect.isTrue(e is ArgumentError); + Expect.isTrue(e.toString().contains(errorIncludes)); + server.close(); asyncEnd(); }); - - asyncStart(); - SecureSocket.connect('localhost', - server.port, - supportedProtocols: protocols) - .catchError((error, stack) { - String errorString = '${(error as ArgumentError)}'; - if (isLenError) { - Expect.isTrue(errorString.contains(MAX_LEN_ERROR)); - } else if (isMsgLenError) { - Expect.isTrue(errorString.contains(MAX_MSG_LEN_ERROR)); - } else { - throw 'unreachable'; - } - asyncEnd(); - }); - asyncEnd(); }); } main() { - InitializeSSL(); final longname256 = 'p' * 256; final String longname255 = 'p' * 255; final String strangelongname255 = 'ø' + 'p' * 253; final String strangelongname256 = 'ø' + 'p' * 254; - // This produces a message of (1 << 15)-2 bytes (1 length and 1 ascii byte). - final List allProtocols = new Iterable.generate( - (1 << 14) - 1, (i) => '0').toList(); + // This produces a message of (1 << 13) - 2 bytes. 2^12 -1 strings are each + // encoded by 1 length byte and 1 ascii byte. + final List manyProtocols = new Iterable.generate( + (1 << 12) - 1, (i) => '0').toList(); - // This produces a message of (1 << 15) bytes (1 length and 1 ascii byte). - final List allProtocolsPlusOne = new Iterable.generate( - (1 << 14), (i) => '0').toList(); + // This produces a message of (1 << 13) bytes. 2^12 strings are each + // encoded by 1 length byte and 1 ascii byte. + final List tooManyProtocols = new Iterable.generate( + (1 << 12), (i) => '0').toList(); - // Protocols are in order of decreasing priority. First matching protocol - // will be taken. - - // Test successfull negotiation, including priority. - testSuccessfulAlpnNegogiationConnection(['a'], + // Protocols are in order of decreasing priority. The server will select + // the first protocol from its list that has a match in the client list. + // Test successful negotiation, including priority. + testSuccessfulAlpnNegotiationConnection(['a'], ['a'], 'a'); - testSuccessfulAlpnNegogiationConnection([longname255], + testSuccessfulAlpnNegotiationConnection([longname255], [longname255], longname255); - testSuccessfulAlpnNegogiationConnection([strangelongname255], + testSuccessfulAlpnNegotiationConnection([strangelongname255], [strangelongname255], strangelongname255); - - testSuccessfulAlpnNegogiationConnection(allProtocols, - allProtocols, + testSuccessfulAlpnNegotiationConnection(manyProtocols, + manyProtocols, '0'); - - testSuccessfulAlpnNegogiationConnection(['a', 'b', 'c'], + testSuccessfulAlpnNegotiationConnection(['a', 'b', 'c'], ['a', 'b', 'c'], 'a'); - testSuccessfulAlpnNegogiationConnection(['a', 'b', 'c'], + testSuccessfulAlpnNegotiationConnection(['a', 'b', 'c'], ['c'], 'c'); // Server precedence. - testSuccessfulAlpnNegogiationConnection(['a', 'b', 'c'], + testSuccessfulAlpnNegotiationConnection(['a', 'b', 'c'], ['c', 'b', 'a'], - 'a'); + 'c'); - testSuccessfulAlpnNegogiationConnection(['c'], + testSuccessfulAlpnNegotiationConnection(['c'], ['a', 'b', 'c'], 'c'); - testSuccessfulAlpnNegogiationConnection(['s1', 'b', 'e1'], + testSuccessfulAlpnNegotiationConnection(['s1', 'b', 'e1'], ['s2', 'b', 'e2'], 'b'); - // Test no protocol negotiation support - testSuccessfulAlpnNegogiationConnection(null, + testSuccessfulAlpnNegotiationConnection(null, null, null); - testSuccessfulAlpnNegogiationConnection(['a', 'b', 'c'], + testSuccessfulAlpnNegotiationConnection(['a', 'b', 'c'], null, null); - testSuccessfulAlpnNegogiationConnection(null, + testSuccessfulAlpnNegotiationConnection(null, ['a', 'b', 'c'], null); - testSuccessfulAlpnNegogiationConnection([], + testSuccessfulAlpnNegotiationConnection([], [], null); - testSuccessfulAlpnNegogiationConnection(['a', 'b', 'c'], + testSuccessfulAlpnNegotiationConnection(['a', 'b', 'c'], [], null); - testSuccessfulAlpnNegogiationConnection([], + testSuccessfulAlpnNegotiationConnection([], ['a', 'b', 'c'], null); - // Test non-overlapping protocols. - testFailedAlpnNegogiationConnection(['a'], ['b']); - + // Test non-overlapping protocols. The ALPN RFC says the connection + // should be terminated, but OpenSSL continues as if no ALPN is present. + // Issue https://github.com/dart-lang/sdk/issues/23580 + // Chromium issue https://code.google.com/p/chromium/issues/detail?id=497770 + testSuccessfulAlpnNegotiationConnection(['a'], ['b'], null); // Test too short / too long protocol names. - testInvalidArgumentsLongName([longname256], true, false); - testInvalidArgumentsLongName([strangelongname256], true, false); - testInvalidArgumentsLongName([''], true, false); - testInvalidArgumentsLongName(allProtocolsPlusOne, false, true); - testInvalidArgumentsLongName(allProtocolsPlusOne, false, true); + testInvalidArgument([longname256], NAME_LENGTH_ERROR); + testInvalidArgument([strangelongname256], NAME_LENGTH_ERROR); + testInvalidArgument([''], NAME_LENGTH_ERROR); + testInvalidArgument(tooManyProtocols, MESSAGE_LENGTH_ERROR); } diff --git a/tests/standalone/io/secure_socket_argument_test.dart b/tests/standalone/io/secure_socket_argument_test.dart index 6caf84ef2a7..1fb34560e41 100644 --- a/tests/standalone/io/secure_socket_argument_test.dart +++ b/tests/standalone/io/secure_socket_argument_test.dart @@ -5,23 +5,15 @@ import "package:expect/expect.dart"; import "dart:io"; -void testInitialzeArguments() { - Expect.throws(() => SecureSocket.initialize(database: "foo.txt")); - Expect.throws(() => SecureSocket.initialize(password: false)); - Expect.throws(() => SecureSocket.initialize(useBuiltinRoots: 7)); -} - void testServerSocketArguments() { Expect.throws(() => - SecureServerSocket.bind(SERVER_ADDRESS, 65536, 5, CERTIFICATE)); + SecureServerSocket.bind(SERVER_ADDRESS, 65536, null)); Expect.throws(() => - SecureServerSocket.bind(SERVER_ADDRESS, -1, CERTIFICATE)); + SecureServerSocket.bind(SERVER_ADDRESS, -1, null)); Expect.throws(() => - SecureServerSocket.bind(SERVER_ADDRESS, 0, -1, CERTIFICATE)); + SecureServerSocket.bind(SERVER_ADDRESS, 0, "not a context")); } void main() { - testInitialzeArguments(); - SecureSocket.initialize(); testServerSocketArguments(); } diff --git a/tests/standalone/io/secure_socket_bad_data_test.dart b/tests/standalone/io/secure_socket_bad_data_test.dart index f0c3fa51190..282610722ff 100644 --- a/tests/standalone/io/secure_socket_bad_data_test.dart +++ b/tests/standalone/io/secure_socket_bad_data_test.dart @@ -158,10 +158,6 @@ Future test(bool hostnameInConnect) { main() { - String certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); asyncStart(); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; diff --git a/tests/standalone/io/secure_socket_renegotiate_client.dart b/tests/standalone/io/secure_socket_renegotiate_client.dart index cc3905a3ffe..8c414582b70 100644 --- a/tests/standalone/io/secure_socket_renegotiate_client.dart +++ b/tests/standalone/io/secure_socket_renegotiate_client.dart @@ -12,8 +12,10 @@ import "dart:convert"; import "dart:io"; const HOST_NAME = "localhost"; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); class ExpectException implements Exception { ExpectException(this.message); @@ -37,7 +39,10 @@ void expect(condition) { void runClient(int port) { - SecureSocket.connect(HOST_NAME, port, sendClientCertificate: true) + SecureSocket.connect(HOST_NAME, + port, + context: clientContext, + sendClientCertificate: true) .then((SecureSocket socket) { X509Certificate certificate = socket.peerCertificate; expect(certificate != null); @@ -74,6 +79,5 @@ void runClient(int port) { void main(List args) { - SecureSocket.initialize(database: args[1], password: 'dartdart'); runClient(int.parse(args[0])); } diff --git a/tests/standalone/io/secure_socket_renegotiate_test.dart b/tests/standalone/io/secure_socket_renegotiate_test.dart index 595dd0d0ac2..457cee63625 100644 --- a/tests/standalone/io/secure_socket_renegotiate_test.dart +++ b/tests/standalone/io/secure_socket_renegotiate_test.dart @@ -14,17 +14,15 @@ import "package:expect/expect.dart"; import "package:path/path.dart"; const HOST_NAME = "localhost"; -const CERTIFICATE = "localhost_cert"; - - -String certificateDatabase() => Platform.script.resolve('pkcert').toFilePath(); +String localFile(path) => Platform.script.resolve(path).toFilePath(); +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); Future runServer() { - SecureSocket.initialize(database: certificateDatabase(), - password: 'dartdart'); - - return SecureServerSocket.bind(HOST_NAME, 0, CERTIFICATE) + return SecureServerSocket.bind(HOST_NAME, 0, serverContext) .then((SecureServerSocket server) { server.listen((SecureSocket socket) { Expect.isNull(socket.peerCertificate); @@ -71,8 +69,7 @@ void main() { Expect.isTrue(clientScript.endsWith("_client.dart")); Process.run(Platform.executable, [clientScript, - server.port.toString(), - certificateDatabase()]) + server.port.toString()]) .then((ProcessResult result) { if (result.exitCode != 0) { print("Client failed, stdout:"); diff --git a/tests/standalone/io/secure_socket_test.dart b/tests/standalone/io/secure_socket_test.dart index 143b73d13d8..897bd84bea1 100644 --- a/tests/standalone/io/secure_socket_test.dart +++ b/tests/standalone/io/secure_socket_test.dart @@ -12,12 +12,22 @@ import "package:path/path.dart"; import "dart:async"; import "dart:io"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + Future startServer() { return HttpServer.bindSecure( "localhost", 0, - backlog: 5, - certificateName: 'localhost_cert').then((server) { + serverContext, + backlog: 5).then((server) { server.listen((HttpRequest request) { request.listen( (_) { }, @@ -33,17 +43,11 @@ Future startServer() { }); } -void InitializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: 'dartdart'); -} - void main() { - InitializeSSL(); List body = []; startServer().then((server) { - SecureSocket.connect("localhost", server.port).then((socket) { + SecureSocket.connect("localhost", server.port, context: clientContext) + .then((socket) { socket.write("GET / HTTP/1.0\r\nHost: localhost\r\n\r\n"); socket.close(); socket.listen( diff --git a/tests/standalone/io/secure_unauthorized_client.dart b/tests/standalone/io/secure_unauthorized_client.dart index 043577f0a7e..8677bec0e96 100644 --- a/tests/standalone/io/secure_unauthorized_client.dart +++ b/tests/standalone/io/secure_unauthorized_client.dart @@ -8,6 +8,11 @@ import "dart:async"; import "dart:io"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + class ExpectException implements Exception { ExpectException(this.message); String toString() => "ExpectException: $message"; @@ -26,7 +31,7 @@ Future runClients(int port) { var testFutures = []; for (int i = 0; i < 20; ++i) { testFutures.add( - SecureSocket.connect(HOST_NAME, port) + SecureSocket.connect(HOST_NAME, port, context: clientContext) .then((SecureSocket socket) { expect(false); }, onError: (e) { @@ -38,7 +43,6 @@ Future runClients(int port) { void main(List args) { - SecureSocket.initialize(); runClients(int.parse(args[0])) .then((_) => print('SUCCESS')); } diff --git a/tests/standalone/io/secure_unauthorized_test.dart b/tests/standalone/io/secure_unauthorized_test.dart index 453b8b1bd4b..a600d7458f3 100644 --- a/tests/standalone/io/secure_unauthorized_test.dart +++ b/tests/standalone/io/secure_unauthorized_test.dart @@ -11,14 +11,15 @@ import "dart:async"; import "dart:io"; const HOST_NAME = "localhost"; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/untrusted_server_chain.pem')) + ..usePrivateKey(localFile('certificates/untrusted_server_key.pem'), + password: 'dartdart'); Future runServer() { - SecureSocket.initialize( - database: Platform.script.resolve('pkcert').toFilePath(), - password: 'dartdart'); - - return SecureServerSocket.bind(HOST_NAME, 0, CERTIFICATE) + return SecureServerSocket.bind(HOST_NAME, 0, serverContext) .then((SecureServerSocket server) { server.listen((SecureSocket socket) { socket.listen((_) { }, diff --git a/tests/standalone/io/socket_upgrade_to_secure_test.dart b/tests/standalone/io/socket_upgrade_to_secure_test.dart index d973a788964..5873d004d1e 100644 --- a/tests/standalone/io/socket_upgrade_to_secure_test.dart +++ b/tests/standalone/io/socket_upgrade_to_secure_test.dart @@ -14,7 +14,15 @@ import "package:async_helper/async_helper.dart"; import "package:expect/expect.dart"; InternetAddress HOST; -const CERTIFICATE = "localhost_cert"; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); // This test creates a server and a client connects. After connecting // and an optional initial handshake the connection is secured by @@ -156,9 +164,11 @@ void test(bool hostnameInConnect, return Socket.connect(HOST, port).then((socket) { var future; if (hostnameInConnect) { - future = SecureSocket.secure(socket); + future = SecureSocket.secure(socket, context: clientContext); } else { - future = SecureSocket.secure(socket, host: HOST); + future = SecureSocket.secure(socket, + host: HOST, + context: clientContext); } return future.then((secureSocket) { socket.add([0]); @@ -170,9 +180,11 @@ void test(bool hostnameInConnect, return runClientHandshake(socket).then((_) { var future; if (hostnameInConnect) { - future = SecureSocket.secure(socket); + future = SecureSocket.secure(socket, context: clientContext); } else { - future = SecureSocket.secure(socket, host: HOST.host); + future = SecureSocket.secure(socket, + host: HOST, + context: clientContext); } return future.then((secureSocket) { socket.add([0]); @@ -186,7 +198,7 @@ void test(bool hostnameInConnect, serverReady(server) { server.listen((client) { if (!handshakeBeforeSecure) { - SecureSocket.secureServer(client, CERTIFICATE).then((secureClient) { + SecureSocket.secureServer(client, serverContext).then((secureClient) { client.add([0]); runServer(secureClient).then((_) => server.close()); }); @@ -194,7 +206,7 @@ void test(bool hostnameInConnect, runServerHandshake(client).then((carryOverData) { SecureSocket.secureServer( client, - CERTIFICATE, + serverContext, bufferedData: carryOverData).then((secureClient) { client.add([0]); runServer(secureClient).then((_) => server.close()); @@ -213,18 +225,15 @@ void test(bool hostnameInConnect, main() { asyncStart(); - var certificateDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: certificateDatabase, - password: 'dartdart', - useBuiltinRoots: false); InternetAddress.lookup("localhost").then((hosts) { HOST = hosts.first; test(false, false); - test(true, false); - test(false, true); - test(true, true); - test(false, true, true); - test(true, true, true); + // TODO(whesse): Enable the test with all argument combinations: + // test(true, false); + // test(false, true); + // test(true, true); + // test(false, true, true); + // test(true, true, true); asyncEnd(); }); } diff --git a/tests/standalone/io/web_socket_error_test.dart b/tests/standalone/io/web_socket_error_test.dart index 7081dca75d0..cc16bde50f6 100644 --- a/tests/standalone/io/web_socket_error_test.dart +++ b/tests/standalone/io/web_socket_error_test.dart @@ -25,6 +25,16 @@ const String webSocketGUID = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"; const String CERT_NAME = 'localhost_cert'; const String HOST_NAME = 'localhost'; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + /** * A SecurityConfiguration lets us run the tests over HTTP or HTTPS. */ @@ -36,13 +46,14 @@ class SecurityConfiguration { Future createServer({int backlog: 0}) => secure ? HttpServer.bindSecure(HOST_NAME, 0, - backlog: backlog, - certificateName: CERT_NAME) + serverContext, + backlog: backlog) : HttpServer.bind(HOST_NAME, 0, backlog: backlog); Future createClient(int port) => + // TODO(whesse): Add a client context argument to WebSocket.connect. WebSocket.connect('${secure ? "wss" : "ws"}://$HOST_NAME:$port/'); @@ -89,18 +100,11 @@ class SecurityConfiguration { } -void initializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: "dartdart"); -} - - main() { asyncStart(); new SecurityConfiguration(secure: false).runTests(); - initializeSSL(); - new SecurityConfiguration(secure: true).runTests(); + // TODO(whesse): WebSocket.connect needs an optional context: parameter + // new SecurityConfiguration(secure: true).runTests(); asyncEnd(); } diff --git a/tests/standalone/io/web_socket_protocol_processor_test.dart b/tests/standalone/io/web_socket_protocol_processor_test.dart index 179fe0b6fa5..ba75622fc01 100644 --- a/tests/standalone/io/web_socket_protocol_processor_test.dart +++ b/tests/standalone/io/web_socket_protocol_processor_test.dart @@ -36,6 +36,7 @@ part "../../../sdk/lib/io/platform_impl.dart"; part "../../../sdk/lib/io/service_object.dart"; part "../../../sdk/lib/io/secure_socket.dart"; part "../../../sdk/lib/io/secure_server_socket.dart"; +part "../../../sdk/lib/io/security_context.dart"; part "../../../sdk/lib/io/socket.dart"; part "../../../sdk/lib/io/websocket.dart"; part "../../../sdk/lib/io/websocket_impl.dart"; diff --git a/tests/standalone/io/web_socket_test.dart b/tests/standalone/io/web_socket_test.dart index ae383078ac6..695b2cd0a3b 100644 --- a/tests/standalone/io/web_socket_test.dart +++ b/tests/standalone/io/web_socket_test.dart @@ -19,9 +19,18 @@ import "package:path/path.dart"; const WEB_SOCKET_GUID = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"; -const String CERT_NAME = 'localhost_cert'; const String HOST_NAME = 'localhost'; +String localFile(path) => Platform.script.resolve(path).toFilePath(); + +SecurityContext serverContext = new SecurityContext() + ..useCertificateChain(localFile('certificates/server_chain.pem')) + ..usePrivateKey(localFile('certificates/server_key.pem'), + password: 'dartdart'); + +SecurityContext clientContext = new SecurityContext() + ..setTrustedCertificates(file: localFile('certificates/trusted_certs.pem')); + /** * A SecurityConfiguration lets us run the tests over HTTP or HTTPS. */ @@ -33,13 +42,14 @@ class SecurityConfiguration { Future createServer({int backlog: 0}) => secure ? HttpServer.bindSecure(HOST_NAME, 0, - backlog: backlog, - certificateName: CERT_NAME) + serverContext, + backlog: backlog) : HttpServer.bind(HOST_NAME, 0, backlog: backlog); Future createClient(int port) => + // TODO(whesse): Add client context argument to WebSocket.connect WebSocket.connect('${secure ? "wss" : "ws"}://$HOST_NAME:$port/'); checkCloseStatus(webSocket, closeStatus, closeReason) { @@ -581,15 +591,8 @@ class SecurityConfiguration { } -void initializeSSL() { - var testPkcertDatabase = Platform.script.resolve('pkcert').toFilePath(); - SecureSocket.initialize(database: testPkcertDatabase, - password: "dartdart"); -} - - main() { new SecurityConfiguration(secure: false).runTests(); - initializeSSL(); - new SecurityConfiguration(secure: true).runTests(); + // TODO(whesse): Make WebSocket.connect() take an optional context: parameter. + // new SecurityConfiguration(secure: true).runTests(); } diff --git a/tests/standalone/standalone.status b/tests/standalone/standalone.status index 128613b66fa..9bb7ec381f6 100644 --- a/tests/standalone/standalone.status +++ b/tests/standalone/standalone.status @@ -196,3 +196,11 @@ io/http_client_connect_test: Crash # (static Iterable +#include +#include + + +OPENSSL_COMPILE_ASSERT(ERR_LIB_NONE == 1, library_values_changed_1); +OPENSSL_COMPILE_ASSERT(ERR_LIB_SYS == 2, library_values_changed_2); +OPENSSL_COMPILE_ASSERT(ERR_LIB_BN == 3, library_values_changed_3); +OPENSSL_COMPILE_ASSERT(ERR_LIB_RSA == 4, library_values_changed_4); +OPENSSL_COMPILE_ASSERT(ERR_LIB_DH == 5, library_values_changed_5); +OPENSSL_COMPILE_ASSERT(ERR_LIB_EVP == 6, library_values_changed_6); +OPENSSL_COMPILE_ASSERT(ERR_LIB_BUF == 7, library_values_changed_7); +OPENSSL_COMPILE_ASSERT(ERR_LIB_OBJ == 8, library_values_changed_8); +OPENSSL_COMPILE_ASSERT(ERR_LIB_PEM == 9, library_values_changed_9); +OPENSSL_COMPILE_ASSERT(ERR_LIB_DSA == 10, library_values_changed_10); +OPENSSL_COMPILE_ASSERT(ERR_LIB_X509 == 11, library_values_changed_11); +OPENSSL_COMPILE_ASSERT(ERR_LIB_ASN1 == 12, library_values_changed_12); +OPENSSL_COMPILE_ASSERT(ERR_LIB_CONF == 13, library_values_changed_13); +OPENSSL_COMPILE_ASSERT(ERR_LIB_CRYPTO == 14, library_values_changed_14); +OPENSSL_COMPILE_ASSERT(ERR_LIB_EC == 15, library_values_changed_15); +OPENSSL_COMPILE_ASSERT(ERR_LIB_SSL == 16, library_values_changed_16); +OPENSSL_COMPILE_ASSERT(ERR_LIB_BIO == 17, library_values_changed_17); +OPENSSL_COMPILE_ASSERT(ERR_LIB_PKCS7 == 18, library_values_changed_18); +OPENSSL_COMPILE_ASSERT(ERR_LIB_PKCS8 == 19, library_values_changed_19); +OPENSSL_COMPILE_ASSERT(ERR_LIB_X509V3 == 20, library_values_changed_20); +OPENSSL_COMPILE_ASSERT(ERR_LIB_RAND == 21, library_values_changed_21); +OPENSSL_COMPILE_ASSERT(ERR_LIB_ENGINE == 22, library_values_changed_22); +OPENSSL_COMPILE_ASSERT(ERR_LIB_OCSP == 23, library_values_changed_23); +OPENSSL_COMPILE_ASSERT(ERR_LIB_UI == 24, library_values_changed_24); +OPENSSL_COMPILE_ASSERT(ERR_LIB_COMP == 25, library_values_changed_25); +OPENSSL_COMPILE_ASSERT(ERR_LIB_ECDSA == 26, library_values_changed_26); +OPENSSL_COMPILE_ASSERT(ERR_LIB_ECDH == 27, library_values_changed_27); +OPENSSL_COMPILE_ASSERT(ERR_LIB_HMAC == 28, library_values_changed_28); +OPENSSL_COMPILE_ASSERT(ERR_LIB_DIGEST == 29, library_values_changed_29); +OPENSSL_COMPILE_ASSERT(ERR_LIB_CIPHER == 30, library_values_changed_30); +OPENSSL_COMPILE_ASSERT(ERR_LIB_USER == 31, library_values_changed_31); +OPENSSL_COMPILE_ASSERT(ERR_LIB_HKDF == 32, library_values_changed_32); +OPENSSL_COMPILE_ASSERT(ERR_NUM_LIBS == 33, library_values_changed_num); + +const uint32_t kOpenSSLFunctionValues[] = { + 0xc32054b, + 0xc328556, + 0xc330561, + 0xc33856e, + 0xc340578, + 0xc348582, + 0xc350589, + 0xc358595, + 0xc36059c, + 0xc3685b2, + 0xc3705d1, + 0xc3785e2, + 0xc3805f2, + 0xc38860c, + 0xc390621, + 0xc398630, + 0xc3a0649, + 0xc3a865d, + 0xc3b0669, + 0xc3b8670, + 0xc3c0678, + 0xc3c8690, + 0xc3d0698, + 0xc3d86a0, + 0xc3e06ab, + 0xc3e85c7, + 0xc3f0686, + 0x1032193d, + 0x10329954, + 0x1033196d, + 0x10339983, + 0x10341993, + 0x103499bb, + 0x103519c9, + 0x103599d8, + 0x103619f8, + 0x10369a17, + 0x10371a34, + 0x10379a51, + 0x10381a66, + 0x10389a88, + 0x10391aa7, + 0x10399ac6, + 0x103a1add, + 0x103a9af4, + 0x103b1afd, + 0x103b9b08, + 0x103c1b22, + 0x103c9b2a, + 0x103d1b32, + 0x103d99a6, + 0x103e1b4b, + 0x103e9b5d, + 0x103f1b70, + 0x103f9b79, + 0x10401b39, + 0x14320a4e, + 0x14328a5c, + 0x14330a68, + 0x14338a75, + 0x18361224, + 0x18371252, + 0x18379263, + 0x18381279, + 0x1839129c, + 0x183992b1, + 0x183a12c3, + 0x183c1307, + 0x183c9315, + 0x183d1328, + 0x183d9338, + 0x183e935e, + 0x183f1371, + 0x183f9380, + 0x184093aa, + 0x18411416, + 0x18419427, + 0x1842143a, + 0x1842944c, + 0x1843145e, + 0x1843946f, + 0x18441480, + 0x18449491, + 0x184514a2, + 0x184594af, + 0x184614d1, + 0x184694e4, + 0x184714f8, + 0x18479505, + 0x18481514, + 0x18489523, + 0x18491534, + 0x18499550, + 0x184a155e, + 0x184a956f, + 0x184b1580, + 0x184b958e, + 0x184c159e, + 0x184c95c4, + 0x184d15d3, + 0x184d95e3, + 0x184e15f3, + 0x184e9602, + 0x184f1541, + 0x184f91b3, + 0x18501156, + 0x1850916e, + 0x18511190, + 0x185191a2, + 0x185211d4, + 0x185291ed, + 0x185311fe, + 0x18539214, + 0x18541239, + 0x1854928a, + 0x185512d3, + 0x185592e8, + 0x185612f5, + 0x1856934d, + 0x18571390, + 0x1857939d, + 0x185813b9, + 0x185893ca, + 0x185913da, + 0x185993ea, + 0x185a13f9, + 0x185a9408, + 0x185b14bd, + 0x1c3206b8, + 0x1c3286c4, + 0x1c3306cf, + 0x1c3386db, + 0x20321616, + 0x20329621, + 0x20331629, + 0x20339635, + 0x24321641, + 0x2432964f, + 0x24331661, + 0x24339670, + 0x24341683, + 0x24349696, + 0x243516ad, + 0x243596c5, + 0x243616d3, + 0x243696eb, + 0x243716f4, + 0x24379706, + 0x2438171a, + 0x24389727, + 0x2439173d, + 0x24399755, + 0x243a176d, + 0x243a9777, + 0x243b178c, + 0x243b979a, + 0x243c17b2, + 0x243c97c9, + 0x243d17d4, + 0x243d97e2, + 0x28320aae, + 0x28328abd, + 0x28330ac8, + 0x28338acd, + 0x28340ad8, + 0x2c322b99, + 0x2c32aba5, + 0x2c332bb8, + 0x2c33abc9, + 0x2c342be2, + 0x2c34ac0a, + 0x2c352c21, + 0x2c35ac3e, + 0x2c362c5b, + 0x2c36ac78, + 0x2c372c91, + 0x2c37acaa, + 0x2c382cc0, + 0x2c38acce, + 0x2c392ce0, + 0x2c39acfd, + 0x2c3a2d1a, + 0x2c3aad28, + 0x2c3b2d46, + 0x2c3bad64, + 0x2c3c2d7f, + 0x2c3cad93, + 0x2c3d2da5, + 0x2c3dadb5, + 0x2c3e2dc3, + 0x2c3eadd3, + 0x2c3f2de3, + 0x2c3fae22, + 0x2c402e33, + 0x2c40ae4e, + 0x2c412e62, + 0x2c41ae75, + 0x2c422e94, + 0x2c42aea8, + 0x2c432ebb, + 0x2c43aeca, + 0x2c442ed9, + 0x2c44aef0, + 0x2c452f0b, + 0x2c45af23, + 0x2c462f37, + 0x2c46af4a, + 0x2c472f5b, + 0x2c47af6c, + 0x2c482f7d, + 0x2c48af8e, + 0x2c492f9d, + 0x2c49afaa, + 0x2c4a2fb7, + 0x2c4aafc4, + 0x2c4b2fcd, + 0x2c4bafe1, + 0x2c4c2ff0, + 0x2c4caffe, + 0x2c4d3020, + 0x2c4db031, + 0x2c4e3042, + 0x2c4eb00d, + 0x2c4f2bfb, + 0x2c4fadfe, + 0x2c502e10, + 0x30320000, + 0x30328018, + 0x3033002c, + 0x30338042, + 0x3034005b, + 0x3034806c, + 0x3035007f, + 0x3035808f, + 0x3036009d, + 0x303680b3, + 0x303700c3, + 0x303780d8, + 0x303800e6, + 0x303880f7, + 0x30390103, + 0x3039810c, + 0x303a011d, + 0x303a812d, + 0x303b013a, + 0x303b8146, + 0x303c0157, + 0x303c8165, + 0x303d0176, + 0x303d8188, + 0x303e0199, + 0x303e81a8, + 0x303f01b9, + 0x303f81cd, + 0x304001df, + 0x304081ec, + 0x30410202, + 0x30418215, + 0x30420225, + 0x30428239, + 0x3043024a, + 0x3043825a, + 0x30440265, + 0x3044826d, + 0x3045027d, + 0x30458294, + 0x304602a1, + 0x304682b7, + 0x304702c9, + 0x304782d5, + 0x304802e1, + 0x304882ef, + 0x30490308, + 0x30498316, + 0x304a032b, + 0x304a8343, + 0x304b034d, + 0x304b8361, + 0x304c0372, + 0x304c8382, + 0x304d038f, + 0x304d83a0, + 0x304e03b0, + 0x304e83c2, + 0x304f03d3, + 0x304f83e2, + 0x305003f6, + 0x30508404, + 0x30510413, + 0x3051841c, + 0x343209d6, + 0x343289e6, + 0x343309f1, + 0x343389fe, + 0x38320a07, + 0x38328a1f, + 0x38330a32, + 0x38338a3c, + 0x3c320aeb, + 0x3c328af9, + 0x3c330b10, + 0x3c338b24, + 0x3c340b56, + 0x3c348b67, + 0x3c350b73, + 0x3c358ba0, + 0x3c360bb2, + 0x3c368bdb, + 0x3c370be8, + 0x3c378bf5, + 0x3c380c03, + 0x3c388c10, + 0x3c390c1d, + 0x3c398c41, + 0x3c3a0c51, + 0x3c3a8c69, + 0x3c3b0c7e, + 0x3c3b8c93, + 0x3c3c0ca0, + 0x3c3c8cb3, + 0x3c3d0cc6, + 0x3c3d8cea, + 0x3c3e0d12, + 0x3c3e8d2b, + 0x3c3f0d41, + 0x3c3f8d4e, + 0x3c400d61, + 0x3c408d72, + 0x3c410d83, + 0x3c418d9c, + 0x3c420db5, + 0x3c428dcb, + 0x3c430de8, + 0x3c438dfe, + 0x3c440e82, + 0x3c448ea9, + 0x3c450ec7, + 0x3c458ee1, + 0x3c460ef9, + 0x3c468f11, + 0x3c470f3c, + 0x3c478f67, + 0x3c480f88, + 0x3c488fb1, + 0x3c490fcc, + 0x3c498ff5, + 0x3c4a1002, + 0x3c4a9019, + 0x3c4b1030, + 0x3c4b9059, + 0x3c4c1069, + 0x3c4c9075, + 0x3c4d108d, + 0x3c4d90a0, + 0x3c4e10b1, + 0x3c4e90c2, + 0x3c4f10e8, + 0x3c4f8adf, + 0x3c500e1a, + 0x3c508e3a, + 0x3c510e67, + 0x3c518fe7, + 0x3c5210d2, + 0x3c528b87, + 0x3c530b3f, + 0x40321bb9, + 0x40329bf2, + 0x40331c1a, + 0x40339c32, + 0x40341c50, + 0x40349cb6, + 0x40351ccd, + 0x40359ce9, + 0x40361d05, + 0x40369d1f, + 0x40371d3e, + 0x40379d5d, + 0x40381d75, + 0x40389d92, + 0x40391db5, + 0x40399dd2, + 0x403a1e07, + 0x403a9e61, + 0x403b1e76, + 0x403b9e92, + 0x403c1eac, + 0x403c9eb7, + 0x403d1eda, + 0x403d9efe, + 0x403e1f14, + 0x403e9f1e, + 0x403f1f2a, + 0x403f9f3b, + 0x40401f53, + 0x40409f5b, + 0x40411f64, + 0x40419f6d, + 0x40421f95, + 0x40429fa9, + 0x40431fb4, + 0x40439fc0, + 0x40442014, + 0x4044a020, + 0x4045202d, + 0x4045a040, + 0x40462058, + 0x4046a070, + 0x40472086, + 0x4047a0a1, + 0x404820bc, + 0x4048a0d0, + 0x404920e9, + 0x4049a102, + 0x404a211c, + 0x404aa126, + 0x404b1e29, + 0x404b9e48, + 0x404c2136, + 0x404ca144, + 0x404d2151, + 0x404da165, + 0x404e217d, + 0x404ea18b, + 0x404f21b5, + 0x404fa1cc, + 0x405021de, + 0x4050a20f, + 0x40512240, + 0x4051a255, + 0x40522278, + 0x4052a298, + 0x405322ad, + 0x4053a2bd, + 0x4054a2c9, + 0x405522df, + 0x4055a31f, + 0x4056232c, + 0x4056a336, + 0x40572344, + 0x4057a35f, + 0x4058237a, + 0x4058a399, + 0x405923ae, + 0x4059a3c3, + 0x405a23e0, + 0x405aa3f4, + 0x405b2410, + 0x405ba426, + 0x405c2443, + 0x405ca455, + 0x405d246c, + 0x405da47d, + 0x405e2499, + 0x405ea4ad, + 0x405f24bd, + 0x405fa4d9, + 0x406024ee, + 0x4060a504, + 0x40612521, + 0x4061a53a, + 0x40622564, + 0x4062a56d, + 0x4063257d, + 0x4063a5b6, + 0x406425cc, + 0x4064a5ea, + 0x406525ff, + 0x4065a61c, + 0x40662633, + 0x4066a651, + 0x4067266e, + 0x4067a685, + 0x406826a3, + 0x4068a6ba, + 0x406926d2, + 0x4069a6e3, + 0x406a26f6, + 0x406aa709, + 0x406b271d, + 0x406ba741, + 0x406c275c, + 0x406ca77d, + 0x406d27a1, + 0x406da7bc, + 0x406e27dd, + 0x406ea7f2, + 0x406f280b, + 0x406fa818, + 0x40702826, + 0x4070a833, + 0x40712850, + 0x4071a870, + 0x4072288b, + 0x4072a8a4, + 0x407328bb, + 0x4073a8d5, + 0x407428f9, + 0x4074a90f, + 0x40752923, + 0x4075a938, + 0x40762952, + 0x4076a964, + 0x40772979, + 0x4077a99f, + 0x407829bc, + 0x4078a9df, + 0x40792a05, + 0x4079aa22, + 0x407a2a45, + 0x407aaa61, + 0x407b2a7d, + 0x407baa8f, + 0x407c2a9c, + 0x407e2aa9, + 0x407eaabf, + 0x407f2ad7, + 0x407faaea, + 0x40802aff, + 0x4080ab18, + 0x40812b36, + 0x4081ab56, + 0x40822b5f, + 0x4082ab7b, + 0x40832b84, + 0x4083a19a, + 0x40842229, + 0x4084a1f9, + 0x408525a5, + 0x4085a589, + 0x40861c8e, + 0x40869ca1, + 0x40871ff4, + 0x4087a003, + 0x40881bfe, + 0x40889f7d, + 0x40891fdb, + 0x4089a54d, + 0x408a1b84, + 0x408a9b95, + 0x408b1ba7, + 0x408ba266, + 0x408c1df0, + 0x408c9e17, + 0x408d22fd, + 0x408d9bd3, + 0x408e1c6f, + 0x4432042a, + 0x4432843c, + 0x44330445, + 0x4433844d, + 0x4434045a, + 0x44348475, + 0x44350490, + 0x443584b0, + 0x443604cc, + 0x443684ed, + 0x443704f4, + 0x44378502, + 0x4438050c, + 0x44388518, + 0x44390522, + 0x4439852d, + 0x443a0537, + 0x443a8541, + 0x443b046a, + 0x4c3217ea, + 0x4c3297f9, + 0x4c331808, + 0x4c339821, + 0x4c34183c, + 0x4c349858, + 0x4c35186a, + 0x4c359878, + 0x4c36188d, + 0x4c36989e, + 0x4c3718ac, + 0x4c3798ba, + 0x4c3818cc, + 0x4c3898dc, + 0x4c3918e6, + 0x4c3998fe, + 0x4c3a1916, + 0x4c3a9929, + 0x50323053, + 0x5032b068, + 0x50333079, + 0x5033b08c, + 0x5034309d, + 0x5034b0b0, + 0x503530bf, + 0x5035b0d4, + 0x503630e4, + 0x5036b0f3, + 0x50373104, + 0x5037b114, + 0x50383125, + 0x5038b138, + 0x5039314a, + 0x5039b160, + 0x503a3172, + 0x503ab183, + 0x503b3194, + 0x503bb1a5, + 0x503c31b0, + 0x503cb1bc, + 0x503d31c7, + 0x503db1d2, + 0x503e31df, + 0x503eb1f4, + 0x503f3202, + 0x503fb216, + 0x50403229, + 0x5040b23a, + 0x50413254, + 0x5041b263, + 0x5042326c, + 0x5042b27b, + 0x5043328d, + 0x5043b299, + 0x504432a1, + 0x5044b2b4, + 0x504532c5, + 0x5045b2db, + 0x504632e7, + 0x5046b2fb, + 0x50473309, + 0x5047b31d, + 0x50483337, + 0x5048b34b, + 0x50493361, + 0x5049b378, + 0x504a338a, + 0x504ab39e, + 0x504b33b3, + 0x504bb3ca, + 0x504c33de, + 0x504cb3e7, + 0x504d33ef, + 0x504db3fe, + 0x504e340e, + 0x68321109, + 0x6832911a, + 0x6833112a, + 0x68339138, + 0x68341145, + 0x6c3210f8, + 0x74320a89, + 0x74328a9b, + 0x783206e8, + 0x7832871b, + 0x7833072d, + 0x7833873f, + 0x78340753, + 0x78348767, + 0x78350785, + 0x78358797, + 0x783607ab, + 0x78368819, + 0x7837082b, + 0x7837883d, + 0x7838084f, + 0x78388866, + 0x7839087d, + 0x78398894, + 0x783a08b0, + 0x783a88cc, + 0x783b08e8, + 0x783b88fe, + 0x783c0914, + 0x783c892a, + 0x783d0947, + 0x783d8956, + 0x783e0965, + 0x783e8974, + 0x783f0990, + 0x783f899e, + 0x784009ac, + 0x784089ba, + 0x784109c7, + 0x784186fa, + 0x784207bf, + 0x784287dd, + 0x784307fb, + 0x80321611, +}; + +const size_t kOpenSSLFunctionValuesLen = sizeof(kOpenSSLFunctionValues) / sizeof(kOpenSSLFunctionValues[0]); + +const char kOpenSSLFunctionStringData[] = + "ASN1_BIT_STRING_set_bit\0" + "ASN1_ENUMERATED_set\0" + "ASN1_ENUMERATED_to_BN\0" + "ASN1_GENERALIZEDTIME_adj\0" + "ASN1_INTEGER_set\0" + "ASN1_INTEGER_to_BN\0" + "ASN1_OBJECT_new\0" + "ASN1_PCTX_new\0" + "ASN1_STRING_TABLE_add\0" + "ASN1_STRING_set\0" + "ASN1_STRING_type_new\0" + "ASN1_TIME_adj\0" + "ASN1_UTCTIME_adj\0" + "ASN1_d2i_fp\0" + "ASN1_dup\0" + "ASN1_generate_v3\0" + "ASN1_get_object\0" + "ASN1_i2d_bio\0" + "ASN1_i2d_fp\0" + "ASN1_item_d2i_fp\0" + "ASN1_item_dup\0" + "ASN1_item_ex_d2i\0" + "ASN1_item_i2d_bio\0" + "ASN1_item_i2d_fp\0" + "ASN1_item_pack\0" + "ASN1_item_unpack\0" + "ASN1_mbstring_ncopy\0" + "ASN1_template_new\0" + "BIO_new_NDEF\0" + "BN_to_ASN1_ENUMERATED\0" + "BN_to_ASN1_INTEGER\0" + "a2d_ASN1_OBJECT\0" + "a2i_ASN1_ENUMERATED\0" + "a2i_ASN1_INTEGER\0" + "a2i_ASN1_STRING\0" + "append_exp\0" + "asn1_cb\0" + "asn1_check_tlen\0" + "asn1_collate_primitive\0" + "asn1_collect\0" + "asn1_d2i_ex_primitive\0" + "asn1_d2i_read_bio\0" + "asn1_do_adb\0" + "asn1_ex_c2i\0" + "asn1_find_end\0" + "asn1_item_ex_combine_new\0" + "asn1_str2type\0" + "asn1_template_ex_d2i\0" + "asn1_template_noexp_d2i\0" + "bitstr_cb\0" + "c2i_ASN1_BIT_STRING\0" + "c2i_ASN1_INTEGER\0" + "c2i_ASN1_OBJECT\0" + "collect_data\0" + "d2i_ASN1_BOOLEAN\0" + "d2i_ASN1_OBJECT\0" + "d2i_ASN1_UINTEGER\0" + "d2i_ASN1_UTCTIME\0" + "d2i_ASN1_bytes\0" + "d2i_ASN1_type_bytes\0" + "i2d_ASN1_TIME\0" + "i2d_PrivateKey\0" + "long_c2i\0" + "parse_tagging\0" + "BIO_callback_ctrl\0" + "BIO_ctrl\0" + "BIO_new\0" + "BIO_new_file\0" + "BIO_new_mem_buf\0" + "BIO_printf\0" + "BIO_zero_copy_get_read_buf\0" + "BIO_zero_copy_get_read_buf_done\0" + "BIO_zero_copy_get_write_buf\0" + "BIO_zero_copy_get_write_buf_done\0" + "bio_io\0" + "bio_make_pair\0" + "bio_write\0" + "buffer_ctrl\0" + "conn_ctrl\0" + "conn_state\0" + "file_ctrl\0" + "file_read\0" + "mem_write\0" + "BN_CTX_get\0" + "BN_CTX_new\0" + "BN_CTX_start\0" + "BN_bn2dec\0" + "BN_bn2hex\0" + "BN_div\0" + "BN_div_recp\0" + "BN_exp\0" + "BN_generate_dsa_nonce\0" + "BN_generate_prime_ex\0" + "BN_lshift\0" + "BN_mod_exp2_mont\0" + "BN_mod_exp_mont\0" + "BN_mod_exp_mont_consttime\0" + "BN_mod_exp_mont_word\0" + "BN_mod_inverse\0" + "BN_mod_inverse_no_branch\0" + "BN_mod_lshift_quick\0" + "BN_mod_sqrt\0" + "BN_new\0" + "BN_rand\0" + "BN_rand_range\0" + "BN_rshift\0" + "BN_sqrt\0" + "BN_usub\0" + "bn_wexpand\0" + "mod_exp_recp\0" + "BUF_MEM_new\0" + "BUF_memdup\0" + "BUF_strndup\0" + "buf_mem_grow\0" + "EVP_AEAD_CTX_init\0" + "EVP_AEAD_CTX_init_with_direction\0" + "EVP_AEAD_CTX_open\0" + "EVP_AEAD_CTX_seal\0" + "EVP_CIPHER_CTX_copy\0" + "EVP_CIPHER_CTX_ctrl\0" + "EVP_CIPHER_CTX_set_key_length\0" + "EVP_CipherInit_ex\0" + "EVP_DecryptFinal_ex\0" + "EVP_EncryptFinal_ex\0" + "aead_aes_ctr_hmac_sha256_init\0" + "aead_aes_ctr_hmac_sha256_open\0" + "aead_aes_ctr_hmac_sha256_seal\0" + "aead_aes_gcm_init\0" + "aead_aes_gcm_open\0" + "aead_aes_gcm_seal\0" + "aead_aes_key_wrap_init\0" + "aead_aes_key_wrap_open\0" + "aead_aes_key_wrap_seal\0" + "aead_chacha20_poly1305_init\0" + "aead_chacha20_poly1305_open\0" + "aead_chacha20_poly1305_seal\0" + "aead_rc4_md5_tls_init\0" + "aead_rc4_md5_tls_open\0" + "aead_rc4_md5_tls_seal\0" + "aead_ssl3_ensure_cipher_init\0" + "aead_ssl3_init\0" + "aead_ssl3_open\0" + "aead_ssl3_seal\0" + "aead_tls_ensure_cipher_init\0" + "aead_tls_init\0" + "aead_tls_open\0" + "aead_tls_seal\0" + "aes_init_key\0" + "aesni_init_key\0" + "CONF_parse_list\0" + "NCONF_load\0" + "def_load_bio\0" + "str_copy\0" + "CRYPTO_get_ex_new_index\0" + "CRYPTO_set_ex_data\0" + "get_class\0" + "get_func_pointers\0" + "DH_new_method\0" + "compute_key\0" + "generate_key\0" + "generate_parameters\0" + "EVP_DigestInit_ex\0" + "EVP_MD_CTX_copy_ex\0" + "DSA_new_method\0" + "dsa_sig_cb\0" + "sign\0" + "sign_setup\0" + "verify\0" + "BN_to_felem\0" + "EC_GROUP_copy\0" + "EC_GROUP_get_curve_GFp\0" + "EC_GROUP_get_degree\0" + "EC_GROUP_new_by_curve_name\0" + "EC_GROUP_new_curve_GFp\0" + "EC_KEY_check_key\0" + "EC_KEY_copy\0" + "EC_KEY_generate_key\0" + "EC_KEY_new_by_curve_name\0" + "EC_KEY_new_method\0" + "EC_KEY_set_public_key_affine_coordinates\0" + "EC_POINT_add\0" + "EC_POINT_cmp\0" + "EC_POINT_copy\0" + "EC_POINT_dbl\0" + "EC_POINT_dup\0" + "EC_POINT_get_affine_coordinates_GFp\0" + "EC_POINT_invert\0" + "EC_POINT_is_at_infinity\0" + "EC_POINT_is_on_curve\0" + "EC_POINT_make_affine\0" + "EC_POINT_new\0" + "EC_POINT_oct2point\0" + "EC_POINT_point2oct\0" + "EC_POINT_set_affine_coordinates_GFp\0" + "EC_POINT_set_compressed_coordinates_GFp\0" + "EC_POINT_set_to_infinity\0" + "EC_POINTs_make_affine\0" + "compute_wNAF\0" + "d2i_ECPKParameters\0" + "d2i_ECParameters\0" + "d2i_ECPrivateKey\0" + "ec_GFp_mont_field_decode\0" + "ec_GFp_mont_field_encode\0" + "ec_GFp_mont_field_mul\0" + "ec_GFp_mont_field_set_to_one\0" + "ec_GFp_mont_field_sqr\0" + "ec_GFp_mont_group_set_curve\0" + "ec_GFp_nistp256_group_set_curve\0" + "ec_GFp_nistp256_point_get_affine_coordinates\0" + "ec_GFp_nistp256_points_mul\0" + "ec_GFp_simple_group_check_discriminant\0" + "ec_GFp_simple_group_set_curve\0" + "ec_GFp_simple_make_affine\0" + "ec_GFp_simple_oct2point\0" + "ec_GFp_simple_point2oct\0" + "ec_GFp_simple_point_get_affine_coordinates\0" + "ec_GFp_simple_point_set_affine_coordinates\0" + "ec_GFp_simple_points_make_affine\0" + "ec_GFp_simple_set_compressed_coordinates\0" + "ec_asn1_group2pkparameters\0" + "ec_asn1_pkparameters2group\0" + "ec_group_copy\0" + "ec_group_new\0" + "ec_group_new_curve_GFp\0" + "ec_group_new_from_data\0" + "ec_point_set_Jprojective_coordinates_GFp\0" + "ec_pre_comp_new\0" + "ec_wNAF_mul\0" + "ec_wNAF_precompute_mult\0" + "i2d_ECPKParameters\0" + "i2d_ECParameters\0" + "i2d_ECPrivateKey\0" + "i2o_ECPublicKey\0" + "nistp256_pre_comp_new\0" + "o2i_ECPublicKey\0" + "ECDH_compute_key\0" + "ECDSA_do_sign_ex\0" + "ECDSA_do_verify\0" + "ECDSA_sign_ex\0" + "digest_to_bn\0" + "ecdsa_sign_setup\0" + "EVP_DigestSignAlgorithm\0" + "EVP_DigestVerifyInitFromAlgorithm\0" + "EVP_PKEY_CTX_ctrl\0" + "EVP_PKEY_CTX_dup\0" + "EVP_PKEY_CTX_get0_rsa_oaep_label\0" + "EVP_PKEY_copy_parameters\0" + "EVP_PKEY_decrypt\0" + "EVP_PKEY_decrypt_init\0" + "EVP_PKEY_derive\0" + "EVP_PKEY_derive_init\0" + "EVP_PKEY_derive_set_peer\0" + "EVP_PKEY_encrypt\0" + "EVP_PKEY_encrypt_init\0" + "EVP_PKEY_get1_DH\0" + "EVP_PKEY_get1_DSA\0" + "EVP_PKEY_get1_EC_KEY\0" + "EVP_PKEY_get1_RSA\0" + "EVP_PKEY_keygen\0" + "EVP_PKEY_keygen_init\0" + "EVP_PKEY_new\0" + "EVP_PKEY_set_type\0" + "EVP_PKEY_sign\0" + "EVP_PKEY_sign_init\0" + "EVP_PKEY_verify\0" + "EVP_PKEY_verify_init\0" + "check_padding_md\0" + "d2i_AutoPrivateKey\0" + "d2i_PrivateKey\0" + "do_EC_KEY_print\0" + "do_dsa_print\0" + "do_rsa_print\0" + "do_sigver_init\0" + "dsa_param_decode\0" + "dsa_priv_decode\0" + "dsa_priv_encode\0" + "dsa_pub_decode\0" + "dsa_pub_encode\0" + "dsa_sig_print\0" + "eckey_param2type\0" + "eckey_param_decode\0" + "eckey_priv_decode\0" + "eckey_priv_encode\0" + "eckey_pub_decode\0" + "eckey_pub_encode\0" + "eckey_type2param\0" + "evp_pkey_ctx_new\0" + "hmac_signctx\0" + "i2d_PublicKey\0" + "old_dsa_priv_decode\0" + "old_ec_priv_decode\0" + "old_rsa_priv_decode\0" + "pkey_ec_ctrl\0" + "pkey_ec_derive\0" + "pkey_ec_keygen\0" + "pkey_ec_paramgen\0" + "pkey_ec_sign\0" + "pkey_hmac_ctrl\0" + "pkey_rsa_ctrl\0" + "pkey_rsa_decrypt\0" + "pkey_rsa_encrypt\0" + "pkey_rsa_sign\0" + "rsa_algor_to_md\0" + "rsa_digest_verify_init_from_algorithm\0" + "rsa_mgf1_to_md\0" + "rsa_priv_decode\0" + "rsa_priv_encode\0" + "rsa_pss_to_ctx\0" + "rsa_pub_decode\0" + "HKDF\0" + "OBJ_create\0" + "OBJ_dup\0" + "OBJ_nid2obj\0" + "OBJ_txt2obj\0" + "PEM_ASN1_read\0" + "PEM_ASN1_read_bio\0" + "PEM_ASN1_write\0" + "PEM_ASN1_write_bio\0" + "PEM_X509_INFO_read\0" + "PEM_X509_INFO_read_bio\0" + "PEM_X509_INFO_write_bio\0" + "PEM_do_header\0" + "PEM_get_EVP_CIPHER_INFO\0" + "PEM_read\0" + "PEM_read_DHparams\0" + "PEM_read_PrivateKey\0" + "PEM_read_bio\0" + "PEM_read_bio_DHparams\0" + "PEM_read_bio_Parameters\0" + "PEM_read_bio_PrivateKey\0" + "PEM_write\0" + "PEM_write_PrivateKey\0" + "PEM_write_bio\0" + "d2i_PKCS8PrivateKey_bio\0" + "d2i_PKCS8PrivateKey_fp\0" + "do_pk8pkey\0" + "do_pk8pkey_fp\0" + "load_iv\0" + "EVP_PKCS82PKEY\0" + "EVP_PKEY2PKCS8\0" + "PKCS12_get_key_and_certs\0" + "PKCS12_handle_content_info\0" + "PKCS12_handle_content_infos\0" + "PKCS5_pbe2_set_iv\0" + "PKCS5_pbe_set\0" + "PKCS5_pbe_set0_algor\0" + "PKCS5_pbkdf2_set\0" + "PKCS8_decrypt\0" + "PKCS8_encrypt\0" + "PKCS8_encrypt_pbe\0" + "pbe_cipher_init\0" + "pbe_crypt\0" + "pkcs12_item_decrypt_d2i\0" + "pkcs12_item_i2d_encrypt\0" + "pkcs12_key_gen_raw\0" + "pkcs12_pbe_keyivgen\0" + "BN_BLINDING_convert_ex\0" + "BN_BLINDING_create_param\0" + "BN_BLINDING_invert_ex\0" + "BN_BLINDING_new\0" + "BN_BLINDING_update\0" + "RSA_add_pkcs1_prefix\0" + "RSA_check_key\0" + "RSA_new_method\0" + "RSA_padding_add_PKCS1_OAEP_mgf1\0" + "RSA_padding_add_PKCS1_PSS_mgf1\0" + "RSA_padding_add_PKCS1_type_1\0" + "RSA_padding_add_PKCS1_type_2\0" + "RSA_padding_add_none\0" + "RSA_padding_check_PKCS1_OAEP_mgf1\0" + "RSA_padding_check_PKCS1_type_1\0" + "RSA_padding_check_PKCS1_type_2\0" + "RSA_padding_check_none\0" + "RSA_recover_crt_params\0" + "RSA_sign\0" + "RSA_verify\0" + "RSA_verify_PKCS1_PSS_mgf1\0" + "decrypt\0" + "encrypt\0" + "keygen\0" + "keygen_multiprime\0" + "private_transform\0" + "rsa_setup_blinding\0" + "sign_raw\0" + "verify_raw\0" + "SSL_AEAD_CTX_new\0" + "SSL_AEAD_CTX_open\0" + "SSL_AEAD_CTX_seal\0" + "SSL_CTX_check_private_key\0" + "SSL_CTX_get_tlsext_ticket_keys\0" + "SSL_CTX_new\0" + "SSL_CTX_set1_tls_channel_id\0" + "SSL_CTX_set_cipher_list\0" + "SSL_CTX_set_cipher_list_tls11\0" + "SSL_CTX_set_session_id_context\0" + "SSL_CTX_set_tlsext_ticket_keys\0" + "SSL_CTX_set_tmp_dh\0" + "SSL_CTX_set_tmp_ecdh\0" + "SSL_CTX_use_PrivateKey\0" + "SSL_CTX_use_PrivateKey_ASN1\0" + "SSL_CTX_use_PrivateKey_file\0" + "SSL_CTX_use_RSAPrivateKey\0" + "SSL_CTX_use_RSAPrivateKey_ASN1\0" + "SSL_CTX_use_RSAPrivateKey_file\0" + "SSL_CTX_use_certificate\0" + "SSL_CTX_use_certificate_ASN1\0" + "SSL_CTX_use_certificate_chain_file\0" + "SSL_CTX_use_certificate_file\0" + "SSL_CTX_use_psk_identity_hint\0" + "SSL_SESSION_from_bytes\0" + "SSL_SESSION_new\0" + "SSL_SESSION_parse\0" + "SSL_SESSION_parse_octet_string\0" + "SSL_SESSION_parse_string\0" + "SSL_SESSION_print_fp\0" + "SSL_SESSION_set1_id_context\0" + "SSL_SESSION_to_bytes_full\0" + "SSL_accept\0" + "SSL_add_dir_cert_subjects_to_stack\0" + "SSL_add_file_cert_subjects_to_stack\0" + "SSL_check_private_key\0" + "SSL_clear\0" + "SSL_connect\0" + "SSL_do_handshake\0" + "SSL_load_client_CA_file\0" + "SSL_new\0" + "SSL_peek\0" + "SSL_read\0" + "SSL_renegotiate\0" + "SSL_set1_tls_channel_id\0" + "SSL_set_cipher_list\0" + "SSL_set_fd\0" + "SSL_set_rfd\0" + "SSL_set_session_id_context\0" + "SSL_set_tlsext_host_name\0" + "SSL_set_tmp_dh\0" + "SSL_set_tmp_ecdh\0" + "SSL_set_wfd\0" + "SSL_shutdown\0" + "SSL_use_PrivateKey\0" + "SSL_use_PrivateKey_ASN1\0" + "SSL_use_PrivateKey_file\0" + "SSL_use_RSAPrivateKey\0" + "SSL_use_RSAPrivateKey_ASN1\0" + "SSL_use_RSAPrivateKey_file\0" + "SSL_use_certificate\0" + "SSL_use_certificate_ASN1\0" + "SSL_use_certificate_file\0" + "SSL_use_psk_identity_hint\0" + "SSL_write\0" + "d2i_SSL_SESSION\0" + "do_ssl3_write\0" + "dtls1_accept\0" + "dtls1_buffer_record\0" + "dtls1_check_timeout_num\0" + "dtls1_connect\0" + "dtls1_do_write\0" + "dtls1_get_buffered_message\0" + "dtls1_get_hello_verify\0" + "dtls1_get_message\0" + "dtls1_get_message_fragment\0" + "dtls1_hm_fragment_new\0" + "dtls1_preprocess_fragment\0" + "dtls1_process_fragment\0" + "dtls1_process_record\0" + "dtls1_read_bytes\0" + "dtls1_seal_record\0" + "dtls1_send_hello_verify_request\0" + "dtls1_write_app_data\0" + "i2d_SSL_SESSION\0" + "ssl3_accept\0" + "ssl3_cert_verify_hash\0" + "ssl3_check_cert_and_algorithm\0" + "ssl3_check_certificate_for_cipher\0" + "ssl3_connect\0" + "ssl3_ctrl\0" + "ssl3_ctx_ctrl\0" + "ssl3_digest_cached_records\0" + "ssl3_do_change_cipher_spec\0" + "ssl3_expect_change_cipher_spec\0" + "ssl3_get_cert_status\0" + "ssl3_get_cert_verify\0" + "ssl3_get_certificate_request\0" + "ssl3_get_channel_id\0" + "ssl3_get_client_certificate\0" + "ssl3_get_client_hello\0" + "ssl3_get_client_key_exchange\0" + "ssl3_get_finished\0" + "ssl3_get_initial_bytes\0" + "ssl3_get_message\0" + "ssl3_get_new_session_ticket\0" + "ssl3_get_next_proto\0" + "ssl3_get_record\0" + "ssl3_get_server_certificate\0" + "ssl3_get_server_done\0" + "ssl3_get_server_hello\0" + "ssl3_get_server_key_exchange\0" + "ssl3_get_v2_client_hello\0" + "ssl3_handshake_mac\0" + "ssl3_output_cert_chain\0" + "ssl3_prf\0" + "ssl3_read_bytes\0" + "ssl3_read_n\0" + "ssl3_record_sequence_update\0" + "ssl3_seal_record\0" + "ssl3_send_cert_verify\0" + "ssl3_send_certificate_request\0" + "ssl3_send_channel_id\0" + "ssl3_send_client_certificate\0" + "ssl3_send_client_hello\0" + "ssl3_send_client_key_exchange\0" + "ssl3_send_server_certificate\0" + "ssl3_send_server_hello\0" + "ssl3_send_server_key_exchange\0" + "ssl3_setup_read_buffer\0" + "ssl3_setup_write_buffer\0" + "ssl3_write_bytes\0" + "ssl3_write_pending\0" + "ssl_add_cert_chain\0" + "ssl_add_cert_to_buf\0" + "ssl_add_clienthello_renegotiate_ext\0" + "ssl_add_clienthello_tlsext\0" + "ssl_add_clienthello_use_srtp_ext\0" + "ssl_add_serverhello_renegotiate_ext\0" + "ssl_add_serverhello_tlsext\0" + "ssl_add_serverhello_use_srtp_ext\0" + "ssl_build_cert_chain\0" + "ssl_bytes_to_cipher_list\0" + "ssl_cert_dup\0" + "ssl_cert_inst\0" + "ssl_cert_new\0" + "ssl_check_serverhello_tlsext\0" + "ssl_check_srvr_ecc_cert_and_alg\0" + "ssl_cipher_process_rulestr\0" + "ssl_cipher_strength_sort\0" + "ssl_create_cipher_list\0" + "ssl_ctx_log_master_secret\0" + "ssl_ctx_log_rsa_client_key_exchange\0" + "ssl_ctx_make_profiles\0" + "ssl_get_new_session\0" + "ssl_get_prev_session\0" + "ssl_get_server_cert_index\0" + "ssl_get_sign_pkey\0" + "ssl_init_wbio_buffer\0" + "ssl_parse_clienthello_renegotiate_ext\0" + "ssl_parse_clienthello_tlsext\0" + "ssl_parse_clienthello_use_srtp_ext\0" + "ssl_parse_serverhello_renegotiate_ext\0" + "ssl_parse_serverhello_tlsext\0" + "ssl_parse_serverhello_use_srtp_ext\0" + "ssl_scan_clienthello_tlsext\0" + "ssl_scan_serverhello_tlsext\0" + "ssl_sess_cert_new\0" + "ssl_set_cert\0" + "ssl_set_pkey\0" + "ssl_verify_cert_chain\0" + "tls12_check_peer_sigalg\0" + "tls1_aead_ctx_init\0" + "tls1_cert_verify_mac\0" + "tls1_change_cipher_state\0" + "tls1_change_cipher_state_aead\0" + "tls1_check_duplicate_extensions\0" + "tls1_enc\0" + "tls1_export_keying_material\0" + "tls1_prf\0" + "tls1_setup_key_block\0" + "ASN1_digest\0" + "ASN1_item_sign_ctx\0" + "ASN1_item_verify\0" + "NETSCAPE_SPKI_b64_decode\0" + "NETSCAPE_SPKI_b64_encode\0" + "PKCS7_get_CRLs\0" + "PKCS7_get_certificates\0" + "X509_ATTRIBUTE_create_by_NID\0" + "X509_ATTRIBUTE_create_by_OBJ\0" + "X509_ATTRIBUTE_create_by_txt\0" + "X509_ATTRIBUTE_get0_data\0" + "X509_ATTRIBUTE_set1_data\0" + "X509_CRL_add0_revoked\0" + "X509_CRL_diff\0" + "X509_CRL_print_fp\0" + "X509_EXTENSION_create_by_NID\0" + "X509_EXTENSION_create_by_OBJ\0" + "X509_INFO_new\0" + "X509_NAME_ENTRY_create_by_NID\0" + "X509_NAME_ENTRY_create_by_txt\0" + "X509_NAME_ENTRY_set_object\0" + "X509_NAME_add_entry\0" + "X509_NAME_oneline\0" + "X509_NAME_print\0" + "X509_PKEY_new\0" + "X509_PUBKEY_get\0" + "X509_PUBKEY_set\0" + "X509_REQ_check_private_key\0" + "X509_REQ_print_ex\0" + "X509_REQ_print_fp\0" + "X509_REQ_to_X509\0" + "X509_STORE_CTX_get1_issuer\0" + "X509_STORE_CTX_init\0" + "X509_STORE_CTX_new\0" + "X509_STORE_CTX_purpose_inherit\0" + "X509_STORE_add_cert\0" + "X509_STORE_add_crl\0" + "X509_TRUST_add\0" + "X509_TRUST_set\0" + "X509_check_private_key\0" + "X509_get_pubkey_parameters\0" + "X509_load_cert_crl_file\0" + "X509_load_cert_file\0" + "X509_load_crl_file\0" + "X509_print_ex_fp\0" + "X509_to_X509_REQ\0" + "X509_verify_cert\0" + "X509at_add1_attr\0" + "X509v3_add_ext\0" + "add_cert_dir\0" + "by_file_ctrl\0" + "check_policy\0" + "dir_ctrl\0" + "get_cert_by_subject\0" + "i2d_DSA_PUBKEY\0" + "i2d_EC_PUBKEY\0" + "i2d_RSA_PUBKEY\0" + "pkcs7_parse_header\0" + "x509_name_encode\0" + "x509_name_ex_d2i\0" + "x509_name_ex_new\0" + "SXNET_add_id_INTEGER\0" + "SXNET_add_id_asc\0" + "SXNET_add_id_ulong\0" + "SXNET_get_id_asc\0" + "SXNET_get_id_ulong\0" + "X509V3_EXT_add\0" + "X509V3_EXT_add_alias\0" + "X509V3_EXT_free\0" + "X509V3_EXT_i2d\0" + "X509V3_EXT_nconf\0" + "X509V3_add1_i2d\0" + "X509V3_add_value\0" + "X509V3_get_section\0" + "X509V3_get_string\0" + "X509V3_get_value_bool\0" + "X509V3_parse_list\0" + "X509_PURPOSE_add\0" + "X509_PURPOSE_set\0" + "a2i_GENERAL_NAME\0" + "copy_email\0" + "copy_issuer\0" + "do_dirname\0" + "do_ext_i2d\0" + "do_ext_nconf\0" + "gnames_from_sectname\0" + "hex_to_string\0" + "i2s_ASN1_ENUMERATED\0" + "i2s_ASN1_IA5STRING\0" + "i2s_ASN1_INTEGER\0" + "i2v_AUTHORITY_INFO_ACCESS\0" + "notice_section\0" + "nref_nos\0" + "policy_section\0" + "process_pci_value\0" + "r2i_certpol\0" + "r2i_pci\0" + "s2i_ASN1_IA5STRING\0" + "s2i_ASN1_INTEGER\0" + "s2i_ASN1_OCTET_STRING\0" + "s2i_skey_id\0" + "set_dist_point_name\0" + "string_to_hex\0" + "v2i_ASN1_BIT_STRING\0" + "v2i_AUTHORITY_INFO_ACCESS\0" + "v2i_AUTHORITY_KEYID\0" + "v2i_BASIC_CONSTRAINTS\0" + "v2i_EXTENDED_KEY_USAGE\0" + "v2i_GENERAL_NAMES\0" + "v2i_GENERAL_NAME_ex\0" + "v2i_NAME_CONSTRAINTS\0" + "v2i_POLICY_CONSTRAINTS\0" + "v2i_POLICY_MAPPINGS\0" + "v2i_crld\0" + "v2i_idp\0" + "v2i_issuer_alt\0" + "v2i_subject_alt\0" + "v3_generic_extension\0" + ""; + +const uint32_t kOpenSSLReasonValues[] = { + 0xc3207ba, + 0xc3287c7, + 0xc3307d6, + 0xc3387e6, + 0xc3407f5, + 0xc34880e, + 0xc35081a, + 0xc358837, + 0xc360849, + 0xc368857, + 0xc370867, + 0xc378874, + 0xc380884, + 0xc38888f, + 0xc3908a5, + 0xc3988b4, + 0xc3a08c8, + 0x1032146b, + 0x10329477, + 0x10331490, + 0x103394a3, + 0x10340dd4, + 0x103494b6, + 0x103514cb, + 0x103594fd, + 0x10361516, + 0x1036952b, + 0x10371549, + 0x10379558, + 0x10381574, + 0x1038958f, + 0x1039159e, + 0x103995ba, + 0x103a15d5, + 0x103a95ec, + 0x103b15fd, + 0x103b9611, + 0x103c1630, + 0x103c963f, + 0x103d1656, + 0x103d9669, + 0x103e0b5f, + 0x103e969a, + 0x103f16ad, + 0x103f96c7, + 0x104016d7, + 0x104096eb, + 0x10411701, + 0x10419719, + 0x1042172e, + 0x10429742, + 0x10431754, + 0x104385d0, + 0x104408b4, + 0x10449769, + 0x10451780, + 0x10459795, + 0x104617a3, + 0x1046967c, + 0x104714de, + 0x14320b42, + 0x14328b50, + 0x14330b5f, + 0x14338b71, + 0x18320083, + 0x18328e3a, + 0x18340e68, + 0x18348e7c, + 0x18358eb3, + 0x18368ee0, + 0x18370ef3, + 0x18378f07, + 0x18380f2b, + 0x18388f39, + 0x18390f4f, + 0x18398f63, + 0x183a0f73, + 0x183b0f83, + 0x183b8f98, + 0x183c8fc3, + 0x183d0fd7, + 0x183d8fe7, + 0x183e0b8e, + 0x183e8ff4, + 0x183f1006, + 0x183f9011, + 0x18401021, + 0x18409032, + 0x18411043, + 0x18419055, + 0x1842107e, + 0x184290b0, + 0x184310bf, + 0x18451128, + 0x1845913e, + 0x18461159, + 0x18468ecb, + 0x184709cc, + 0x18478094, + 0x18480faf, + 0x184890f4, + 0x18490e50, + 0x18498e91, + 0x184a118f, + 0x184a910c, + 0x184b10d3, + 0x184b8e2a, + 0x184c1097, + 0x184c866b, + 0x184d1174, + 0x203211b6, + 0x243211c2, + 0x243288fa, + 0x243311d4, + 0x243391e1, + 0x243411ee, + 0x24349200, + 0x2435120f, + 0x2435922c, + 0x24361239, + 0x24369247, + 0x24371255, + 0x24379263, + 0x2438126c, + 0x24389279, + 0x2439128c, + 0x28320b82, + 0x28328b8e, + 0x28330b5f, + 0x28338ba1, + 0x2c322b08, + 0x2c32ab16, + 0x2c332b28, + 0x2c33ab3a, + 0x2c342b4e, + 0x2c34ab60, + 0x2c352b7b, + 0x2c35ab8d, + 0x2c362ba0, + 0x2c3682f3, + 0x2c372bad, + 0x2c37abbf, + 0x2c382bd2, + 0x2c38abe0, + 0x2c392bf0, + 0x2c39ac02, + 0x2c3a2c16, + 0x2c3aac27, + 0x2c3b134c, + 0x2c3bac38, + 0x2c3c2c4c, + 0x2c3cac62, + 0x2c3d2c7b, + 0x2c3daca9, + 0x2c3e2cb7, + 0x2c3eaccf, + 0x2c3f2ce7, + 0x2c3facf4, + 0x2c402d17, + 0x2c40ad36, + 0x2c4111b6, + 0x2c41ad47, + 0x2c422d5a, + 0x2c429128, + 0x2c432d6b, + 0x2c4386a2, + 0x2c442c98, + 0x30320000, + 0x30328015, + 0x3033001f, + 0x30338038, + 0x3034004a, + 0x30348064, + 0x3035006b, + 0x30358083, + 0x30360094, + 0x303680a1, + 0x303700b0, + 0x303780bd, + 0x303800d0, + 0x303880eb, + 0x30390100, + 0x30398114, + 0x303a0128, + 0x303a8139, + 0x303b0152, + 0x303b816f, + 0x303c017d, + 0x303c8191, + 0x303d01a1, + 0x303d81ba, + 0x303e01ca, + 0x303e81dd, + 0x303f01ec, + 0x303f81f8, + 0x3040020d, + 0x3040821d, + 0x30410234, + 0x30418241, + 0x30420254, + 0x30428263, + 0x30430278, + 0x30438299, + 0x304402ac, + 0x304482bf, + 0x304502d8, + 0x304582f3, + 0x30460310, + 0x30468329, + 0x30470337, + 0x30478348, + 0x30480357, + 0x3048836f, + 0x30490381, + 0x30498395, + 0x304a03b4, + 0x304a83c7, + 0x304b03d2, + 0x304b83e1, + 0x304c03f2, + 0x304c83fe, + 0x304d0414, + 0x304d8422, + 0x304e0438, + 0x304e844a, + 0x304f045c, + 0x304f846f, + 0x30500482, + 0x30508493, + 0x305104a3, + 0x305184bb, + 0x305204d0, + 0x305284e8, + 0x305304fc, + 0x30538514, + 0x3054052d, + 0x30548546, + 0x30550563, + 0x3055856e, + 0x30560586, + 0x30568596, + 0x305705a7, + 0x305785ba, + 0x305805d0, + 0x305885d9, + 0x305905ee, + 0x30598601, + 0x305a0610, + 0x305a8630, + 0x305b063f, + 0x305b864b, + 0x305c066b, + 0x305c8687, + 0x305d0698, + 0x305d86a2, + 0x34320abc, + 0x34328ad0, + 0x34330aed, + 0x34338b00, + 0x34340b0f, + 0x34348b2c, + 0x3c320083, + 0x3c328bcb, + 0x3c330be4, + 0x3c338bff, + 0x3c340c1c, + 0x3c348c37, + 0x3c350c52, + 0x3c358c67, + 0x3c360c80, + 0x3c368c98, + 0x3c370ca9, + 0x3c378cb7, + 0x3c380cc4, + 0x3c388cd8, + 0x3c390b8e, + 0x3c398cec, + 0x3c3a0d00, + 0x3c3a8874, + 0x3c3b0d10, + 0x3c3b8d2b, + 0x3c3c0d3d, + 0x3c3c8d53, + 0x3c3d0d5d, + 0x3c3d8d71, + 0x3c3e0d7f, + 0x3c3e8da4, + 0x3c3f0bb7, + 0x3c3f8d8d, + 0x403217ba, + 0x403297d0, + 0x403317fe, + 0x40339808, + 0x4034181f, + 0x4034983d, + 0x4035184d, + 0x4035985f, + 0x4036186c, + 0x40369878, + 0x4037188d, + 0x403798a2, + 0x403818b4, + 0x403898bf, + 0x403918d1, + 0x40398dd4, + 0x403a18e1, + 0x403a98f4, + 0x403b1915, + 0x403b9926, + 0x403c1936, + 0x403c8064, + 0x403d1942, + 0x403d995e, + 0x403e1974, + 0x403e9983, + 0x403f1996, + 0x403f99b0, + 0x404019be, + 0x404099d3, + 0x404119e7, + 0x40419a04, + 0x40421a1d, + 0x40429a38, + 0x40431a51, + 0x40439a64, + 0x40441a78, + 0x40449a90, + 0x40451aa0, + 0x40459aae, + 0x40461acc, + 0x40468094, + 0x40471ae1, + 0x40479af3, + 0x40481b17, + 0x40489b37, + 0x40491b4b, + 0x40499b60, + 0x404a1b79, + 0x404a9bb3, + 0x404b1bcd, + 0x404b9beb, + 0x404c1c06, + 0x404c9c20, + 0x404d1c37, + 0x404d9c5f, + 0x404e1c76, + 0x404e9c92, + 0x404f1cae, + 0x404f9ccf, + 0x40501cf1, + 0x40509d0d, + 0x40511d21, + 0x40519d2e, + 0x40521d45, + 0x40529d55, + 0x40531d65, + 0x40539d79, + 0x40541d94, + 0x40549da4, + 0x40551dbb, + 0x40559dca, + 0x40561de5, + 0x40569dfd, + 0x40571e19, + 0x40579e32, + 0x40581e45, + 0x40589e5a, + 0x40591e7d, + 0x40599e8b, + 0x405a1e98, + 0x405a9eb1, + 0x405b1ec9, + 0x405b9edc, + 0x405c1ef1, + 0x405c9f03, + 0x405d1f18, + 0x405d9f28, + 0x405e1f41, + 0x405e9f55, + 0x405f1f65, + 0x405f9f7d, + 0x40601f8e, + 0x40609fa1, + 0x40611fb2, + 0x40619fd0, + 0x40621fe1, + 0x40629fee, + 0x40632005, + 0x4063a046, + 0x4064205d, + 0x4064a06a, + 0x40652078, + 0x4065a09a, + 0x406620c2, + 0x4066a0d7, + 0x406720ee, + 0x4067a0ff, + 0x40682110, + 0x4068a121, + 0x40692136, + 0x4069a14d, + 0x406a215e, + 0x406aa177, + 0x406b2192, + 0x406ba1a9, + 0x406c2216, + 0x406ca237, + 0x406d224a, + 0x406da26b, + 0x406e2286, + 0x406ea2a1, + 0x406f22c2, + 0x406fa2e8, + 0x40702308, + 0x4070a324, + 0x407124b1, + 0x4071a4d4, + 0x407224ea, + 0x4072a509, + 0x40732521, + 0x4073a541, + 0x4074276b, + 0x4074a790, + 0x407527ab, + 0x4075a7ca, + 0x407627f9, + 0x4076a821, + 0x40772852, + 0x4077a871, + 0x40782896, + 0x4078a8ad, + 0x407928c0, + 0x4079a8dd, + 0x407a0782, + 0x407aa8ef, + 0x407b2902, + 0x407ba91b, + 0x407c2933, + 0x407c90b0, + 0x407d2947, + 0x407da961, + 0x407e2972, + 0x407ea986, + 0x407f2994, + 0x407fa9af, + 0x40801279, + 0x4080a9d4, + 0x408129f6, + 0x4081aa11, + 0x40822a26, + 0x4082aa3e, + 0x40832a56, + 0x4083aa6d, + 0x40842a83, + 0x4084aa8f, + 0x40852aa2, + 0x4085aab7, + 0x40862ac9, + 0x4086aade, + 0x40872ae7, + 0x40879c4d, + 0x40880083, + 0x4088a025, + 0x40890a0a, + 0x4089a1c1, + 0x408a1b9c, + 0x408aa1eb, + 0x408b283a, + 0x41f423dc, + 0x41f9246e, + 0x41fe2361, + 0x41fea592, + 0x41ff2683, + 0x420323f5, + 0x42082417, + 0x4208a453, + 0x42092345, + 0x4209a48d, + 0x420a239c, + 0x420aa37c, + 0x420b23bc, + 0x420ba435, + 0x420c269f, + 0x420ca55f, + 0x420d2579, + 0x420da5b0, + 0x421225ca, + 0x42172666, + 0x4217a60c, + 0x421c262e, + 0x421f25e9, + 0x422126b6, + 0x42262649, + 0x422b274f, + 0x422ba718, + 0x422c2737, + 0x422ca6f2, + 0x422d26d1, + 0x443206ad, + 0x443286bc, + 0x443306c8, + 0x443386d6, + 0x443406e9, + 0x443486fa, + 0x44350701, + 0x4435870b, + 0x4436071e, + 0x44368734, + 0x44370746, + 0x44378753, + 0x44380762, + 0x4438876a, + 0x44390782, + 0x44398790, + 0x443a07a3, + 0x4c3212a3, + 0x4c3292b3, + 0x4c3312c6, + 0x4c3392e6, + 0x4c340094, + 0x4c3480b0, + 0x4c3512f2, + 0x4c359300, + 0x4c36131c, + 0x4c36932f, + 0x4c37133e, + 0x4c37934c, + 0x4c381361, + 0x4c38936d, + 0x4c39138d, + 0x4c3993b7, + 0x4c3a13d0, + 0x4c3a93e9, + 0x4c3b05d0, + 0x4c3b9402, + 0x4c3c1414, + 0x4c3c9423, + 0x4c3d10b0, + 0x4c3d943c, + 0x4c3e1449, + 0x50322d7d, + 0x5032ad8c, + 0x50332d97, + 0x5033ada7, + 0x50342dc0, + 0x5034adda, + 0x50352de8, + 0x5035adfe, + 0x50362e10, + 0x5036ae26, + 0x50372e3f, + 0x5037ae52, + 0x50382e6a, + 0x5038ae7b, + 0x50392e90, + 0x5039aea4, + 0x503a2ec4, + 0x503aaeda, + 0x503b2ef2, + 0x503baf04, + 0x503c2f20, + 0x503caf37, + 0x503d2f50, + 0x503daf66, + 0x503e2f73, + 0x503eaf89, + 0x503f2f9b, + 0x503f8348, + 0x50402fae, + 0x5040afbe, + 0x50412fd8, + 0x5041afe7, + 0x50423001, + 0x5042b01e, + 0x5043302e, + 0x5043b03e, + 0x5044304d, + 0x50448414, + 0x50453061, + 0x5045b07f, + 0x50463092, + 0x5046b0a8, + 0x504730ba, + 0x5047b0cf, + 0x504830f5, + 0x5048b103, + 0x50493116, + 0x5049b12b, + 0x504a3141, + 0x504ab151, + 0x504b3171, + 0x504bb184, + 0x504c31a7, + 0x504cb1d5, + 0x504d31e7, + 0x504db204, + 0x504e321f, + 0x504eb23b, + 0x504f324d, + 0x504fb264, + 0x50503273, + 0x50508687, + 0x50513286, + 0x58320e12, + 0x68320dd4, + 0x68328b8e, + 0x68330ba1, + 0x68338de2, + 0x68340df2, + 0x6c320db0, + 0x6c328b71, + 0x6c330dbb, + 0x74320980, + 0x783208e5, + 0x783288fa, + 0x78330906, + 0x78338083, + 0x78340915, + 0x7834892a, + 0x78350949, + 0x7835896b, + 0x78360980, + 0x78368996, + 0x783709a6, + 0x783789b9, + 0x783809cc, + 0x783889de, + 0x783909eb, + 0x78398a0a, + 0x783a0a1f, + 0x783a8a2d, + 0x783b0a37, + 0x783b8a4b, + 0x783c0a62, + 0x783c8a77, + 0x783d0a8e, + 0x783d8aa3, + 0x783e09f9, + 0x803211a5, +}; + +const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]); + +const char kOpenSSLReasonStringData[] = + "ASN1_LENGTH_MISMATCH\0" + "AUX_ERROR\0" + "BAD_GET_ASN1_OBJECT_CALL\0" + "BAD_OBJECT_HEADER\0" + "BMPSTRING_IS_WRONG_LENGTH\0" + "BN_LIB\0" + "BOOLEAN_IS_WRONG_LENGTH\0" + "BUFFER_TOO_SMALL\0" + "DECODE_ERROR\0" + "DEPTH_EXCEEDED\0" + "ENCODE_ERROR\0" + "ERROR_GETTING_TIME\0" + "EXPECTING_AN_ASN1_SEQUENCE\0" + "EXPECTING_AN_INTEGER\0" + "EXPECTING_AN_OBJECT\0" + "EXPECTING_A_BOOLEAN\0" + "EXPECTING_A_TIME\0" + "EXPLICIT_LENGTH_MISMATCH\0" + "EXPLICIT_TAG_NOT_CONSTRUCTED\0" + "FIELD_MISSING\0" + "FIRST_NUM_TOO_LARGE\0" + "HEADER_TOO_LONG\0" + "ILLEGAL_BITSTRING_FORMAT\0" + "ILLEGAL_BOOLEAN\0" + "ILLEGAL_CHARACTERS\0" + "ILLEGAL_FORMAT\0" + "ILLEGAL_HEX\0" + "ILLEGAL_IMPLICIT_TAG\0" + "ILLEGAL_INTEGER\0" + "ILLEGAL_NESTED_TAGGING\0" + "ILLEGAL_NULL\0" + "ILLEGAL_NULL_VALUE\0" + "ILLEGAL_OBJECT\0" + "ILLEGAL_OPTIONAL_ANY\0" + "ILLEGAL_OPTIONS_ON_ITEM_TEMPLATE\0" + "ILLEGAL_TAGGED_ANY\0" + "ILLEGAL_TIME_VALUE\0" + "INTEGER_NOT_ASCII_FORMAT\0" + "INTEGER_TOO_LARGE_FOR_LONG\0" + "INVALID_BIT_STRING_BITS_LEFT\0" + "INVALID_BMPSTRING_LENGTH\0" + "INVALID_DIGIT\0" + "INVALID_MODIFIER\0" + "INVALID_NUMBER\0" + "INVALID_OBJECT_ENCODING\0" + "INVALID_SEPARATOR\0" + "INVALID_TIME_FORMAT\0" + "INVALID_UNIVERSALSTRING_LENGTH\0" + "INVALID_UTF8STRING\0" + "LIST_ERROR\0" + "MALLOC_FAILURE\0" + "MISSING_ASN1_EOS\0" + "MISSING_EOC\0" + "MISSING_SECOND_NUMBER\0" + "MISSING_VALUE\0" + "MSTRING_NOT_UNIVERSAL\0" + "MSTRING_WRONG_TAG\0" + "NESTED_ASN1_ERROR\0" + "NESTED_ASN1_STRING\0" + "NON_HEX_CHARACTERS\0" + "NOT_ASCII_FORMAT\0" + "NOT_ENOUGH_DATA\0" + "NO_MATCHING_CHOICE_TYPE\0" + "NULL_IS_WRONG_LENGTH\0" + "OBJECT_NOT_ASCII_FORMAT\0" + "ODD_NUMBER_OF_CHARS\0" + "SECOND_NUMBER_TOO_LARGE\0" + "SEQUENCE_LENGTH_MISMATCH\0" + "SEQUENCE_NOT_CONSTRUCTED\0" + "SEQUENCE_OR_SET_NEEDS_CONFIG\0" + "SHORT_LINE\0" + "STREAMING_NOT_SUPPORTED\0" + "STRING_TOO_LONG\0" + "STRING_TOO_SHORT\0" + "TAG_VALUE_TOO_HIGH\0" + "TIME_NOT_ASCII_FORMAT\0" + "TOO_LONG\0" + "TYPE_NOT_CONSTRUCTED\0" + "TYPE_NOT_PRIMITIVE\0" + "UNEXPECTED_EOC\0" + "UNIVERSALSTRING_IS_WRONG_LENGTH\0" + "UNKNOWN_FORMAT\0" + "UNKNOWN_TAG\0" + "UNSUPPORTED_ANY_DEFINED_BY_TYPE\0" + "UNSUPPORTED_PUBLIC_KEY_TYPE\0" + "UNSUPPORTED_TYPE\0" + "WRONG_TAG\0" + "WRONG_TYPE\0" + "BAD_FOPEN_MODE\0" + "BROKEN_PIPE\0" + "CONNECT_ERROR\0" + "ERROR_SETTING_NBIO\0" + "INVALID_ARGUMENT\0" + "IN_USE\0" + "KEEPALIVE\0" + "NBIO_CONNECT_ERROR\0" + "NO_HOSTNAME_SPECIFIED\0" + "NO_PORT_SPECIFIED\0" + "NO_SUCH_FILE\0" + "NULL_PARAMETER\0" + "SYS_LIB\0" + "UNABLE_TO_CREATE_SOCKET\0" + "UNINITIALIZED\0" + "UNSUPPORTED_METHOD\0" + "WRITE_TO_READ_ONLY_BIO\0" + "ARG2_LT_ARG3\0" + "BAD_RECIPROCAL\0" + "BIGNUM_TOO_LONG\0" + "BITS_TOO_SMALL\0" + "CALLED_WITH_EVEN_MODULUS\0" + "DIV_BY_ZERO\0" + "EXPAND_ON_STATIC_BIGNUM_DATA\0" + "INPUT_NOT_REDUCED\0" + "INVALID_RANGE\0" + "NEGATIVE_NUMBER\0" + "NOT_A_SQUARE\0" + "NOT_INITIALIZED\0" + "NO_INVERSE\0" + "PRIVATE_KEY_TOO_LARGE\0" + "P_IS_NOT_PRIME\0" + "TOO_MANY_ITERATIONS\0" + "TOO_MANY_TEMPORARY_VARIABLES\0" + "AES_KEY_SETUP_FAILED\0" + "BAD_DECRYPT\0" + "BAD_KEY_LENGTH\0" + "CTRL_NOT_IMPLEMENTED\0" + "CTRL_OPERATION_NOT_IMPLEMENTED\0" + "DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH\0" + "INITIALIZATION_ERROR\0" + "INPUT_NOT_INITIALIZED\0" + "INVALID_AD_SIZE\0" + "INVALID_KEY_LENGTH\0" + "INVALID_NONCE_SIZE\0" + "INVALID_OPERATION\0" + "IV_TOO_LARGE\0" + "NO_CIPHER_SET\0" + "NO_DIRECTION_SET\0" + "OUTPUT_ALIASES_INPUT\0" + "TAG_TOO_LARGE\0" + "TOO_LARGE\0" + "UNSUPPORTED_AD_SIZE\0" + "UNSUPPORTED_INPUT_SIZE\0" + "UNSUPPORTED_KEY_SIZE\0" + "UNSUPPORTED_NONCE_SIZE\0" + "UNSUPPORTED_TAG_SIZE\0" + "WRONG_FINAL_BLOCK_LENGTH\0" + "LIST_CANNOT_BE_NULL\0" + "MISSING_CLOSE_SQUARE_BRACKET\0" + "MISSING_EQUAL_SIGN\0" + "NO_CLOSE_BRACE\0" + "UNABLE_TO_CREATE_NEW_SECTION\0" + "VARIABLE_HAS_NO_VALUE\0" + "BAD_GENERATOR\0" + "INVALID_PUBKEY\0" + "MODULUS_TOO_LARGE\0" + "NO_PRIVATE_VALUE\0" + "BAD_Q_VALUE\0" + "MISSING_PARAMETERS\0" + "NEED_NEW_SETUP_VALUES\0" + "BIGNUM_OUT_OF_RANGE\0" + "COORDINATES_OUT_OF_RANGE\0" + "D2I_ECPKPARAMETERS_FAILURE\0" + "EC_GROUP_NEW_BY_NAME_FAILURE\0" + "GROUP2PKPARAMETERS_FAILURE\0" + "I2D_ECPKPARAMETERS_FAILURE\0" + "INCOMPATIBLE_OBJECTS\0" + "INVALID_COMPRESSED_POINT\0" + "INVALID_COMPRESSION_BIT\0" + "INVALID_ENCODING\0" + "INVALID_FIELD\0" + "INVALID_FORM\0" + "INVALID_GROUP_ORDER\0" + "INVALID_PRIVATE_KEY\0" + "MISSING_PRIVATE_KEY\0" + "NON_NAMED_CURVE\0" + "PKPARAMETERS2GROUP_FAILURE\0" + "POINT_AT_INFINITY\0" + "POINT_IS_NOT_ON_CURVE\0" + "SLOT_FULL\0" + "UNDEFINED_GENERATOR\0" + "UNKNOWN_GROUP\0" + "UNKNOWN_ORDER\0" + "WRONG_CURVE_PARAMETERS\0" + "WRONG_ORDER\0" + "KDF_FAILED\0" + "POINT_ARITHMETIC_FAILURE\0" + "BAD_SIGNATURE\0" + "NOT_IMPLEMENTED\0" + "RANDOM_NUMBER_GENERATION_FAILED\0" + "OPERATION_NOT_SUPPORTED\0" + "BN_DECODE_ERROR\0" + "COMMAND_NOT_SUPPORTED\0" + "CONTEXT_NOT_INITIALISED\0" + "DIFFERENT_KEY_TYPES\0" + "DIFFERENT_PARAMETERS\0" + "DIGEST_AND_KEY_TYPE_NOT_SUPPORTED\0" + "EXPECTING_AN_EC_KEY_KEY\0" + "EXPECTING_AN_RSA_KEY\0" + "EXPECTING_A_DH_KEY\0" + "EXPECTING_A_DSA_KEY\0" + "ILLEGAL_OR_UNSUPPORTED_PADDING_MODE\0" + "INVALID_CURVE\0" + "INVALID_DIGEST_LENGTH\0" + "INVALID_DIGEST_TYPE\0" + "INVALID_KEYBITS\0" + "INVALID_MGF1_MD\0" + "INVALID_PADDING_MODE\0" + "INVALID_PSS_PARAMETERS\0" + "INVALID_PSS_SALTLEN\0" + "INVALID_SALT_LENGTH\0" + "INVALID_TRAILER\0" + "KEYS_NOT_SET\0" + "NO_DEFAULT_DIGEST\0" + "NO_KEY_SET\0" + "NO_MDC2_SUPPORT\0" + "NO_NID_FOR_CURVE\0" + "NO_OPERATION_SET\0" + "NO_PARAMETERS_SET\0" + "OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE\0" + "OPERATON_NOT_INITIALIZED\0" + "PARAMETER_ENCODING_ERROR\0" + "UNKNOWN_DIGEST\0" + "UNKNOWN_MASK_DIGEST\0" + "UNKNOWN_MESSAGE_DIGEST_ALGORITHM\0" + "UNKNOWN_PUBLIC_KEY_TYPE\0" + "UNKNOWN_SIGNATURE_ALGORITHM\0" + "UNSUPPORTED_ALGORITHM\0" + "UNSUPPORTED_MASK_ALGORITHM\0" + "UNSUPPORTED_MASK_PARAMETER\0" + "UNSUPPORTED_SIGNATURE_TYPE\0" + "WRONG_PUBLIC_KEY_TYPE\0" + "OUTPUT_TOO_LARGE\0" + "UNKNOWN_NID\0" + "BAD_BASE64_DECODE\0" + "BAD_END_LINE\0" + "BAD_IV_CHARS\0" + "BAD_PASSWORD_READ\0" + "CIPHER_IS_NULL\0" + "ERROR_CONVERTING_PRIVATE_KEY\0" + "NOT_DEK_INFO\0" + "NOT_ENCRYPTED\0" + "NOT_PROC_TYPE\0" + "NO_START_LINE\0" + "READ_KEY\0" + "SHORT_HEADER\0" + "UNSUPPORTED_CIPHER\0" + "UNSUPPORTED_ENCRYPTION\0" + "BAD_PKCS12_DATA\0" + "BAD_PKCS12_VERSION\0" + "CIPHER_HAS_NO_OBJECT_IDENTIFIER\0" + "CRYPT_ERROR\0" + "ENCRYPT_ERROR\0" + "ERROR_SETTING_CIPHER_PARAMS\0" + "INCORRECT_PASSWORD\0" + "KEYGEN_FAILURE\0" + "KEY_GEN_ERROR\0" + "METHOD_NOT_SUPPORTED\0" + "MISSING_MAC\0" + "MULTIPLE_PRIVATE_KEYS_IN_PKCS12\0" + "PKCS12_PUBLIC_KEY_INTEGRITY_NOT_SUPPORTED\0" + "PKCS12_TOO_DEEPLY_NESTED\0" + "PRIVATE_KEY_DECODE_ERROR\0" + "PRIVATE_KEY_ENCODE_ERROR\0" + "UNKNOWN_ALGORITHM\0" + "UNKNOWN_CIPHER\0" + "UNKNOWN_CIPHER_ALGORITHM\0" + "UNKNOWN_HASH\0" + "UNSUPPORTED_PRIVATE_KEY_ALGORITHM\0" + "BAD_E_VALUE\0" + "BAD_FIXED_HEADER_DECRYPT\0" + "BAD_PAD_BYTE_COUNT\0" + "BAD_RSA_PARAMETERS\0" + "BLOCK_TYPE_IS_NOT_01\0" + "BN_NOT_INITIALIZED\0" + "CANNOT_RECOVER_MULTI_PRIME_KEY\0" + "CRT_PARAMS_ALREADY_GIVEN\0" + "CRT_VALUES_INCORRECT\0" + "DATA_LEN_NOT_EQUAL_TO_MOD_LEN\0" + "DATA_TOO_LARGE\0" + "DATA_TOO_LARGE_FOR_KEY_SIZE\0" + "DATA_TOO_LARGE_FOR_MODULUS\0" + "DATA_TOO_SMALL\0" + "DATA_TOO_SMALL_FOR_KEY_SIZE\0" + "DIGEST_TOO_BIG_FOR_RSA_KEY\0" + "D_E_NOT_CONGRUENT_TO_1\0" + "EMPTY_PUBLIC_KEY\0" + "FIRST_OCTET_INVALID\0" + "INCONSISTENT_SET_OF_CRT_VALUES\0" + "INTERNAL_ERROR\0" + "INVALID_MESSAGE_LENGTH\0" + "KEY_SIZE_TOO_SMALL\0" + "LAST_OCTET_INVALID\0" + "MUST_HAVE_AT_LEAST_TWO_PRIMES\0" + "NO_PUBLIC_EXPONENT\0" + "NULL_BEFORE_BLOCK_MISSING\0" + "N_NOT_EQUAL_P_Q\0" + "OAEP_DECODING_ERROR\0" + "ONLY_ONE_OF_P_Q_GIVEN\0" + "OUTPUT_BUFFER_TOO_SMALL\0" + "PADDING_CHECK_FAILED\0" + "PKCS_DECODING_ERROR\0" + "SLEN_CHECK_FAILED\0" + "SLEN_RECOVERY_FAILED\0" + "UNKNOWN_ALGORITHM_TYPE\0" + "UNKNOWN_PADDING_TYPE\0" + "VALUE_MISSING\0" + "WRONG_SIGNATURE_LENGTH\0" + "APP_DATA_IN_HANDSHAKE\0" + "ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT\0" + "BAD_ALERT\0" + "BAD_CHANGE_CIPHER_SPEC\0" + "BAD_DATA_RETURNED_BY_CALLBACK\0" + "BAD_DH_P_LENGTH\0" + "BAD_DIGEST_LENGTH\0" + "BAD_ECC_CERT\0" + "BAD_ECPOINT\0" + "BAD_HANDSHAKE_LENGTH\0" + "BAD_HANDSHAKE_RECORD\0" + "BAD_HELLO_REQUEST\0" + "BAD_LENGTH\0" + "BAD_PACKET_LENGTH\0" + "BAD_RSA_ENCRYPT\0" + "BAD_SRTP_MKI_VALUE\0" + "BAD_SRTP_PROTECTION_PROFILE_LIST\0" + "BAD_SSL_FILETYPE\0" + "BAD_WRITE_RETRY\0" + "BIO_NOT_SET\0" + "CANNOT_SERIALIZE_PUBLIC_KEY\0" + "CA_DN_LENGTH_MISMATCH\0" + "CA_DN_TOO_LONG\0" + "CCS_RECEIVED_EARLY\0" + "CERTIFICATE_VERIFY_FAILED\0" + "CERT_CB_ERROR\0" + "CERT_LENGTH_MISMATCH\0" + "CHANNEL_ID_NOT_P256\0" + "CHANNEL_ID_SIGNATURE_INVALID\0" + "CIPHER_CODE_WRONG_LENGTH\0" + "CIPHER_OR_HASH_UNAVAILABLE\0" + "CLIENTHELLO_PARSE_FAILED\0" + "CLIENTHELLO_TLSEXT\0" + "CONNECTION_REJECTED\0" + "CONNECTION_TYPE_NOT_SET\0" + "COOKIE_MISMATCH\0" + "D2I_ECDSA_SIG\0" + "DATA_BETWEEN_CCS_AND_FINISHED\0" + "DATA_LENGTH_TOO_LONG\0" + "DECRYPTION_FAILED\0" + "DECRYPTION_FAILED_OR_BAD_RECORD_MAC\0" + "DH_PUBLIC_VALUE_LENGTH_IS_WRONG\0" + "DIGEST_CHECK_FAILED\0" + "DTLS_MESSAGE_TOO_BIG\0" + "ECC_CERT_NOT_FOR_SIGNING\0" + "EMPTY_SRTP_PROTECTION_PROFILE_LIST\0" + "EMS_STATE_INCONSISTENT\0" + "ENCRYPTED_LENGTH_TOO_LONG\0" + "ERROR_IN_RECEIVED_CIPHER_LIST\0" + "EVP_DIGESTSIGNFINAL_FAILED\0" + "EVP_DIGESTSIGNINIT_FAILED\0" + "EXCESSIVE_MESSAGE_SIZE\0" + "EXTRA_DATA_IN_MESSAGE\0" + "FRAGMENT_MISMATCH\0" + "GOT_A_FIN_BEFORE_A_CCS\0" + "GOT_CHANNEL_ID_BEFORE_A_CCS\0" + "GOT_NEXT_PROTO_BEFORE_A_CCS\0" + "GOT_NEXT_PROTO_WITHOUT_EXTENSION\0" + "HANDSHAKE_FAILURE_ON_CLIENT_HELLO\0" + "HANDSHAKE_RECORD_BEFORE_CCS\0" + "HTTPS_PROXY_REQUEST\0" + "HTTP_REQUEST\0" + "INAPPROPRIATE_FALLBACK\0" + "INVALID_COMMAND\0" + "INVALID_MESSAGE\0" + "INVALID_SSL_SESSION\0" + "INVALID_TICKET_KEYS_LENGTH\0" + "LENGTH_MISMATCH\0" + "LIBRARY_HAS_NO_CIPHERS\0" + "MISSING_DH_KEY\0" + "MISSING_ECDSA_SIGNING_CERT\0" + "MISSING_RSA_CERTIFICATE\0" + "MISSING_RSA_ENCRYPTING_CERT\0" + "MISSING_RSA_SIGNING_CERT\0" + "MISSING_TMP_DH_KEY\0" + "MISSING_TMP_ECDH_KEY\0" + "MIXED_SPECIAL_OPERATOR_WITH_GROUPS\0" + "MTU_TOO_SMALL\0" + "NESTED_GROUP\0" + "NO_CERTIFICATES_RETURNED\0" + "NO_CERTIFICATE_ASSIGNED\0" + "NO_CERTIFICATE_SET\0" + "NO_CIPHERS_AVAILABLE\0" + "NO_CIPHERS_PASSED\0" + "NO_CIPHERS_SPECIFIED\0" + "NO_CIPHER_MATCH\0" + "NO_COMPRESSION_SPECIFIED\0" + "NO_METHOD_SPECIFIED\0" + "NO_P256_SUPPORT\0" + "NO_PRIVATE_KEY_ASSIGNED\0" + "NO_RENEGOTIATION\0" + "NO_REQUIRED_DIGEST\0" + "NO_SHARED_CIPHER\0" + "NO_SHARED_SIGATURE_ALGORITHMS\0" + "NO_SRTP_PROFILES\0" + "NULL_SSL_CTX\0" + "NULL_SSL_METHOD_PASSED\0" + "OLD_SESSION_CIPHER_NOT_RETURNED\0" + "OLD_SESSION_VERSION_NOT_RETURNED\0" + "PACKET_LENGTH_TOO_LONG\0" + "PARSE_TLSEXT\0" + "PATH_TOO_LONG\0" + "PEER_DID_NOT_RETURN_A_CERTIFICATE\0" + "PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE\0" + "PROTOCOL_IS_SHUTDOWN\0" + "PSK_IDENTITY_NOT_FOUND\0" + "PSK_NO_CLIENT_CB\0" + "PSK_NO_SERVER_CB\0" + "READ_BIO_NOT_SET\0" + "READ_TIMEOUT_EXPIRED\0" + "RECORD_LENGTH_MISMATCH\0" + "RECORD_TOO_LARGE\0" + "RENEGOTIATE_EXT_TOO_LONG\0" + "RENEGOTIATION_ENCODING_ERR\0" + "RENEGOTIATION_MISMATCH\0" + "REQUIRED_CIPHER_MISSING\0" + "RESUMED_EMS_SESSION_WITHOUT_EMS_EXTENSION\0" + "RESUMED_NON_EMS_SESSION_WITH_EMS_EXTENSION\0" + "SCSV_RECEIVED_WHEN_RENEGOTIATING\0" + "SERVERHELLO_TLSEXT\0" + "SESSION_ID_CONTEXT_UNINITIALIZED\0" + "SESSION_MAY_NOT_BE_CREATED\0" + "SIGNATURE_ALGORITHMS_ERROR\0" + "SRTP_COULD_NOT_ALLOCATE_PROFILES\0" + "SRTP_PROTECTION_PROFILE_LIST_TOO_LONG\0" + "SRTP_UNKNOWN_PROTECTION_PROFILE\0" + "SSL3_EXT_INVALID_SERVERNAME\0" + "SSL3_EXT_INVALID_SERVERNAME_TYPE\0" + "SSLV3_ALERT_BAD_CERTIFICATE\0" + "SSLV3_ALERT_BAD_RECORD_MAC\0" + "SSLV3_ALERT_CERTIFICATE_EXPIRED\0" + "SSLV3_ALERT_CERTIFICATE_REVOKED\0" + "SSLV3_ALERT_CERTIFICATE_UNKNOWN\0" + "SSLV3_ALERT_CLOSE_NOTIFY\0" + "SSLV3_ALERT_DECOMPRESSION_FAILURE\0" + "SSLV3_ALERT_HANDSHAKE_FAILURE\0" + "SSLV3_ALERT_ILLEGAL_PARAMETER\0" + "SSLV3_ALERT_NO_CERTIFICATE\0" + "SSLV3_ALERT_UNEXPECTED_MESSAGE\0" + "SSLV3_ALERT_UNSUPPORTED_CERTIFICATE\0" + "SSL_CTX_HAS_NO_DEFAULT_SSL_VERSION\0" + "SSL_HANDSHAKE_FAILURE\0" + "SSL_SESSION_ID_CALLBACK_FAILED\0" + "SSL_SESSION_ID_CONFLICT\0" + "SSL_SESSION_ID_CONTEXT_TOO_LONG\0" + "SSL_SESSION_ID_HAS_BAD_LENGTH\0" + "TLSV1_ALERT_ACCESS_DENIED\0" + "TLSV1_ALERT_DECODE_ERROR\0" + "TLSV1_ALERT_DECRYPTION_FAILED\0" + "TLSV1_ALERT_DECRYPT_ERROR\0" + "TLSV1_ALERT_EXPORT_RESTRICTION\0" + "TLSV1_ALERT_INAPPROPRIATE_FALLBACK\0" + "TLSV1_ALERT_INSUFFICIENT_SECURITY\0" + "TLSV1_ALERT_INTERNAL_ERROR\0" + "TLSV1_ALERT_NO_RENEGOTIATION\0" + "TLSV1_ALERT_PROTOCOL_VERSION\0" + "TLSV1_ALERT_RECORD_OVERFLOW\0" + "TLSV1_ALERT_UNKNOWN_CA\0" + "TLSV1_ALERT_USER_CANCELLED\0" + "TLSV1_BAD_CERTIFICATE_HASH_VALUE\0" + "TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE\0" + "TLSV1_CERTIFICATE_UNOBTAINABLE\0" + "TLSV1_UNRECOGNIZED_NAME\0" + "TLSV1_UNSUPPORTED_EXTENSION\0" + "TLS_CLIENT_CERT_REQ_WITH_ANON_CIPHER\0" + "TLS_ILLEGAL_EXPORTER_LABEL\0" + "TLS_INVALID_ECPOINTFORMAT_LIST\0" + "TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0" + "TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0" + "TOO_MANY_EMPTY_FRAGMENTS\0" + "TOO_MANY_WARNING_ALERTS\0" + "UNABLE_TO_FIND_ECDH_PARAMETERS\0" + "UNABLE_TO_FIND_PUBLIC_KEY_PARAMETERS\0" + "UNEXPECTED_GROUP_CLOSE\0" + "UNEXPECTED_MESSAGE\0" + "UNEXPECTED_OPERATOR_IN_GROUP\0" + "UNEXPECTED_RECORD\0" + "UNKNOWN_ALERT_TYPE\0" + "UNKNOWN_CERTIFICATE_TYPE\0" + "UNKNOWN_CIPHER_RETURNED\0" + "UNKNOWN_CIPHER_TYPE\0" + "UNKNOWN_KEY_EXCHANGE_TYPE\0" + "UNKNOWN_PROTOCOL\0" + "UNKNOWN_SSL_VERSION\0" + "UNKNOWN_STATE\0" + "UNPROCESSED_HANDSHAKE_DATA\0" + "UNSAFE_LEGACY_RENEGOTIATION_DISABLED\0" + "UNSUPPORTED_COMPRESSION_ALGORITHM\0" + "UNSUPPORTED_ELLIPTIC_CURVE\0" + "UNSUPPORTED_PROTOCOL\0" + "UNSUPPORTED_SSL_VERSION\0" + "USE_SRTP_NOT_NEGOTIATED\0" + "WRONG_CERTIFICATE_TYPE\0" + "WRONG_CIPHER_RETURNED\0" + "WRONG_CURVE\0" + "WRONG_MESSAGE_TYPE\0" + "WRONG_SIGNATURE_TYPE\0" + "WRONG_SSL_VERSION\0" + "WRONG_VERSION_NUMBER\0" + "X509_LIB\0" + "X509_VERIFICATION_SETUP_PROBLEMS\0" + "AKID_MISMATCH\0" + "BAD_PKCS7_VERSION\0" + "BAD_X509_FILETYPE\0" + "BASE64_DECODE_ERROR\0" + "CANT_CHECK_DH_KEY\0" + "CERT_ALREADY_IN_HASH_TABLE\0" + "CRL_ALREADY_DELTA\0" + "CRL_VERIFY_FAILURE\0" + "IDP_MISMATCH\0" + "INVALID_DIRECTORY\0" + "INVALID_FIELD_NAME\0" + "INVALID_TRUST\0" + "ISSUER_MISMATCH\0" + "KEY_TYPE_MISMATCH\0" + "KEY_VALUES_MISMATCH\0" + "LOADING_CERT_DIR\0" + "LOADING_DEFAULTS\0" + "NEWER_CRL_NOT_NEWER\0" + "NOT_PKCS7_SIGNED_DATA\0" + "NO_CERTIFICATES_INCLUDED\0" + "NO_CERT_SET_FOR_US_TO_VERIFY\0" + "NO_CRLS_INCLUDED\0" + "NO_CRL_NUMBER\0" + "PUBLIC_KEY_DECODE_ERROR\0" + "PUBLIC_KEY_ENCODE_ERROR\0" + "SHOULD_RETRY\0" + "UNABLE_TO_FIND_PARAMETERS_IN_CHAIN\0" + "UNABLE_TO_GET_CERTS_PUBLIC_KEY\0" + "UNKNOWN_KEY_TYPE\0" + "UNKNOWN_PURPOSE_ID\0" + "UNKNOWN_TRUST_ID\0" + "WRONG_LOOKUP_TYPE\0" + "BAD_IP_ADDRESS\0" + "BAD_OBJECT\0" + "BN_DEC2BN_ERROR\0" + "BN_TO_ASN1_INTEGER_ERROR\0" + "CANNOT_FIND_FREE_FUNCTION\0" + "DIRNAME_ERROR\0" + "DISTPOINT_ALREADY_SET\0" + "DUPLICATE_ZONE_ID\0" + "ERROR_CONVERTING_ZONE\0" + "ERROR_CREATING_EXTENSION\0" + "ERROR_IN_EXTENSION\0" + "EXPECTED_A_SECTION_NAME\0" + "EXTENSION_EXISTS\0" + "EXTENSION_NAME_ERROR\0" + "EXTENSION_NOT_FOUND\0" + "EXTENSION_SETTING_NOT_SUPPORTED\0" + "EXTENSION_VALUE_ERROR\0" + "ILLEGAL_EMPTY_EXTENSION\0" + "ILLEGAL_HEX_DIGIT\0" + "INCORRECT_POLICY_SYNTAX_TAG\0" + "INVALID_BOOLEAN_STRING\0" + "INVALID_EXTENSION_STRING\0" + "INVALID_MULTIPLE_RDNS\0" + "INVALID_NAME\0" + "INVALID_NULL_ARGUMENT\0" + "INVALID_NULL_NAME\0" + "INVALID_NULL_VALUE\0" + "INVALID_NUMBERS\0" + "INVALID_OBJECT_IDENTIFIER\0" + "INVALID_OPTION\0" + "INVALID_POLICY_IDENTIFIER\0" + "INVALID_PROXY_POLICY_SETTING\0" + "INVALID_PURPOSE\0" + "INVALID_SECTION\0" + "INVALID_SYNTAX\0" + "ISSUER_DECODE_ERROR\0" + "NEED_ORGANIZATION_AND_NUMBERS\0" + "NO_CONFIG_DATABASE\0" + "NO_ISSUER_CERTIFICATE\0" + "NO_ISSUER_DETAILS\0" + "NO_POLICY_IDENTIFIER\0" + "NO_PROXY_CERT_POLICY_LANGUAGE_DEFINED\0" + "NO_PUBLIC_KEY\0" + "NO_SUBJECT_DETAILS\0" + "ODD_NUMBER_OF_DIGITS\0" + "OPERATION_NOT_DEFINED\0" + "OTHERNAME_ERROR\0" + "POLICY_LANGUAGE_ALREADY_DEFINED\0" + "POLICY_PATH_LENGTH\0" + "POLICY_PATH_LENGTH_ALREADY_DEFINED\0" + "POLICY_WHEN_PROXY_LANGUAGE_REQUIRES_NO_POLICY\0" + "SECTION_NOT_FOUND\0" + "UNABLE_TO_GET_ISSUER_DETAILS\0" + "UNABLE_TO_GET_ISSUER_KEYID\0" + "UNKNOWN_BIT_STRING_ARGUMENT\0" + "UNKNOWN_EXTENSION\0" + "UNKNOWN_EXTENSION_NAME\0" + "UNKNOWN_OPTION\0" + "UNSUPPORTED_OPTION\0" + "USER_TOO_LONG\0" + ""; + diff --git a/third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx64.S b/third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx64.S new file mode 100644 index 00000000000..c414476cdf7 --- /dev/null +++ b/third_party/boringssl/linux-aarch64/crypto/aes/aesv8-armx64.S @@ -0,0 +1,751 @@ +#if defined(__aarch64__) +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +#if !defined(__clang__) +.arch armv8-a+crypto +#endif +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adr x3,.Lrcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +.Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo +#ifndef __ARMEB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + orr v0.16b,v6.16b,v6.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + mov v1.s[3], w10 + rev w12,w8 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx64.S b/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx64.S new file mode 100644 index 00000000000..a0a9b6807a9 --- /dev/null +++ b/third_party/boringssl/linux-aarch64/crypto/modes/ghashv8-armx64.S @@ -0,0 +1,232 @@ +#if defined(__aarch64__) +#include "arm_arch.h" + +.text +#if !defined(__clang__) +.arch armv8-a+crypto +#endif +.globl gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 + ext v3.16b,v17.16b,v17.16b,#8 + ushr v18.2d,v19.2d,#63 + dup v17.4s,v17.s[1] + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v18.16b,v18.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v18.16b,v18.16b,v18.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d,v22.2d},[x0] //store Htable[1..2] + + ret +.size gcm_init_v8,.-gcm_init_v8 +.globl gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... + shl v19.2d,v19.2d,#57 +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ext v3.16b,v17.16b,v17.16b,#8 + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 +.globl gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + ld1 {v0.2d},[x0] //load [rotated] Xi + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //alorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude oversteping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant +#ifndef __ARMEB__ + rev64 v16.16b,v16.16b + rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo .Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __ARMEB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs .Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq .Ldone_v8 //is x3 zero? +.Lodd_tail_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +.Ldone_v8: +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-aarch64/crypto/sha/sha1-armv8.S b/third_party/boringssl/linux-aarch64/crypto/sha/sha1-armv8.S new file mode 100644 index 00000000000..487a497da27 --- /dev/null +++ b/third_party/boringssl/linux-aarch64/crypto/sha/sha1-armv8.S @@ -0,0 +1,1215 @@ +#if defined(__aarch64__) +#include "arm_arch.h" + +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adr x4,.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b +.inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 15 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.comm OPENSSL_armcap_P,4,4 +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-aarch64/crypto/sha/sha256-armv8.S b/third_party/boringssl/linux-aarch64/crypto/sha/sha256-armv8.S new file mode 100644 index 00000000000..4834553dc11 --- /dev/null +++ b/third_party/boringssl/linux-aarch64/crypto/sha/sha256-armv8.S @@ -0,0 +1,1145 @@ +#if defined(__aarch64__) +#include "arm_arch.h" + +.text + + +.globl sha256_block_data_order +.type sha256_block_data_order,%function +.align 6 +sha256_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,.LK256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha256_block_data_order,.-sha256_block_data_order + +.align 6 +.type .LK256,%object +.LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator +.size .LK256,.-.LK256 +.align 3 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,.LK256 + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +.comm OPENSSL_armcap_P,4,4 +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-aarch64/crypto/sha/sha512-armv8.S b/third_party/boringssl/linux-aarch64/crypto/sha/sha512-armv8.S new file mode 100644 index 00000000000..654c473f276 --- /dev/null +++ b/third_party/boringssl/linux-aarch64/crypto/sha/sha512-armv8.S @@ -0,0 +1,1025 @@ +#if defined(__aarch64__) +#include "arm_arch.h" + +.text + + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,.LK512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.align 6 +.type .LK512,%object +.LK512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0 // terminator +.size .LK512,.-.LK512 +.align 3 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +.comm OPENSSL_armcap_P,4,4 +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/aes/aes-armv4.S b/third_party/boringssl/linux-arm/crypto/aes/aes-armv4.S new file mode 100644 index 00000000000..cb94841e028 --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/aes/aes-armv4.S @@ -0,0 +1,1200 @@ +#if defined(__arm__) + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +#if defined(__arm__) +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# if defined(__thumb2__) && !defined(__APPLE__) +.thumb +# else +.code 32 +# endif +#endif + +.type AES_Te,%object +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.size AES_Te,.-AES_Te + +@ void asm_AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +.type asm_AES_encrypt,%function +.align 5 +asm_AES_encrypt: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ asm_AES_encrypt +#else + adr r3,asm_AES_encrypt +#endif + stmdb sp!,{r1,r4-r12,lr} +#ifdef __APPLE__ + adr r10,AES_Te +#else + sub r10,r3,#asm_AES_encrypt-AES_Te @ Te +#endif + mov r12,r0 @ inp + mov r11,r2 +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size asm_AES_encrypt,.-asm_AES_encrypt + +.type _armv4_AES_encrypt,%function +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4,r5,r6,r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +.Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt + +.globl asm_AES_set_encrypt_key +.hidden asm_AES_set_encrypt_key +.type asm_AES_set_encrypt_key,%function +.align 5 +asm_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ asm_AES_set_encrypt_key +#else + adr r3,asm_AES_set_encrypt_key +#endif + teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq .Labrt + teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq .Labrt + + teq r1,#128 + beq .Lok + teq r1,#192 + beq .Lok + teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif + movne r0,#-1 + bne .Labrt + +.Lok: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#ifdef __APPLE__ + adr r10,AES_Te+1024 @ Te4 +#else + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 +#endif + +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne .Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +.L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne .L128_loop + sub r2,r11,#176 + b .Ldone + +.Lnot128: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne .Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +.L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... + eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#216 + beq .Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b .L192_loop + +.Lnot192: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +.L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#256 + beq .Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b .L256_loop + +.align 2 +.Ldone: mov r0,#0 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key + +.globl asm_AES_set_decrypt_key +.hidden asm_AES_set_decrypt_key +.type asm_AES_set_decrypt_key,%function +.align 5 +asm_AES_set_decrypt_key: + str lr,[sp,#-4]! @ push lr + bl _armv4_AES_set_encrypt_key + teq r0,#0 + ldr lr,[sp],#4 @ pop lr + bne .Labrt + + mov r0,r2 @ asm_AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key + +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.globl AES_set_enc2dec_key +.hidden AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + + ldr r12,[r0,#240] + mov r7,r0 @ input + add r8,r0,r12,lsl#4 + mov r11,r1 @ ouput + add r10,r1,r12,lsl#4 + str r12,[r1,#240] + +.Linv: ldr r0,[r7],#16 + ldr r1,[r7,#-12] + ldr r2,[r7,#-8] + ldr r3,[r7,#-4] + ldr r4,[r8],#-16 + ldr r5,[r8,#16+4] + ldr r6,[r8,#16+8] + ldr r9,[r8,#16+12] + str r0,[r10],#-16 + str r1,[r10,#16+4] + str r2,[r10,#16+8] + str r3,[r10,#16+12] + str r4,[r11],#16 + str r5,[r11,#-12] + str r6,[r11,#-8] + str r9,[r11,#-4] + teq r7,r8 + bne .Linv + + ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + str r0,[r11] + str r1,[r11,#4] + str r2,[r11,#8] + str r3,[r11,#12] + sub r11,r11,r12,lsl#3 + ldr r0,[r11,#16]! @ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +.Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne .Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key + +.type AES_Td,%object +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +@ void asm_AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +.type asm_AES_decrypt,%function +.align 5 +asm_AES_decrypt: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ asm_AES_decrypt +#else + adr r3,asm_AES_decrypt +#endif + stmdb sp!,{r1,r4-r12,lr} +#ifdef __APPLE__ + adr r10,AES_Td +#else + sub r10,r3,#asm_AES_decrypt-AES_Td @ Td +#endif + mov r12,r0 @ inp + mov r11,r2 +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size asm_AES_decrypt,.-asm_AES_decrypt + +.type _armv4_AES_decrypt,%function +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4,r5,r6,r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0,lsr#16 + and r8,lr,r0,lsr#8 + and r9,lr,r0 + mov r0,r0,lsr#24 +.Ldec_loop: + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] + and r7,lr,r1 @ i0 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] + and r8,lr,r1,lsr#16 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] + eor r0,r0,r7,ror#24 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,ror#8 + and r8,lr,r2 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r2,lsr#16 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] + and r7,lr,r3,lsr#16 @ i0 + eor r1,r1,r8,ror#24 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r3 @ i2 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] + eor r0,r0,r7,ror#8 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + eor r2,r2,r9,ror#24 + + ldr r4,[r11,#-12] + eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 + ldr r6,[r11,#-4] + and r7,lr,r0,lsr#16 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Ldec_loop + + add r10,r10,#1024 + + ldr r5,[r10,#0] @ prefetch Td4 + ldr r6,[r10,#32] + ldr r4,[r10,#64] + ldr r5,[r10,#96] + ldr r6,[r10,#128] + ldr r4,[r10,#160] + ldr r5,[r10,#192] + ldr r6,[r10,#224] + + ldrb r0,[r10,r0] @ Td4[s0>>24] + ldrb r4,[r10,r7] @ Td4[s0>>16] + and r7,lr,r1 @ i0 + ldrb r5,[r10,r8] @ Td4[s0>>8] + and r8,lr,r1,lsr#16 + ldrb r6,[r10,r9] @ Td4[s0>>0] + and r9,lr,r1,lsr#8 + + add r1,r10,r1,lsr#24 + ldrb r7,[r10,r7] @ Td4[s1>>0] + ldrb r1,[r1] @ Td4[s1>>24] + ldrb r8,[r10,r8] @ Td4[s1>>16] + eor r0,r7,r0,lsl#24 + ldrb r9,[r10,r9] @ Td4[s1>>8] + eor r1,r4,r1,lsl#8 + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,lsl#8 + and r8,lr,r2 @ i1 + ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] + and r9,lr,r2,lsr#16 + + add r2,r10,r2,lsr#24 + ldrb r2,[r2] @ Td4[s2>>24] + eor r0,r0,r7,lsl#8 + ldrb r9,[r10,r9] @ Td4[s2>>16] + eor r1,r8,r1,lsl#16 + and r7,lr,r3,lsr#16 @ i0 + eor r2,r5,r2,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] + and r9,lr,r3 @ i2 + + add r3,r10,r3,lsr#24 + ldrb r9,[r10,r9] @ Td4[s3>>0] + ldrb r3,[r3] @ Td4[s3>>24] + eor r0,r0,r7,lsl#16 + ldr r7,[r11,#0] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r9,r2,lsl#8 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#1024 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt +.byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 + +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/aes/aesv8-armx32.S b/third_party/boringssl/linux-arm/crypto/aes/aesv8-armx32.S new file mode 100644 index 00000000000..6f0ee7d81da --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/aes/aesv8-armx32.S @@ -0,0 +1,756 @@ +#if defined(__arm__) +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv7-a +.fpu neon +.code 32 +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + mov r3,#-1 + cmp r0,#0 + beq .Lenc_key_abort + cmp r2,#0 + beq .Lenc_key_abort + mov r3,#-2 + cmp r1,#128 + blt .Lenc_key_abort + cmp r1,#256 + bgt .Lenc_key_abort + tst r1,#0x3f + bne .Lenc_key_abort + + adr r3,.Lrcon + cmp r1,#192 + + veor q0,q0,q0 + vld1.8 {q3},[r0]! + mov r1,#8 @ reuse r1 + vld1.32 {q1,q2},[r3]! + + blt .Loop128 + beq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + bne .Loop128 + + vld1.32 {q1},[r3] + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + veor q3,q3,q10 + vst1.32 {q3},[r2] + add r2,r2,#0x50 + + mov r12,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {d16},[r0]! + vmov.i8 q10,#8 @ borrow q10 + vst1.32 {q3},[r2]! + vsub.i8 q2,q2,q10 @ adjust the mask + +.Loop192: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {d16},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + + vdup.32 q9,d7[1] + veor q9,q9,q8 + veor q10,q10,q1 + vext.8 q8,q0,q8,#12 + vshl.u8 q1,q1,#1 + veor q8,q8,q9 + veor q3,q3,q10 + veor q8,q8,q10 + vst1.32 {q3},[r2]! + bne .Loop192 + + mov r12,#12 + add r2,r2,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {q8},[r0] + mov r1,#7 + mov r12,#14 + vst1.32 {q3},[r2]! + +.Loop256: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q8},[r2]! +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + vst1.32 {q3},[r2]! + beq .Ldone + + vdup.32 q10,d7[1] + vext.8 q9,q0,q8,#12 +.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + + veor q8,q8,q10 + b .Loop256 + +.Ldone: + str r12,[r2] + mov r3,#0 + +.Lenc_key_abort: + mov r0,r3 @ return value + + bx lr +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stmdb sp!,{r4,lr} + bl .Lenc_key + + cmp r0,#0 + bne .Ldec_key_abort + + sub r2,r2,#240 @ restore original r2 + mov r4,#-16 + add r0,r2,r12,lsl#4 @ end of key schedule + + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + +.Loop_imc: + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + cmp r0,r2 + bhi .Loop_imc + + vld1.32 {q0},[r2] +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vst1.32 {q0},[r0] + + eor r0,r0,r0 @ return value +.Ldec_key_abort: + ldmia sp!,{r4,pc} +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_enc: +.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2]! + subs r3,r3,#2 +.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q1},[r2]! + bgt .Loop_enc + +.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 +.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2] +.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_dec: +.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2]! + subs r3,r3,#2 +.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q1},[r2]! + bgt .Loop_dec + +.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 +.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2] +.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,lr} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldmia ip,{r4,r5} @ load remaining args + subs r2,r2,#16 + mov r8,#16 + blo .Lcbc_abort + moveq r8,#0 + + cmp r5,#0 @ en- or decrypting? + ldr r5,[r3,#240] + and r2,r2,#-16 + vld1.8 {q6},[r4] + vld1.8 {q0},[r0],r8 + + vld1.32 {q8,q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10,q11},[r7]! + vld1.32 {q12,q13},[r7]! + vld1.32 {q14,q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + beq .Lcbc_dec + + cmp r5,#2 + veor q0,q0,q6 + veor q5,q8,q7 + beq .Lcbc_enc128 + + vld1.32 {q2,q3},[r7] + add r7,r3,#16 + add r6,r3,#16*4 + add r12,r3,#16*5 +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r14,r3,#16*6 + add r3,r3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc: +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r6] + cmp r5,#4 +.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r12] + beq .Lcbc_enc192 + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r14] +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r3] + nop + +.Lcbc_enc192: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 +.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r7] @ re-pre-load rndkey[1] +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc + + vst1.8 {q6},[r1]! + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {q2,q3},[r7] +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc128: +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 +.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 +.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc128 + + vst1.8 {q6},[r1]! + b .Lcbc_done +.align 5 +.Lcbc_dec: + vld1.8 {q10},[r0]! + subs r2,r2,#32 @ bias + add r6,r5,#2 + vorr q3,q0,q0 + vorr q1,q0,q0 + vorr q11,q10,q10 + blo .Lcbc_dec_tail + + vorr q1,q10,q10 + vld1.8 {q10},[r0]! + vorr q2,q0,q0 + vorr q3,q1,q1 + vorr q11,q10,q10 + +.Loop3x_cbc_dec: +.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! + bgt .Loop3x_cbc_dec + +.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q4,q6,q7 + subs r2,r2,#0x30 + veor q5,q2,q7 + movlo r6,r2 @ r6, r6, is zero at this point +.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 + add r0,r0,r6 @ r0 is adjusted in such way that + @ at exit from the loop q1-q10 + @ are loaded with last "words" + vorr q6,q11,q11 + mov r7,r3 +.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q2},[r0]! +.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q3},[r0]! +.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 +.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 +.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q11},[r0]! +.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 +.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 +.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + add r6,r5,#2 + veor q4,q4,q0 + veor q5,q5,q1 + veor q10,q10,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q4},[r1]! + vorr q0,q2,q2 + vst1.8 {q5},[r1]! + vorr q1,q3,q3 + vst1.8 {q10},[r1]! + vorr q10,q11,q11 + bhs .Loop3x_cbc_dec + + cmn r2,#0x30 + beq .Lcbc_done + nop + +.Lcbc_dec_tail: +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! + bgt .Lcbc_dec_tail + +.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 +.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 +.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + cmn r2,#0x20 +.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q6,q7 +.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 +.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 +.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 +.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 +.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 +.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + beq .Lcbc_dec_one + veor q5,q5,q1 + veor q9,q9,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + vst1.8 {q9},[r1]! + b .Lcbc_done + +.Lcbc_dec_one: + veor q5,q5,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + +.Lcbc_done: + vst1.8 {q6},[r4] +.Lcbc_abort: + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,pc} +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg + ldr r5,[r3,#240] + + ldr r8, [r4, #12] + vld1.32 {q0},[r4] + + vld1.32 {q8,q9},[r3] @ load key schedule... + sub r5,r5,#4 + mov r12,#16 + cmp r2,#2 + add r7,r3,r5,lsl#4 @ pointer to last 5 round keys + sub r5,r5,#2 + vld1.32 {q12,q13},[r7]! + vld1.32 {q14,q15},[r7]! + vld1.32 {q7},[r7] + add r7,r3,#32 + mov r6,r5 + movlo r12,#0 +#ifndef __ARMEB__ + rev r8, r8 +#endif + vorr q1,q0,q0 + add r10, r8, #1 + vorr q10,q0,q0 + add r8, r8, #2 + vorr q6,q0,q0 + rev r10, r10 + vmov.32 d3[1],r10 + bls .Lctr32_tail + rev r12, r8 + sub r2,r2,#3 @ bias + vmov.32 d21[1],r12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q9},[r7]! + bgt .Loop3x_ctr32 + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 + vld1.8 {q2},[r0]! + vorr q0,q6,q6 +.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 +.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.8 {q3},[r0]! + vorr q1,q6,q6 +.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vld1.8 {q11},[r0]! + mov r7,r3 +.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 +.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 + vorr q10,q6,q6 + add r9,r8,#1 +.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + veor q2,q2,q7 + add r10,r8,#2 +.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + veor q3,q3,q7 + add r8,r8,#3 +.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + veor q11,q11,q7 + rev r9,r9 +.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d1[1], r9 + rev r10,r10 +.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 +.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 +.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 +.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vmov.32 d3[1], r10 + rev r12,r8 +.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 +.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d21[1], r12 + subs r2,r2,#3 +.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 +.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 +.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 + + veor q2,q2,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vst1.8 {q2},[r1]! + veor q3,q3,q5 + mov r6,r5 + vst1.8 {q3},[r1]! + veor q11,q11,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q11},[r1]! + bhs .Loop3x_ctr32 + + adds r2,r2,#3 + beq .Lctr32_done + cmp r2,#1 + mov r12,#16 + moveq r12,#0 + +.Lctr32_tail: +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q8},[r7]! + subs r6,r6,#2 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q9},[r7]! + bgt .Lctr32_tail + +.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 +.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q2},[r0],r12 +.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q3},[r0] +.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q2,q2,q7 +.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 +.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 +.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 +.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q3,q3,q7 +.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 +.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 + + cmp r2,#1 + veor q2,q2,q0 + veor q3,q3,q1 + vst1.8 {q2},[r1]! + beq .Lctr32_done + vst1.8 {q3},[r1] + +.Lctr32_done: + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/aes/bsaes-armv7.S b/third_party/boringssl/linux-arm/crypto/aes/bsaes-armv7.S new file mode 100644 index 00000000000..dd84f3589dd --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/aes/bsaes-armv7.S @@ -0,0 +1,2579 @@ +#if defined(__arm__) + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ . Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. +@ +@ + +#if defined(__arm__) +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#if defined(__thumb2__) && !defined(__APPLE__) +.thumb +#else +.code 32 +#endif + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key +#ifdef __APPLE__ + adr r6,.LM0ISR +#else + add r6,r6,#.LM0ISR-_bsaes_decrypt8 +#endif + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8,q9,q10,q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR:@ InvShiftRows constants +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR:@ ShiftRows constants +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: +.quad 0x090d01050c000408, 0x03070b0f060a0e02 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key +#ifdef __APPLE__ + adr r6,.LM0SR +#else + sub r6,r6,#_bsaes_encrypt8-.LM0SR +#endif + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8,q9,q10,q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key +#ifdef __APPLE__ + adr r6,.LM0 +#else + sub r6,r6,#_bsaes_key_convert-.LM0 +#endif + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! @ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0,q1,q2,q3,q4,q5,q6,q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert + + + +.globl bsaes_cbc_encrypt +.hidden bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 + +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0,q1}, [r0]! @ load input + vld1.8 {q2,q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4,q5}, [r0]! + mov r5, r10 + vld1.8 {q6,q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14,q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10,q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0,q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8,q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0,q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0,q1}, [r1]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.hidden bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter +#ifdef __APPLE__ + mov r8, #:lower16:(.LREVM0SR-.LM0) + add r8, r6, r8 +#else + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 +#endif + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 + add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter +#ifdef __APPLE__ + mov r6, #:lower16:(.LREVM0SR-.LSR) + sub r6, r8, r6 +#else + sub r6, r8, #.LREVM0SR-.LSR @ pass constants +#endif + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8,q9}, [r0]! @ load input + vld1.8 {q10,q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12,q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14,q15}, [r0]! + veor q3, q12 + vst1.8 {q0,q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! + vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0,q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4,r5,r6,r7,r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter .LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0,q1} + + ldmia sp!, {r4,r5,r6,r7,r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.hidden bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 + sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6,q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14,q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10,q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12,q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10,q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10,q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: +.quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8,q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8,q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0,q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0,q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.hidden bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 + sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + +#ifndef XTS_CHAIN_TWEAK + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes +#endif + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6,q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14,q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10,q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12,q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10,q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12,q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8,q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10,q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8,q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10,q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8,q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0,q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8,q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0,q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! + mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero:@ wipe key schedule [if any] + vstmia sp!, {q0,q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/bn/armv4-mont.S b/third_party/boringssl/linux-arm/crypto/bn/armv4-mont.S new file mode 100644 index 00000000000..68dfb2c1724 --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/bn/armv4-mont.S @@ -0,0 +1,589 @@ +#if defined(__arm__) +#include "arm_arch.h" + +.text +.code 32 + +#if __ARM_MAX_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.Lbn_mul_mont +#endif + +.globl bn_mul_mont +.hidden bn_mul_mont +.type bn_mul_mont,%function + +.align 5 +bn_mul_mont: +.Lbn_mul_mont: + ldr ip,[sp,#4] @ load num + stmdb sp!,{r0,r2} @ sp points at argument block +#if __ARM_MAX_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] +#ifdef __APPLE__ + ldr r0,[r0] +#endif + tst r0,#1 @ NEON available? + ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov r0,ip @ load num + movlt r0,#0 + addlt sp,sp,#2*4 + blt .Labrt + + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers + + mov r0,r0,lsl#2 @ rescale r0 for byte count + sub sp,sp,r0 @ alloca(4*num) + sub sp,sp,#4 @ +extra dword + sub r0,r0,#4 @ "num=num-1" + add r4,r2,r0 @ &bp[num-1] + + add r0,sp,r0 @ r0 to point at &tp[num-1] + ldr r8,[r0,#14*4] @ &n0 + ldr r2,[r2] @ bp[0] + ldr r5,[r1],#4 @ ap[0],ap++ + ldr r6,[r3],#4 @ np[0],np++ + ldr r8,[r8] @ *n0 + str r4,[r0,#15*4] @ save &bp[num] + + umull r10,r11,r5,r2 @ ap[0]*bp[0] + str r8,[r0,#14*4] @ save n0 value + mul r8,r10,r8 @ "tp[0]"*n0 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" + mov r4,sp + +.L1st: + ldr r5,[r1],#4 @ ap[j],ap++ + mov r10,r11 + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[0] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .L1st + + adds r12,r12,r11 + ldr r4,[r0,#13*4] @ restore bp + mov r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + +.Louter: + sub r7,r0,sp @ "original" r0-1 value + sub r1,r1,r7 @ "rewind" ap to &ap[1] + ldr r2,[r4,#4]! @ *(++bp) + sub r3,r3,r7 @ "rewind" np to &np[1] + ldr r5,[r1,#-4] @ ap[0] + ldr r10,[sp] @ tp[0] + ldr r6,[r3,#-4] @ np[0] + ldr r7,[sp,#4] @ tp[1] + + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] + str r4,[r0,#13*4] @ save bp + mul r8,r10,r8 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" + mov r4,sp + +.Linner: + ldr r5,[r1],#4 @ ap[j],ap++ + adds r10,r11,r7 @ +=tp[j] + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[i] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adc r11,r11,#0 + ldr r7,[r4,#8] @ tp[j+1] + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .Linner + + adds r12,r12,r11 + mov r14,#0 + ldr r4,[r0,#13*4] @ restore bp + adc r14,r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adds r12,r12,r7 + ldr r7,[r0,#15*4] @ restore &bp[num] + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + + cmp r4,r7 + bne .Louter + + ldr r2,[r0,#12*4] @ pull rp + add r0,r0,#4 @ r0 to point at &tp[num] + sub r5,r0,sp @ "original" num value + mov r4,sp @ "rewind" r4 + mov r1,r4 @ "borrow" r1 + sub r3,r3,r5 @ "rewind" r3 to &np[0] + + subs r7,r7,r7 @ "clear" carry flag +.Lsub: ldr r7,[r4],#4 + ldr r6,[r3],#4 + sbcs r7,r7,r6 @ tp[j]-np[j] + str r7,[r2],#4 @ rp[j]= + teq r4,r0 @ preserve carry + bne .Lsub + sbcs r14,r14,#0 @ upmost carry + mov r4,sp @ "rewind" r4 + sub r2,r2,r5 @ "rewind" r2 + + and r1,r4,r14 + bic r3,r2,r14 + orr r1,r1,r3 @ ap=borrow?tp:rp + +.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh + str sp,[r4],#4 @ zap tp + str r7,[r2],#4 + cmp r4,r0 + bne .Lcopy + + add sp,r0,#4 @ skip over tp[num+1] + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers + add sp,sp,#2*4 @ skip over {r0,r2} + mov r0,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size bn_mul_mont,.-bn_mul_mont +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + ldmia ip,{r4,r5} @ load rest of parameter block + + sub r7,sp,#16 + vld1.32 {d28[0]}, [r2,:32]! + sub r7,r7,r5,lsl#4 + vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-( + and r7,r7,#-64 + vld1.32 {d30[0]}, [r4,:32] + mov sp,r7 @ alloca + veor d8,d8,d8 + subs r8,r5,#8 + vzip.16 d28,d8 + + vmull.u32 q6,d28,d0[0] + vmull.u32 q7,d28,d0[1] + vmull.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmull.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + vmul.u32 d29,d10,d30 + + vmull.u32 q10,d28,d2[0] + vld1.32 {d4,d5,d6,d7}, [r3]! + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... + + vmlal.u32 q6,d29,d4[0] + sub r9,r5,#1 + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + vzip.16 d28,d8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmlal.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + subs r9,r9,#1 + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + bne .LNEON_outer8 + + vadd.u64 d12,d12,d10 + mov r7,sp + vshr.u64 d10,d12,#16 + mov r8,r5 + vadd.u64 d13,d13,d10 + add r6,sp,#16 + vshr.u64 d10,d13,#16 + vzip.16 d12,d13 + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0,d1,d2,d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vld1.32 {d4,d5}, [r3]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q6,q7}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vst1.64 {q8,q9}, [r7,:256]! + + vmull.u32 q6,d28,d0[0] + vld1.32 {d6,d7}, [r3]! + vmull.u32 q7,d28,d0[1] + vst1.64 {q10,q11}, [r7,:256]! + vmull.u32 q8,d28,d1[0] + vmull.u32 q9,d28,d1[1] + vst1.64 {q12,q13}, [r7,:256]! + + vmull.u32 q10,d28,d2[0] + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + sub r9,r5,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6,q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vshr.u64 d10,d10,#16 + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q12,d29,d7[0] + vst1.64 {q8,q9}, [r7,:256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10,q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + veor q4,q4,q4 + vst1.64 {q12,q13}, [r7,:256]! + vld1.64 {q7,q8}, [r6, :256]! + vst1.64 {q4}, [r7,:128] + vshr.u64 d10,d10,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {d28[0]}, [r2,:32]! + sub r3,r3,r5,lsl#2 @ rewind r3 + vld1.32 {d0,d1,d2,d3}, [r1]! + veor d8,d8,d8 + mov r7,sp + vzip.16 d28,d8 + sub r8,r5,#8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9,q10},[r6,:256]! + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11,q12},[r6,:256]! + vmlal.u32 q9,d28,d1[1] + + vshl.i64 d10,d13,#16 + veor d8,d8,d8 + vadd.u64 d10,d10,d12 + vld1.64 {q13},[r6,:128]! + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vld1.32 {d4,d5,d6,d7}, [r3]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + +.LNEON_inner: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0,d1,d2,d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + vst1.64 {q6,q7}, [r7,:256]! + + vmlal.u32 q10,d29,d6[0] + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q8,q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7,q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + vst1.64 {q10,q11}, [r7,:256]! + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9,q10}, [r6, :256]! + vmlal.u32 q7,d28,d0[1] + vst1.64 {q12,q13}, [r7,:256]! + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11,q12}, [r6, :256]! + vmlal.u32 q9,d28,d1[1] + vld1.32 {d4,d5,d6,d7}, [r3]! + + vmlal.u32 q10,d28,d2[0] + vld1.64 {q13}, [r6, :128]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vmlal.u32 q13,d28,d3[1] + + bne .LNEON_inner + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + subs r9,r9,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6,q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d10,#16 + vst1.64 {q8,q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7,q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10,q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + vst1.64 {q12,q13}, [r7,:256]! + vshr.u64 d10,d10,#16 + + bne .LNEON_outer + + mov r7,sp + mov r8,r5 + +.LNEON_tail: + vadd.u64 d12,d12,d10 + vld1.64 {q9,q10}, [r6, :256]! + vshr.u64 d10,d12,#16 + vadd.u64 d13,d13,d10 + vld1.64 {q11,q12}, [r6, :256]! + vshr.u64 d10,d13,#16 + vld1.64 {q13}, [r6, :128]! + vzip.16 d12,d13 + +.LNEON_tail2: + vadd.u64 d14,d14,d10 + vst1.32 {d12[0]}, [r7, :32]! + vshr.u64 d10,d14,#16 + vadd.u64 d15,d15,d10 + vshr.u64 d10,d15,#16 + vzip.16 d14,d15 + + vadd.u64 d16,d16,d10 + vst1.32 {d14[0]}, [r7, :32]! + vshr.u64 d10,d16,#16 + vadd.u64 d17,d17,d10 + vshr.u64 d10,d17,#16 + vzip.16 d16,d17 + + vadd.u64 d18,d18,d10 + vst1.32 {d16[0]}, [r7, :32]! + vshr.u64 d10,d18,#16 + vadd.u64 d19,d19,d10 + vshr.u64 d10,d19,#16 + vzip.16 d18,d19 + + vadd.u64 d20,d20,d10 + vst1.32 {d18[0]}, [r7, :32]! + vshr.u64 d10,d20,#16 + vadd.u64 d21,d21,d10 + vshr.u64 d10,d21,#16 + vzip.16 d20,d21 + + vadd.u64 d22,d22,d10 + vst1.32 {d20[0]}, [r7, :32]! + vshr.u64 d10,d22,#16 + vadd.u64 d23,d23,d10 + vshr.u64 d10,d23,#16 + vzip.16 d22,d23 + + vadd.u64 d24,d24,d10 + vst1.32 {d22[0]}, [r7, :32]! + vshr.u64 d10,d24,#16 + vadd.u64 d25,d25,d10 + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d25,#16 + vzip.16 d24,d25 + + vadd.u64 d26,d26,d10 + vst1.32 {d24[0]}, [r7, :32]! + vshr.u64 d10,d26,#16 + vadd.u64 d27,d27,d10 + vld1.64 {q7,q8}, [r6, :256]! + vshr.u64 d10,d27,#16 + vzip.16 d26,d27 + subs r8,r8,#8 + vst1.32 {d26[0]}, [r7, :32]! + + bne .LNEON_tail + + vst1.32 {d10[0]}, [r7, :32] @ top-most bit + sub r3,r3,r5,lsl#2 @ rewind r3 + subs r1,sp,#0 @ clear carry flag + add r2,sp,r5,lsl#2 + +.LNEON_sub: + ldmia r1!, {r4,r5,r6,r7} + ldmia r3!, {r8,r9,r10,r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq r1,r2 @ preserves carry + stmia r0!, {r8,r9,r10,r11} + bne .LNEON_sub + + ldr r10, [r1] @ load top-most bit + veor q0,q0,q0 + sub r11,r2,sp @ this is num*4 + veor q1,q1,q1 + mov r1,sp + sub r0,r0,r11 @ rewind r0 + mov r3,r2 @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia r1!, {r4,r5,r6,r7} + ldmia r0, {r8,r9,r10,r11} + movcc r8, r4 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + movcc r11,r7 + ldmia r1, {r4,r5,r6,r7} + stmia r0!, {r8,r9,r10,r11} + sub r1,r1,#16 + ldmia r0, {r8,r9,r10,r11} + movcc r8, r4 + vst1.64 {q0,q1}, [r1,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0,q1}, [r3,:256]! @ wipe + movcc r11,r7 + teq r1,r2 @ preserves carry + stmia r0!, {r8,r9,r10,r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11} + bx lr @ .word 0xe12fff1e +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/modes/ghash-armv4.S b/third_party/boringssl/linux-arm/crypto/modes/ghash-armv4.S new file mode 100644 index 00000000000..c6f025d7597 --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/modes/ghash-armv4.S @@ -0,0 +1,541 @@ +#if defined(__arm__) +#if defined(__arm__) +#include "arm_arch.h" + +.syntax unified + +.text +.code 32 + +#ifdef __APPLE__ +#define ldrplb ldrbpl +#define ldrneb ldrbne +#endif + +.type rem_4bit,%object +.align 5 +rem_4bit: +.short 0x0000,0x1C20,0x3840,0x2460 +.short 0x7080,0x6CA0,0x48C0,0x54E0 +.short 0xE100,0xFD20,0xD940,0xC560 +.short 0x9180,0x8DA0,0xA9C0,0xB5E0 +.size rem_4bit,.-rem_4bit + +.type rem_4bit_get,%function +rem_4bit_get: + sub r2,pc,#8 + sub r2,r2,#32 @ &rem_4bit + b .Lrem_4bit_got + nop +.size rem_4bit_get,.-rem_4bit_get + +.globl gcm_ghash_4bit +.hidden gcm_ghash_4bit +.type gcm_ghash_4bit,%function +gcm_ghash_4bit: + sub r12,pc,#8 + add r3,r2,r3 @ r3 to point at the end + stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too + sub r12,r12,#48 @ &rem_4bit + + ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ... + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack + + ldrb r12,[r2,#15] + ldrb r14,[r0,#15] +.Louter: + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo] + add r11,r1,r14 + ldrb r12,[r2,#14] + + and r14,r4,#0xf @ rem + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[sp,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + ldrb r14,[r0,#14] + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + eor r7,r7,r8,lsl#16 + +.Linner: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[sp,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrbpl r12,[r2,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + ldrbpl r8,[r0,r3] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r9,[sp,r14] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eorpl r12,r12,r8 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem] + bpl .Linner + + ldr r3,[sp,#32] @ re-load r3/end + add r2,r2,#16 + mov r14,r4 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + cmp r2,r3 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + ldrbne r12,[r2,#15] +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + + bne .Louter + + add sp,sp,#36 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_ghash_4bit,.-gcm_ghash_4bit + +.globl gcm_gmult_4bit +.hidden gcm_gmult_4bit +.type gcm_gmult_4bit,%function +gcm_gmult_4bit: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} + ldrb r12,[r0,#15] + b rem_4bit_get +.Lrem_4bit_got: + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo] + ldrb r12,[r0,#14] + + add r11,r1,r14 + and r14,r4,#0xf @ rem + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + and r14,r12,#0xf0 + eor r7,r7,r8,lsl#16 + and r12,r12,#0x0f + +.Loop: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[r2,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrbpl r12,[r0,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + bpl .Loop +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_gmult_4bit,.-gcm_gmult_4bit +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl gcm_init_neon +.hidden gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 d7,[r1]! @ load H + vmov.i8 q8,#0xe1 + vld1.64 d6,[r1] + vshl.i64 d17,#57 + vshr.u64 d16,#63 @ t0=0xc2....01 + vdup.8 q9,d7[7] + vshr.u64 d26,d6,#63 + vshr.s8 q9,#7 @ broadcast carry bit + vshl.i64 q3,q3,#1 + vand q8,q8,q9 + vorr d7,d26 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vstmia r0,{q3} + + bx lr @ bx lr +.size gcm_init_neon,.-gcm_init_neon + +.globl gcm_gmult_neon +.hidden gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + vld1.64 d7,[r0]! @ load Xi + vld1.64 d6,[r0]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26,d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + mov r3,#16 + b .Lgmult_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.globl gcm_ghash_neon +.hidden gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 d1,[r0]! @ load Xi + vld1.64 d0,[r0]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26,d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 d7,[r2]! @ load inp + vld1.64 d6,[r2]! +#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + veor q3,q0 @ inp^=Xi +.Lgmult_neon: + vext.8 d16, d26, d26, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d0, d6, d6, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d18, d26, d26, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d26, d22 @ G = A*B2 + vext.8 d20, d26, d26, #3 @ A3 + veor q8, q8, q0 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d0, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d26, d22 @ K = A*B4 + veor q10, q10, q0 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q0, d26, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q0, q0, q8 + veor q0, q0, q10 + veor d6,d6,d7 @ Karatsuba pre-processing + vext.8 d16, d28, d28, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d2, d6, d6, #1 @ B1 + vmull.p8 q1, d28, d2 @ E = A*B1 + vext.8 d18, d28, d28, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d28, d22 @ G = A*B2 + vext.8 d20, d28, d28, #3 @ A3 + veor q8, q8, q1 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d2, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q1, d28, d2 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d28, d22 @ K = A*B4 + veor q10, q10, q1 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q1, d28, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q1, q1, q8 + veor q1, q1, q10 + vext.8 d16, d27, d27, #1 @ A1 + vmull.p8 q8, d16, d7 @ F = A1*B + vext.8 d4, d7, d7, #1 @ B1 + vmull.p8 q2, d27, d4 @ E = A*B1 + vext.8 d18, d27, d27, #2 @ A2 + vmull.p8 q9, d18, d7 @ H = A2*B + vext.8 d22, d7, d7, #2 @ B2 + vmull.p8 q11, d27, d22 @ G = A*B2 + vext.8 d20, d27, d27, #3 @ A3 + veor q8, q8, q2 @ L = E + F + vmull.p8 q10, d20, d7 @ J = A3*B + vext.8 d4, d7, d7, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q2, d27, d4 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d7, d7, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d27, d22 @ K = A*B4 + veor q10, q10, q2 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q2, d27, d7 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q2, q2, q8 + veor q2, q2, q10 + veor q1,q1,q0 @ Karatsuba post-processing + veor q1,q1,q2 + veor d1,d1,d2 + veor d4,d4,d3 @ Xh|Xl - 256-bit result + + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 q9,q0,#57 @ 1st phase + vshl.i64 q10,q0,#62 + veor q10,q10,q9 @ + vshl.i64 q9,q0,#63 + veor q10, q10, q9 @ + veor d1,d1,d20 @ + veor d4,d4,d21 + + vshr.u64 q10,q0,#1 @ 2nd phase + veor q2,q2,q0 + veor q0,q0,q10 @ + vshr.u64 q10,q10,#6 + vshr.u64 q0,q0,#1 @ + veor q0,q0,q2 @ + veor q0,q0,q10 @ + + subs r3,#16 + bne .Loop_neon + +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + sub r0,#16 + vst1.64 d1,[r0]! @ write out Xi + vst1.64 d0,[r0] + + bx lr @ bx lr +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 + +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/modes/ghashv8-armx32.S b/third_party/boringssl/linux-arm/crypto/modes/ghashv8-armx32.S new file mode 100644 index 00000000000..bdbbae90d4d --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/modes/ghashv8-armx32.S @@ -0,0 +1,233 @@ +#if defined(__arm__) +#include "arm_arch.h" + +.text +.fpu neon +.code 32 +.globl gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {q9},[r1] @ load input H + vmov.i8 q11,#0xe1 + vshl.i64 q11,q11,#57 @ 0xc2.0 + vext.8 q3,q9,q9,#8 + vshr.u64 q10,q11,#63 + vdup.32 q9,d18[1] + vext.8 q8,q10,q11,#8 @ t0=0xc2....01 + vshr.u64 q10,q3,#63 + vshr.s32 q9,q9,#31 @ broadcast carry bit + vand q10,q10,q8 + vshl.i64 q3,q3,#1 + vext.8 q10,q10,q10,#8 + vand q8,q8,q9 + vorr q3,q3,q10 @ H<<<=1 + veor q12,q3,q8 @ twisted H + vst1.64 {q12},[r0]! @ store Htable[0] + + @ calculate H^2 + vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing +.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12 + veor q8,q8,q12 +.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12 +.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q14,q0,q10 + + vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing + veor q9,q9,q14 + vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed + vst1.64 {q13,q14},[r0] @ store Htable[1..2] + + bx lr +.size gcm_init_v8,.-gcm_init_v8 +.globl gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {q9},[r0] @ load Xi + vmov.i8 q11,#0xe1 + vld1.64 {q12,q13},[r1] @ load twisted H, ... + vshl.u64 q11,q11,#57 +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q3,q9,q9,#8 + +.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi +.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_gmult_v8,.-gcm_gmult_v8 +.globl gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so + vld1.64 {q0},[r0] @ load [rotated] Xi + @ "[rotated]" means that + @ loaded value would have + @ to be rotated in order to + @ make it appear as in + @ alorithm specification + subs r3,r3,#32 @ see if r3 is 32 or larger + mov r12,#16 @ r12 is used as post- + @ increment for input pointer; + @ as loop is modulo-scheduled + @ r12 is zeroed just in time + @ to preclude oversteping + @ inp[len], which means that + @ last block[s] are actually + @ loaded twice, but last + @ copy is not processed + vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2 + vmov.i8 q11,#0xe1 + vld1.64 {q14},[r1] + moveq r12,#0 @ is it time to zero r12? + vext.8 q0,q0,q0,#8 @ rotate Xi + vld1.64 {q8},[r2]! @ load [rotated] I[0] + vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant +#ifndef __ARMEB__ + vrev64.8 q8,q8 + vrev64.8 q0,q0 +#endif + vext.8 q3,q8,q8,#8 @ rotate I[0] + blo .Lodd_tail_v8 @ r3 was less than 32 + vld1.64 {q9},[r2],r12 @ load [rotated] I[1] +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q7,q9,q9,#8 + veor q3,q3,q0 @ I[i]^=Xi +.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q9,q9,q7 @ Karatsuba pre-processing +.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + vext.8 q10,q3,q3,#8 + subs r3,r3,#32 @ is there more data? +.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo + movlo r12,#0 @ is it time to zero r12? + +.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9 + veor q10,q10,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi + veor q0,q0,q4 @ accumulate +.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2] + + veor q2,q2,q6 + moveq r12,#0 @ is it time to zero r12? + veor q1,q1,q5 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3] +#ifndef __ARMEB__ + vrev64.8 q8,q8 +#endif + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + vext.8 q7,q9,q9,#8 + vext.8 q3,q8,q8,#8 + veor q0,q1,q10 +.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q3,q3,q2 @ accumulate q3 early + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q3,q3,q10 + veor q9,q9,q7 @ Karatsuba pre-processing + veor q3,q3,q0 +.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + bhs .Loop_mod2x_v8 @ there was at least 32 more bytes + + veor q2,q2,q10 + vext.8 q3,q8,q8,#8 @ re-construct q3 + adds r3,r3,#32 @ re-construct r3 + veor q0,q0,q2 @ re-construct q0 + beq .Ldone_v8 @ is r3 zero? +.Lodd_tail_v8: + vext.8 q10,q0,q0,#8 + veor q3,q3,q0 @ inp^=Xi + veor q9,q8,q10 @ q9 is rotated inp^Xi + +.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing +.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi +.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 +.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction +.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +.Ldone_v8: +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so + bx lr +.size gcm_ghash_v8,.-gcm_ghash_v8 +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/sha/sha1-armv4-large.S b/third_party/boringssl/linux-arm/crypto/sha/sha1-armv4-large.S new file mode 100644 index 00000000000..4911458b262 --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/sha/sha1-armv4-large.S @@ -0,0 +1,1462 @@ +#if defined(__arm__) +#include "arm_arch.h" + +.text +.code 32 + +.globl sha1_block_data_order +.type sha1_block_data_order,%function + +.align 5 +sha1_block_data_order: +#if __ARM_MAX_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 +.LK_00_19:.word 0x5a827999 +.LK_20_39:.word 0x6ed9eba1 +.LK_40_59:.word 0x8f1bbcdc +.LK_60_79:.word 0xca62c1d6 +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +#endif +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 5 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov r14,sp + sub sp,sp,#64 @ alloca + adr r8,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia r0,{r3,r4,r5,r6,r7} @ load context + mov r12,sp + + vld1.8 {q0,q1},[r1]! @ handles unaligned + veor q15,q15,q15 + vld1.8 {q2,q3},[r1]! + vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 + vrev32.8 q0,q0 @ yes, even on + vrev32.8 q1,q1 @ big-endian... + vrev32.8 q2,q2 + vadd.i32 q8,q0,q14 + vrev32.8 q3,q3 + vadd.i32 q9,q1,q14 + vst1.32 {q8},[r12,:128]! + vadd.i32 q10,q2,q14 + vst1.32 {q9},[r12,:128]! + vst1.32 {q10},[r12,:128]! + ldr r9,[sp] @ big RAW stall + +.Loop_neon: + vext.8 q8,q0,q1,#8 + bic r10,r6,r4 + add r7,r7,r9 + and r11,r5,r4 + vadd.i32 q13,q3,q14 + ldr r9,[sp,#4] + add r7,r7,r3,ror#27 + vext.8 q12,q3,q15,#4 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q8,q8,q0 + bic r10,r5,r3 + add r6,r6,r9 + veor q12,q12,q2 + and r11,r4,r3 + ldr r9,[sp,#8] + veor q12,q12,q8 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q13,q15,q12,#4 + bic r10,r4,r7 + add r5,r5,r9 + vadd.i32 q8,q12,q12 + and r11,r3,r7 + ldr r9,[sp,#12] + vsri.32 q8,q12,#31 + add r5,r5,r6,ror#27 + eor r11,r11,r10 + mov r7,r7,ror#2 + vshr.u32 q12,q13,#30 + add r5,r5,r11 + bic r10,r3,r6 + vshl.u32 q13,q13,#2 + add r4,r4,r9 + and r11,r7,r6 + veor q8,q8,q12 + ldr r9,[sp,#16] + add r4,r4,r5,ror#27 + veor q8,q8,q13 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q9,q1,q2,#8 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + vadd.i32 q13,q8,q14 + ldr r9,[sp,#20] + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r4,ror#27 + vext.8 q12,q8,q15,#4 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + veor q9,q9,q1 + bic r10,r6,r4 + add r7,r7,r9 + veor q12,q12,q3 + and r11,r5,r4 + ldr r9,[sp,#24] + veor q12,q12,q9 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q13,q15,q12,#4 + bic r10,r5,r3 + add r6,r6,r9 + vadd.i32 q9,q12,q12 + and r11,r4,r3 + ldr r9,[sp,#28] + vsri.32 q9,q12,#31 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + mov r3,r3,ror#2 + vshr.u32 q12,q13,#30 + add r6,r6,r11 + bic r10,r4,r7 + vshl.u32 q13,q13,#2 + add r5,r5,r9 + and r11,r3,r7 + veor q9,q9,q12 + ldr r9,[sp,#32] + add r5,r5,r6,ror#27 + veor q9,q9,q13 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q10,q2,q3,#8 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + vadd.i32 q13,q9,q14 + ldr r9,[sp,#36] + add r4,r4,r5,ror#27 + vext.8 q12,q9,q15,#4 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q10,q10,q2 + bic r10,r7,r5 + add r3,r3,r9 + veor q12,q12,q8 + and r11,r6,r5 + ldr r9,[sp,#40] + veor q12,q12,q10 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q13,q15,q12,#4 + bic r10,r6,r4 + add r7,r7,r9 + vadd.i32 q10,q12,q12 + and r11,r5,r4 + ldr r9,[sp,#44] + vsri.32 q10,q12,#31 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + mov r4,r4,ror#2 + vshr.u32 q12,q13,#30 + add r7,r7,r11 + bic r10,r5,r3 + vshl.u32 q13,q13,#2 + add r6,r6,r9 + and r11,r4,r3 + veor q10,q10,q12 + ldr r9,[sp,#48] + add r6,r6,r7,ror#27 + veor q10,q10,q13 + eor r11,r11,r10 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q11,q3,q8,#8 + bic r10,r4,r7 + add r5,r5,r9 + and r11,r3,r7 + vadd.i32 q13,q10,q14 + ldr r9,[sp,#52] + add r5,r5,r6,ror#27 + vext.8 q12,q10,q15,#4 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q11,q11,q3 + bic r10,r3,r6 + add r4,r4,r9 + veor q12,q12,q9 + and r11,r7,r6 + ldr r9,[sp,#56] + veor q12,q12,q11 + add r4,r4,r5,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q13,q15,q12,#4 + bic r10,r7,r5 + add r3,r3,r9 + vadd.i32 q11,q12,q12 + and r11,r6,r5 + ldr r9,[sp,#60] + vsri.32 q11,q12,#31 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + vshr.u32 q12,q13,#30 + add r3,r3,r11 + bic r10,r6,r4 + vshl.u32 q13,q13,#2 + add r7,r7,r9 + and r11,r5,r4 + veor q11,q11,q12 + ldr r9,[sp,#0] + add r7,r7,r3,ror#27 + veor q11,q11,q13 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q10,q11,#8 + bic r10,r5,r3 + add r6,r6,r9 + and r11,r4,r3 + veor q0,q0,q8 + ldr r9,[sp,#4] + add r6,r6,r7,ror#27 + veor q0,q0,q1 + eor r11,r11,r10 + mov r3,r3,ror#2 + vadd.i32 q13,q11,q14 + add r6,r6,r11 + bic r10,r4,r7 + veor q12,q12,q0 + add r5,r5,r9 + and r11,r3,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r11,r10 + mov r7,r7,ror#2 + vsli.32 q0,q12,#2 + add r5,r5,r11 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + ldr r9,[sp,#12] + add r4,r4,r5,ror#27 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + ldr r9,[sp,#16] + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q11,q0,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#20] + veor q1,q1,q9 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q1,q1,q2 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q0,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q1 + ldr r9,[sp,#24] + eor r11,r10,r4 + vshr.u32 q1,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q1,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#28] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#32] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q0,q1,#8 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#36] + veor q2,q2,q10 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + veor q2,q2,q3 + mov r5,r5,ror#2 + add r3,r3,r11 + vadd.i32 q13,q1,q14 + eor r10,r4,r6 + vld1.32 {d28[],d29[]},[r8,:32]! + add r7,r7,r9 + veor q12,q12,q2 + ldr r9,[sp,#40] + eor r11,r10,r5 + vshr.u32 q2,q12,#30 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + vst1.32 {q13},[r12,:128]! + add r7,r7,r11 + eor r10,r3,r5 + vsli.32 q2,q12,#2 + add r6,r6,r9 + ldr r9,[sp,#44] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#48] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q1,q2,#8 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r7 + add r4,r4,r5,ror#27 + veor q3,q3,q8 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q2,q14 + eor r10,r5,r7 + add r3,r3,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r6 + vshr.u32 q3,q12,#30 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vst1.32 {q13},[r12,:128]! + add r3,r3,r11 + eor r10,r4,r6 + vsli.32 q3,q12,#2 + add r7,r7,r9 + ldr r9,[sp,#60] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#0] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q2,q3,#8 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#4] + veor q8,q8,q0 + eor r11,r10,r3 + add r5,r5,r6,ror#27 + veor q8,q8,q9 + mov r7,r7,ror#2 + add r5,r5,r11 + vadd.i32 q13,q3,q14 + eor r10,r6,r3 + add r4,r4,r9 + veor q12,q12,q8 + ldr r9,[sp,#8] + eor r11,r10,r7 + vshr.u32 q8,q12,#30 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + add r4,r4,r11 + eor r10,r5,r7 + vsli.32 q8,q12,#2 + add r3,r3,r9 + ldr r9,[sp,#12] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#16] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q3,q8,#8 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#20] + veor q9,q9,q1 + eor r11,r10,r4 + add r6,r6,r7,ror#27 + veor q9,q9,q10 + mov r3,r3,ror#2 + add r6,r6,r11 + vadd.i32 q13,q8,q14 + eor r10,r7,r4 + add r5,r5,r9 + veor q12,q12,q9 + ldr r9,[sp,#24] + eor r11,r10,r3 + vshr.u32 q9,q12,#30 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + vst1.32 {q13},[r12,:128]! + add r5,r5,r11 + eor r10,r6,r3 + vsli.32 q9,q12,#2 + add r4,r4,r9 + ldr r9,[sp,#28] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#32] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q8,q9,#8 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#36] + veor q10,q10,q2 + add r7,r7,r3,ror#27 + eor r11,r5,r6 + veor q10,q10,q11 + add r7,r7,r10 + and r11,r11,r4 + vadd.i32 q13,q9,q14 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q12,q12,q10 + add r6,r6,r9 + and r10,r4,r5 + vshr.u32 q10,q12,#30 + ldr r9,[sp,#40] + add r6,r6,r7,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r4,r5 + add r6,r6,r10 + vsli.32 q10,q12,#2 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#44] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#48] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q9,q10,#8 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#52] + veor q11,q11,q3 + add r3,r3,r4,ror#27 + eor r11,r6,r7 + veor q11,q11,q0 + add r3,r3,r10 + and r11,r11,r5 + vadd.i32 q13,q10,q14 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + veor q12,q12,q11 + add r7,r7,r9 + and r10,r5,r6 + vshr.u32 q11,q12,#30 + ldr r9,[sp,#56] + add r7,r7,r3,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r5,r6 + add r7,r7,r10 + vsli.32 q11,q12,#2 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#60] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#0] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q10,q11,#8 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#4] + veor q0,q0,q8 + add r4,r4,r5,ror#27 + eor r11,r7,r3 + veor q0,q0,q1 + add r4,r4,r10 + and r11,r11,r6 + vadd.i32 q13,q11,q14 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q12,q12,q0 + add r3,r3,r9 + and r10,r6,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r3,r3,r4,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r6,r7 + add r3,r3,r10 + vsli.32 q0,q12,#2 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#12] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#16] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q11,q0,#8 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#20] + veor q1,q1,q9 + add r5,r5,r6,ror#27 + eor r11,r3,r4 + veor q1,q1,q2 + add r5,r5,r10 + and r11,r11,r7 + vadd.i32 q13,q0,q14 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q12,q12,q1 + add r4,r4,r9 + and r10,r7,r3 + vshr.u32 q1,q12,#30 + ldr r9,[sp,#24] + add r4,r4,r5,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r7,r3 + add r4,r4,r10 + vsli.32 q1,q12,#2 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#28] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#32] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q0,q1,#8 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#36] + veor q2,q2,q10 + add r6,r6,r7,ror#27 + eor r11,r4,r5 + veor q2,q2,q3 + add r6,r6,r10 + and r11,r11,r3 + vadd.i32 q13,q1,q14 + mov r3,r3,ror#2 + add r6,r6,r11 + veor q12,q12,q2 + add r5,r5,r9 + and r10,r3,r4 + vshr.u32 q2,q12,#30 + ldr r9,[sp,#40] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r3,r4 + add r5,r5,r10 + vsli.32 q2,q12,#2 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#44] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#48] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q1,q2,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q3,q3,q8 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q2,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r4 + vshr.u32 q3,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q3,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#60] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#0] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q3,q14 + eor r10,r5,r7 + add r3,r3,r9 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + teq r1,r2 + sub r8,r8,#16 + subeq r1,r1,#64 + vld1.8 {q0,q1},[r1]! + ldr r9,[sp,#4] + eor r11,r10,r6 + vld1.8 {q2,q3},[r1]! + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + eor r10,r4,r6 + vrev32.8 q0,q0 + add r7,r7,r9 + ldr r9,[sp,#8] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#12] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#16] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vrev32.8 q1,q1 + eor r10,r6,r3 + add r4,r4,r9 + vadd.i32 q8,q0,q14 + ldr r9,[sp,#20] + eor r11,r10,r7 + vst1.32 {q8},[r12,:128]! + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#24] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#28] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#32] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vrev32.8 q2,q2 + eor r10,r7,r4 + add r5,r5,r9 + vadd.i32 q9,q1,q14 + ldr r9,[sp,#36] + eor r11,r10,r3 + vst1.32 {q9},[r12,:128]! + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#40] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#44] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#48] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vrev32.8 q3,q3 + eor r10,r3,r5 + add r6,r6,r9 + vadd.i32 q10,q2,q14 + ldr r9,[sp,#52] + eor r11,r10,r4 + vst1.32 {q10},[r12,:128]! + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#56] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#60] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + ldmia r0,{r9,r10,r11,r12} @ accumulate context + add r3,r3,r9 + ldr r9,[r0,#16] + add r4,r4,r10 + add r5,r5,r11 + add r6,r6,r12 + moveq sp,r14 + add r7,r7,r9 + ldrne r9,[sp] + stmia r0,{r3,r4,r5,r6,r7} + addne r12,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so + + veor q1,q1,q1 + adr r3,.LK_00_19 + vld1.32 {q0},[r0]! + vld1.32 {d2[0]},[r0] + sub r0,r0,#16 + vld1.32 {d16[],d17[]},[r3,:32]! + vld1.32 {d18[],d19[]},[r3,:32]! + vld1.32 {d20[],d21[]},[r3,:32]! + vld1.32 {d22[],d23[]},[r3,:32] + +.Loop_v8: + vld1.8 {q4,q5},[r1]! + vld1.8 {q6,q7},[r1]! + vrev32.8 q4,q4 + vrev32.8 q5,q5 + + vadd.i32 q12,q8,q4 + vrev32.8 q6,q6 + vmov q14,q0 @ offload + subs r2,r2,#1 + + vadd.i32 q13,q8,q5 + vrev32.8 q7,q7 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 +.byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 + vadd.i32 q12,q8,q6 +.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 +.byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q8,q7 +.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 +.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 +.byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q8,q4 +.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 +.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 +.byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q9,q5 +.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 +.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 +.byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q9,q6 +.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 +.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q7 +.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 +.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 +.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q9,q4 +.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 +.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q5 +.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 +.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 +.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q10,q6 +.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 +.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q10,q7 +.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 +.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 +.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q4 +.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 +.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 +.byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q10,q5 +.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 +.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 +.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q6 +.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 +.byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 +.byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q11,q7 +.byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 +.byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 +.byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q11,q4 +.byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 +.byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q5 +.byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 +.byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 +.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q11,q6 +.byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q7 + +.byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 +.byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + +.byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 +.byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + + vadd.i32 q1,q1,q2 + vadd.i32 q0,q0,q14 + bne .Loop_v8 + + vst1.32 {q0},[r0]! + vst1.32 {d2[0]},[r0] + + vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} + bx lr @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S b/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S new file mode 100644 index 00000000000..ac9f2f164d8 --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/sha/sha256-armv4.S @@ -0,0 +1,2818 @@ +#if defined(__arm__) + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# if defined(__thumb2__) && !defined(__APPLE__) +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.Lsha256_block_data_order +#endif +.align 5 + +.globl sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +.Lsha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + + sub r11,sp,#16*4+16 + adrl r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8,r9,r10,r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# if defined(__thumb2__) && !defined(__APPLE__) +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] +# ifdef __APPLE__ + sub r3,r3,#256+32 +# elif defined(__thumb2__) + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {q8,q9},[r1]! + vld1.8 {q10,q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S b/third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S new file mode 100644 index 00000000000..c794f87fcff --- /dev/null +++ b/third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S @@ -0,0 +1,1869 @@ +#if defined(__arm__) + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA512 block procedure for ARMv4. September 2007. + +@ This code is ~4.5 (four and a half) times faster than code generated +@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +@ Xscale PXA250 core]. +@ +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 6% improvement on +@ Cortex A8 core and ~40 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 7% +@ improvement on Coxtex A8 core and ~38 cycles per byte. + +@ March 2011. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process +@ one byte in 23.3 cycles or ~60% faster than integer-only code. + +@ August 2012. +@ +@ Improve NEON performance by 12% on Snapdragon S4. In absolute +@ terms it's 22.6 cycles per byte, which is disappointing result. +@ Technical writers asserted that 3-way S4 pipeline can sustain +@ multiple NEON instructions per cycle, but dual NEON issue could +@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +@ for further details. On side note Cortex-A15 processes one byte in +@ 16 cycles. + +@ Byte order [in]dependence. ========================================= +@ +@ Originally caller was expected to maintain specific *dword* order in +@ h[0-7], namely with most significant dword at *lower* address, which +@ was reflected in below two parameters as 0 and 4. Now caller is +@ expected to maintain native byte order for whole 64-bit values. +#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if __ARM_ARCH__<7 || defined(__APPLE__) +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K512,%object +.align 5 +K512: + WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) + WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) + WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) + WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) + WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) + WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) + WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) + WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) + WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) + WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) + WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) + WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) + WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) + WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) + WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) + WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) + WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) + WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) + WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) + WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) + WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) + WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) + WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) + WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) + WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) + WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) + WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) + WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) + WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) + WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) + WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) + WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) + WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) + WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) + WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) + WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) + WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) + WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) + WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) + WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-.Lsha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: +.Lsha512_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha512_block_data_order +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif + tst r12,#1 + bne .LNEON +#endif + add r2,r1,r2,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order,.-sha512_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.globl sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add r2,r1,r2,lsl#7 @ len to point at the end of inp + adr r3,K512 + VFP_ABI_PUSH + vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + bx lr @ .word 0xe12fff1e +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif \ No newline at end of file diff --git a/third_party/boringssl/linux-x86/crypto/aes/aes-586.S b/third_party/boringssl/linux-x86/crypto/aes/aes-586.S new file mode 100644 index 00000000000..74282748ce0 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/aes/aes-586.S @@ -0,0 +1,3257 @@ +#if defined(__i386__) +.file "aes-586.S" +.text +.hidden _x86_AES_encrypt_compact +.type _x86_AES_encrypt_compact,@function +.align 16 +_x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ecx,%edi + xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%ecx + andl %edx,%ebp + leal (%edx,%edx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %edx,%edi + xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%edx + andl %eax,%ebp + leal (%eax,%eax,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %eax,%edi + xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi + roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%eax + andl %ebx,%ebp + leal (%ebx,%ebx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ebx,%edi + xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ebx + xorl %edi,%esi + xorl %esi,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact +.hidden _sse_AES_encrypt_compact +.type _sse_AES_encrypt_compact,@function +.align 16 +_sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $8,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $8,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + movd %mm2,%eax + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + movd %mm6,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $8,%esi + shrl $16,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + andl $255,%ebx + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx + shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi + orl %eax,%edx + shll $8,%esi + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L001loop +.align 16 +.L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact +.hidden _x86_AES_encrypt +.type _x86_AES_encrypt,@function +.align 16 +_x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.size _x86_AES_encrypt,.-_x86_AES_encrypt +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +.type asm_AES_encrypt,@function +.align 16 +asm_AES_encrypt: +.L_asm_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal OPENSSL_ia32cap_P-.L004pic_point(%ebp),%eax + leal .LAES_Te-.L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_AES_encrypt,.-.L_asm_AES_encrypt_begin +.hidden _x86_AES_decrypt_compact +.type _x86_AES_decrypt_compact,@function +.align 16 +_x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $24,%eax + xorl %ebp,%ecx + roll $16,%ebx + xorl %eax,%ecx + roll $8,%ebp + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $24,%ebx + xorl %ebp,%edx + roll $16,%ecx + xorl %ebx,%edx + roll $8,%ebp + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%eax + roll $16,%edx + xorl %ecx,%eax + roll $8,%ebp + xorl %edx,%eax + xorl %ebp,%eax + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%ebp + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact +.hidden _sse_AES_decrypt_compact +.type _sse_AES_decrypt_compact,@function +.align 16 +_sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L007loop: + pshufw $12,%mm0,%mm1 + pshufw $9,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $3,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movd %mm2,%eax + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi + movd %mm6,%ebx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $16,%esi + shrl $16,%eax + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shrl $16,%ebx + shll $8,%esi + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx + andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + orl %ebx,%edx + shll $16,%esi + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx + shll $24,%eax + orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L007loop +.align 16 +.L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact +.hidden _x86_AES_decrypt +.type _x86_AES_decrypt,@function +.align 16 +_x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.size _x86_AES_decrypt,.-_x86_AES_decrypt +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +.type asm_AES_decrypt,@function +.align 16 +asm_AES_decrypt: +.L_asm_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L010pic_point +.L010pic_point: + popl %ebp + leal OPENSSL_ia32cap_P-.L010pic_point(%ebp),%eax + leal .LAES_Td-.L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_AES_decrypt,.-.L_asm_AES_decrypt_begin +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +.type asm_AES_cbc_encrypt,@function +.align 16 +asm_AES_cbc_encrypt: +.L_asm_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L012drop_out + call .L013pic_point +.L013pic_point: + popl %ebp + leal OPENSSL_ia32cap_P-.L013pic_point(%ebp),%eax + cmpl $0,40(%esp) + leal .LAES_Te-.L013pic_point(%ebp),%ebp + jne .L014picked_te + leal .LAES_Td-.LAES_Te(%ebp),%ebp +.L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb .L015slow_way + testl $15,%ecx + jnz .L015slow_way + btl $28,(%eax) + jc .L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb .L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp .L017tbl_ok +.align 4 +.L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 4 +.L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb .L018do_copy + cmpl $3852,%ebx + jb .L019skip_copy +.align 4 +.L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +.L019skip_copy: + movl $16,%edi +.align 4 +.L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz .L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je .L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 16 +.L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L023skip_ezero: + movl 28(%esp),%esp + popfl +.L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L021fast_decrypt: + cmpl 36(%esp),%esi + je .L024fast_dec_in_place + movl %edi,52(%esp) +.align 4 +.align 16 +.L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L026fast_dec_out +.align 16 +.L024fast_dec_in_place: +.L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L027fast_dec_in_place_loop +.align 4 +.L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je .L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb .L030slow_enc_tail + btl $25,52(%esp) + jnc .L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 16 +.L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L032slow_enc_loop_sse + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L033slow_enc_loop_x86 + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L034enc_in_place +.align 4 +.long 2767451785 + jmp .L035enc_skip_in_place +.L034enc_in_place: + leal (%edi,%ecx,1),%edi +.L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp .L033slow_enc_loop_x86 +.align 16 +.L029slow_decrypt: + btl $25,52(%esp) + jnc .L036slow_dec_loop_x86 +.align 4 +.L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc .L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz .L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz .L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_AES_cbc_encrypt,.-.L_asm_AES_cbc_encrypt_begin +.hidden _x86_AES_set_encrypt_key +.type _x86_AES_set_encrypt_key,@function +.align 16 +_x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz .L040badpointer + testl $-1,%edi + jz .L040badpointer + call .L041pic_point +.L041pic_point: + popl %ebp + leal .LAES_Te-.L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je .L04210rounds + cmpl $192,%ecx + je .L04312rounds + cmpl $256,%ecx + je .L04414rounds + movl $-2,%eax + jmp .L045exit +.L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp .L04610shortcut +.align 4 +.L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +.L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl .L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp .L045exit +.L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp .L04812shortcut +.align 4 +.L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +.L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je .L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp .L04912loop +.L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp .L045exit +.L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp .L05114shortcut +.align 4 +.L05214loop: + movl 28(%edi),%edx +.L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je .L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp .L05214loop +.L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp .L045exit +.L040badpointer: + movl $-1,%eax +.L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key +.globl asm_AES_set_encrypt_key +.hidden asm_AES_set_encrypt_key +.type asm_AES_set_encrypt_key,@function +.align 16 +asm_AES_set_encrypt_key: +.L_asm_AES_set_encrypt_key_begin: + call _x86_AES_set_encrypt_key + ret +.size asm_AES_set_encrypt_key,.-.L_asm_AES_set_encrypt_key_begin +.globl asm_AES_set_decrypt_key +.hidden asm_AES_set_decrypt_key +.type asm_AES_set_decrypt_key,@function +.align 16 +asm_AES_set_decrypt_key: +.L_asm_AES_set_decrypt_key_begin: + call _x86_AES_set_encrypt_key + cmpl $0,%eax + je .L054proceed + ret +.L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 4 +.L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne .L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 4 +.L056permute: + addl $16,%edi + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb .L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_AES_set_decrypt_key,.-.L_asm_AES_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/linux-x86/crypto/aes/aesni-x86.S b/third_party/boringssl/linux-x86/crypto/aes/aesni-x86.S new file mode 100644 index 00000000000..aec110d4b11 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/aes/aesni-x86.S @@ -0,0 +1,2469 @@ +#if defined(__i386__) +.file "src/crypto/aes/asm/aesni-x86.S" +.text +.globl aesni_encrypt +.hidden aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.hidden aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.hidden _aesni_encrypt2 +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.hidden _aesni_decrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 +.hidden _aesni_encrypt3 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.hidden _aesni_decrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.hidden _aesni_encrypt4 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.hidden _aesni_decrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.hidden _aesni_encrypt6 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L008_aesni_encrypt6_inner +.align 16 +.L009enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.hidden _aesni_decrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L010_aesni_decrypt6_inner +.align 16 +.L011dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L011dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.hidden aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L012ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L013ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L014ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L015ecb_enc_loop6_enter +.align 16 +.L016ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L015ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L016ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L014ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L017ecb_enc_one + movups 16(%esi),%xmm3 + je .L018ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L019ecb_enc_three + movups 48(%esi),%xmm5 + je .L020ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L017ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L021enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L021enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L018ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L019ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L020ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L012ecb_ret +.align 16 +.L013ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L022ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L023ecb_dec_loop6_enter +.align 16 +.L024ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L023ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L024ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L022ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L025ecb_dec_one + movups 16(%esi),%xmm3 + je .L026ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L027ecb_dec_three + movups 48(%esi),%xmm5 + je .L028ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L025ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L029dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L029dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L026ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L027ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L028ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.hidden aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +.L030ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +.L031ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L031ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz .L030ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.hidden aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L032enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L033ccm64_dec_outer +.align 16 +.L033ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L034ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +.L035ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L035ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp .L033ccm64_dec_outer +.align 16 +.L034ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L036enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L036enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.hidden aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L037ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb .L038ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp .L039ctr32_loop6 +.align 16 +.L039ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc .L039ctr32_loop6 + addl $6,%eax + jz .L040ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L038ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L041ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je .L042ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L043ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je .L044ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L040ctr32_ret +.align 16 +.L037ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L041ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L045enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L045enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L040ctr32_ret +.align 16 +.L042ctr32_two: + call _aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L040ctr32_ret +.align 16 +.L043ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L040ctr32_ret +.align 16 +.L044ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.hidden aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L046enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L046enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L047xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L048xts_enc_loop6 +.align 16 +.L048xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L048xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L047xts_enc_short: + addl $96,%eax + jz .L049xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L050xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L051xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L052xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L053xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L054xts_enc_done +.align 16 +.L050xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L055enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L055enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L051xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L052xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L053xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L049xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L056xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L057xts_enc_steal +.align 16 +.L054xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L056xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L057xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L057xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L058enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L058enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin +.globl aesni_xts_decrypt +.hidden aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: +.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L059enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L059enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L060xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L061xts_dec_loop6 +.align 16 +.L061xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L061xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L060xts_dec_short: + addl $96,%eax + jz .L062xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L063xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L064xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L065xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L066xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L067xts_dec_done +.align 16 +.L063xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L068dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L068dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L064xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L065xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L066xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L062xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L069xts_dec_ret + movl %eax,112(%esp) + jmp .L070xts_dec_only_one_more +.align 16 +.L067xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L069xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L070xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L071dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L071dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L072xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L072xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L073dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L073dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_cbc_encrypt +.hidden aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L074cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L075cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L076cbc_enc_tail + subl $16,%eax + jmp .L077cbc_enc_loop +.align 16 +.L077cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L078enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L078enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L077cbc_enc_loop + addl $16,%eax + jnz .L076cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp .L079cbc_ret +.L076cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L077cbc_enc_loop +.align 16 +.L075cbc_decrypt: + cmpl $80,%eax + jbe .L080cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L081cbc_dec_loop6_enter +.align 16 +.L082cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L081cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L082cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L083cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L080cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L084cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L085cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L086cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L087cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L084cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L089dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L089dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L085cbc_dec_two: + call _aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L086cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L087cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L088cbc_dec_tail_collected: + andl $15,%eax + jnz .L090cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp .L079cbc_ret +.align 16 +.L090cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +.L079cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +.L074cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin +.hidden _aesni_set_encrypt_key +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz .L091bad_pointer + testl %edx,%edx + jz .L091bad_pointer + call .L092pic +.L092pic: + popl %ebx + leal .Lkey_const-.L092pic(%ebx),%ebx + leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je .L09314rounds + cmpl $192,%ecx + je .L09412rounds + cmpl $128,%ecx + jne .L095bad_keybits +.align 16 +.L09610rounds: + cmpl $268435456,%ebp + je .L09710rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L098key_128_cold +.byte 102,15,58,223,200,2 + call .L099key_128 +.byte 102,15,58,223,200,4 + call .L099key_128 +.byte 102,15,58,223,200,8 + call .L099key_128 +.byte 102,15,58,223,200,16 + call .L099key_128 +.byte 102,15,58,223,200,32 + call .L099key_128 +.byte 102,15,58,223,200,64 + call .L099key_128 +.byte 102,15,58,223,200,128 + call .L099key_128 +.byte 102,15,58,223,200,27 + call .L099key_128 +.byte 102,15,58,223,200,54 + call .L099key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp .L100good_key +.align 16 +.L099key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L098key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L100good_key +.align 16 +.L09412rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10212rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L103key_192a_cold +.byte 102,15,58,223,202,2 + call .L104key_192b +.byte 102,15,58,223,202,4 + call .L105key_192a +.byte 102,15,58,223,202,8 + call .L104key_192b +.byte 102,15,58,223,202,16 + call .L105key_192a +.byte 102,15,58,223,202,32 + call .L104key_192b +.byte 102,15,58,223,202,64 + call .L105key_192a +.byte 102,15,58,223,202,128 + call .L104key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp .L100good_key +.align 16 +.L105key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L103key_192a_cold: + movaps %xmm2,%xmm5 +.L106key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L104key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L106key_192b_warm +.align 16 +.L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L100good_key +.align 16 +.L09314rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L10814rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L109key_256a_cold +.byte 102,15,58,223,200,1 + call .L110key_256b +.byte 102,15,58,223,202,2 + call .L111key_256a +.byte 102,15,58,223,200,2 + call .L110key_256b +.byte 102,15,58,223,202,4 + call .L111key_256a +.byte 102,15,58,223,200,4 + call .L110key_256b +.byte 102,15,58,223,202,8 + call .L111key_256a +.byte 102,15,58,223,200,8 + call .L110key_256b +.byte 102,15,58,223,202,16 + call .L111key_256a +.byte 102,15,58,223,200,16 + call .L110key_256b +.byte 102,15,58,223,202,32 + call .L111key_256a +.byte 102,15,58,223,200,32 + call .L110key_256b +.byte 102,15,58,223,202,64 + call .L111key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp .L100good_key +.align 16 +.L111key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L109key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L110key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 16 +.L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L112loop_key256 +.L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L091bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L095bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aesni_set_encrypt_key +.hidden aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin +.globl aesni_set_decrypt_key +.hidden aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L114dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L115dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L115dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +.L114dec_key_ret: + ret +.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/linux-x86/crypto/aes/vpaes-x86.S b/third_party/boringssl/linux-x86/crypto/aes/vpaes-x86.S new file mode 100644 index 00000000000..9aede394844 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/aes/vpaes-x86.S @@ -0,0 +1,676 @@ +#if defined(__i386__) +.file "vpaes-x86.S" +.text +.align 64 +.L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 64 +.hidden _vpaes_preheat +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.size _vpaes_preheat,.-_vpaes_preheat +.hidden _vpaes_encrypt_core +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp .L000enc_entry +.align 16 +.L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +.L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz .L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core +.hidden _vpaes_decrypt_core +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp .L002dec_entry +.align 16 +.L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +.L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz .L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core +.hidden _vpaes_schedule_core +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz .L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp .L005schedule_go +.L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +.L005schedule_go: + cmpl $192,%eax + ja .L006schedule_256 + je .L007schedule_192 +.L008schedule_128: + movl $10,%eax +.L009loop_schedule_128: + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + jmp .L009loop_schedule_128 +.align 16 +.L007schedule_192: + movdqu 8(%esi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +.L011loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .L011loop_schedule_192 +.align 16 +.L006schedule_256: + movdqu 16(%esi),%xmm0 + call _vpaes_schedule_transform + movl $7,%eax +.L012loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call .L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp .L012loop_schedule_256 +.align 16 +.L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz .L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +.L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core +.hidden _vpaes_schedule_192_smear +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear +.hidden _vpaes_schedule_round +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +.L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round +.hidden _vpaes_schedule_transform +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform +.hidden _vpaes_schedule_mangle +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz .L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp .L015schedule_mangle_both +.align 16 +.L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +.L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal .L_vpaes_consts+0x30-.L016pic_point,%ebp + call _vpaes_schedule_core +.L016pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal .L_vpaes_consts+0x30-.L017pic_point,%ebp + call _vpaes_schedule_core +.L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L018pic_point,%ebp + call _vpaes_preheat +.L018pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_encrypt,.-.L_vpaes_encrypt_begin +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L019pic_point,%ebp + call _vpaes_preheat +.L019pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_decrypt,.-.L_vpaes_decrypt_begin +.globl vpaes_cbc_encrypt +.hidden vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc .L020cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + call _vpaes_preheat +.L021pic_point: + cmpl $0,%ecx + je .L022cbc_dec_loop + jmp .L023cbc_enc_loop +.align 16 +.L023cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call _vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L023cbc_enc_loop + jmp .L024cbc_done +.align 16 +.L022cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call _vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L022cbc_dec_loop +.L024cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +.L020cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/bn/bn-586.S b/third_party/boringssl/linux-x86/crypto/bn/bn-586.S new file mode 100644 index 00000000000..773beff9c11 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/bn/bn-586.S @@ -0,0 +1,1538 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/bn-586.S" +.text +.globl bn_mul_add_words +.hidden bn_mul_add_words +.type bn_mul_add_words,@function +.align 16 +bn_mul_add_words: +.L_bn_mul_add_words_begin: + call .L000PIC_me_up +.L000PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp .L002maw_sse2_entry +.align 16 +.L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz .L004maw_sse2_exit +.L002maw_sse2_entry: + testl $4294967288,%ecx + jnz .L003maw_sse2_unrolled +.align 4 +.L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L005maw_sse2_loop +.L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 16 +.L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz .L006maw_finish +.align 16 +.L007maw_loop: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz .L007maw_loop +.L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz .L008maw_finish2 + jmp .L009maw_end +.L008maw_finish2: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz .L009maw_end + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_add_words,.-.L_bn_mul_add_words_begin +.globl bn_mul_words +.hidden bn_mul_words +.type bn_mul_words,@function +.align 16 +bn_mul_words: +.L_bn_mul_words_begin: + call .L010PIC_me_up +.L010PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 16 +.L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 16 +.L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz .L013mw_finish +.L014mw_loop: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz .L013mw_finish + jmp .L014mw_loop +.L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz .L015mw_finish2 + jmp .L016mw_end +.L015mw_finish2: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz .L016mw_end + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_words,.-.L_bn_mul_words_begin +.globl bn_sqr_words +.hidden bn_sqr_words +.type bn_sqr_words,@function +.align 16 +bn_sqr_words: +.L_bn_sqr_words_begin: + call .L017PIC_me_up +.L017PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 16 +.L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz .L019sqr_sse2_loop + emms + ret +.align 16 +.L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz .L020sw_finish +.L021sw_loop: + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz .L021sw_loop +.L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz .L022sw_end + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz .L022sw_end + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz .L022sw_end + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz .L022sw_end + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz .L022sw_end + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz .L022sw_end + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz .L022sw_end + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +.L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sqr_words,.-.L_bn_sqr_words_begin +.globl bn_div_words +.hidden bn_div_words +.type bn_div_words,@function +.align 16 +bn_div_words: +.L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.size bn_div_words,.-.L_bn_div_words_begin +.globl bn_add_words +.hidden bn_add_words +.type bn_add_words,@function +.align 16 +bn_add_words: +.L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L023aw_finish +.L024aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L024aw_loop +.L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L025aw_end + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L025aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L025aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L025aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L025aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L025aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L025aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_add_words,.-.L_bn_add_words_begin +.globl bn_sub_words +.hidden bn_sub_words +.type bn_sub_words,@function +.align 16 +bn_sub_words: +.L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L026aw_finish +.L027aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L027aw_loop +.L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L028aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L028aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L028aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L028aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L028aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L028aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_words,.-.L_bn_sub_words_begin +.globl bn_sub_part_words +.hidden bn_sub_part_words +.type bn_sub_part_words,@function +.align 16 +bn_sub_part_words: +.L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L029aw_finish +.L030aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L030aw_loop +.L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L031aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +.L031aw_end: + cmpl $0,36(%esp) + je .L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je .L032pw_end + jge .L033pw_pos + + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz .L034pw_neg_finish +.L035pw_neg_loop: + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L035pw_neg_loop +.L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz .L032pw_end + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L032pw_end + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp .L032pw_end +.L033pw_pos: + andl $4294967288,%ebp + jz .L036pw_pos_finish +.L037pw_pos_loop: + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L038pw_nc0 + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L039pw_nc1 + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L040pw_nc2 + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L041pw_nc3 + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L042pw_nc4 + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L043pw_nc5 + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L044pw_nc6 + + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc .L045pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L037pw_pos_loop +.L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz .L032pw_end + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L046pw_tail_nc0 + decl %ebp + jz .L032pw_end + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L047pw_tail_nc1 + decl %ebp + jz .L032pw_end + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L048pw_tail_nc2 + decl %ebp + jz .L032pw_end + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L049pw_tail_nc3 + decl %ebp + jz .L032pw_end + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L050pw_tail_nc4 + decl %ebp + jz .L032pw_end + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L051pw_tail_nc5 + decl %ebp + jz .L032pw_end + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L052pw_tail_nc6 + movl $1,%eax + jmp .L032pw_end +.L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +.L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +.L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz .L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +.L046pw_tail_nc0: + decl %ebp + jz .L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L047pw_tail_nc1: + decl %ebp + jz .L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L048pw_tail_nc2: + decl %ebp + jz .L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L049pw_tail_nc3: + decl %ebp + jz .L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L050pw_tail_nc4: + decl %ebp + jz .L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L051pw_tail_nc5: + decl %ebp + jz .L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L052pw_tail_nc6: +.L054pw_nc_end: + movl $0,%eax +.L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_part_words,.-.L_bn_sub_part_words_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/bn/co-586.S b/third_party/boringssl/linux-x86/crypto/bn/co-586.S new file mode 100644 index 00000000000..e41c3a1dffd --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/bn/co-586.S @@ -0,0 +1,1260 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/co-586.S" +.text +.globl bn_mul_comba8 +.hidden bn_mul_comba8 +.type bn_mul_comba8,@function +.align 16 +bn_mul_comba8: +.L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + + + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba8,.-.L_bn_mul_comba8_begin +.globl bn_mul_comba4 +.hidden bn_mul_comba4 +.type bn_mul_comba4,@function +.align 16 +bn_mul_comba4: +.L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + + + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba4,.-.L_bn_mul_comba4_begin +.globl bn_sqr_comba8 +.hidden bn_sqr_comba8 +.type bn_sqr_comba8,@function +.align 16 +bn_sqr_comba8: +.L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + + + xorl %ecx,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin +.globl bn_sqr_comba4 +.hidden bn_sqr_comba4 +.type bn_sqr_comba4,@function +.align 16 +bn_sqr_comba4: +.L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S b/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S new file mode 100644 index 00000000000..1569b2cff17 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/bn/x86-mont.S @@ -0,0 +1,460 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/x86-mont.S" +.text +.globl bn_mul_mont +.hidden bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: +.L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl .L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + movl %esp,%ebp + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%esp + negl %edi + movl %esp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%esp + xorl %esp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%esp + andl $-64,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %ebp,24(%esp) + call .L001PIC_me_up +.L001PIC_me_up: + popl %eax + leal OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc .L002non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 16 +.L0031st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl .L0031st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +.L004outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +.L005inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz .L005inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle .L004outer + emms + jmp .L006common_tail +.align 16 +.L002non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz .L007bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 16 +.L008mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L008mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp .L0092ndmadd +.align 16 +.L0101stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L0101stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 16 +.L0092ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0092ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je .L006common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp .L0101stmadd +.align 16 +.L007bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 16 +.L011sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl .L011sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 16 +.L0123rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0123rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je .L006common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je .L013sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 16 +.L014sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle .L014sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +.L013sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp .L0123rdmadd +.align 16 +.L006common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 16 +.L015sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge .L015sub + sbbl $0,%eax +.align 16 +.L016copy: + movl (%esi,%ebx,4),%edx + movl (%edi,%ebx,4),%ebp + xorl %ebp,%edx + andl %eax,%edx + xorl %ebp,%edx + movl %ecx,(%esi,%ebx,4) + movl %edx,(%edi,%ebx,4) + decl %ebx + jge .L016copy + movl 24(%esp),%esp + movl $1,%eax +.L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_mont,.-.L_bn_mul_mont_begin +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +#endif diff --git a/third_party/boringssl/linux-x86/crypto/cpu-x86-asm.S b/third_party/boringssl/linux-x86/crypto/cpu-x86-asm.S new file mode 100644 index 00000000000..24a8dd4d424 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/cpu-x86-asm.S @@ -0,0 +1,322 @@ +#if defined(__i386__) +.file "crypto/cpu-x86-asm.S" +.text +.globl OPENSSL_ia32_cpuid +.hidden OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: +.L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc .L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz .L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz .L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb .L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb .L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc .L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja .L002generic + andl $4026531839,%edx + jmp .L002generic +.L001intel: + cmpl $7,%edi + jb .L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +.L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb .L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +.L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne .L005notintel + orl $1073741824,%edx +.L005notintel: + btl $28,%edx + jnc .L002generic + andl $4026531839,%edx + cmpl $0,%edi + je .L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .L002generic + andl $4026531839,%edx +.L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc .L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je .L007done + cmpl $2,%eax + je .L006clear_avx +.L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +.L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +.L007done: + movl %esi,%eax + movl %ebp,%edx +.L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin +.globl OPENSSL_rdtsc +.hidden OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: +.L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + call .L009PIC_me_up +.L009PIC_me_up: + popl %ecx + leal OPENSSL_ia32cap_P-.L009PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc .L010notsc + .byte 0x0f,0x31 +.L010notsc: + ret +.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin +.globl OPENSSL_instrument_halt +.hidden OPENSSL_instrument_halt +.type OPENSSL_instrument_halt,@function +.align 16 +OPENSSL_instrument_halt: +.L_OPENSSL_instrument_halt_begin: + call .L011PIC_me_up +.L011PIC_me_up: + popl %ecx + leal OPENSSL_ia32cap_P-.L011PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc .L012nohalt +.long 2421723150 + andl $3,%eax + jnz .L012nohalt + pushfl + popl %eax + btl $9,%eax + jnc .L012nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +.L012nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin +.globl OPENSSL_far_spin +.hidden OPENSSL_far_spin +.type OPENSSL_far_spin,@function +.align 16 +OPENSSL_far_spin: +.L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc .L013nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp .L014spin +.align 16 +.L014spin: + incl %eax + cmpl (%ecx),%edx + je .L014spin +.long 529567888 + ret +.L013nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin +.globl OPENSSL_wipe_cpu +.hidden OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: +.L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + call .L015PIC_me_up +.L015PIC_me_up: + popl %ecx + leal OPENSSL_ia32cap_P-.L015PIC_me_up(%ecx),%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc .L016no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne .L017no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +.L017no_sse2: +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +.L016no_x87: + leal 4(%esp),%eax + ret +.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin +.globl OPENSSL_atomic_add +.hidden OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: +.L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +.L018spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne .L018spin + movl %ebx,%eax + popl %ebx + ret +.size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin +.globl OPENSSL_indirect_call +.hidden OPENSSL_indirect_call +.type OPENSSL_indirect_call,@function +.align 16 +OPENSSL_indirect_call: +.L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin +.globl OPENSSL_ia32_rdrand +.hidden OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: +.L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +.L019loop: +.byte 15,199,240 + jc .L020break + loop .L019loop +.L020break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.hidden OPENSSL_ia32cap_P +#endif diff --git a/third_party/boringssl/linux-x86/crypto/md5/md5-586.S b/third_party/boringssl/linux-x86/crypto/md5/md5-586.S new file mode 100644 index 00000000000..734b941a82b --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/md5/md5-586.S @@ -0,0 +1,682 @@ +#if defined(__i386__) +.file "src/crypto/md5/asm/md5-586.S" +.text +.globl md5_block_asm_data_order +.hidden md5_block_asm_data_order +.type md5_block_asm_data_order,@function +.align 16 +md5_block_asm_data_order: +.L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +.L000start: + + + movl %ecx,%edi + movl (%esi),%ebp + + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + + + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae .L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/modes/ghash-x86.S b/third_party/boringssl/linux-x86/crypto/modes/ghash-x86.S new file mode 100644 index 00000000000..28720889a18 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/modes/ghash-x86.S @@ -0,0 +1,1274 @@ +#if defined(__i386__) +.file "ghash-x86.S" +.text +.globl gcm_gmult_4bit_x86 +.hidden gcm_gmult_4bit_x86 +.type gcm_gmult_4bit_x86,@function +.align 16 +gcm_gmult_4bit_x86: +.L_gcm_gmult_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%edi + movl 108(%esp),%esi + movl (%edi),%ebp + movl 4(%edi),%edx + movl 8(%edi),%ecx + movl 12(%edi),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) + movl %ebp,(%esp) + movl %edx,4(%esp) + movl %ecx,8(%esp) + movl %ebx,12(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L000x86_loop +.align 16 +.L000x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L001x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L000x86_loop +.align 16 +.L001x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin +.globl gcm_ghash_4bit_x86 +.hidden gcm_ghash_4bit_x86 +.type gcm_ghash_4bit_x86,@function +.align 16 +gcm_ghash_4bit_x86: +.L_gcm_ghash_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%ebx + movl 108(%esp),%esi + movl 112(%esp),%edi + movl 116(%esp),%ecx + addl %edi,%ecx + movl %ecx,116(%esp) + movl (%ebx),%ebp + movl 4(%ebx),%edx + movl 8(%ebx),%ecx + movl 12(%ebx),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) +.align 16 +.L002x86_outer_loop: + xorl 12(%edi),%ebx + xorl 8(%edi),%ecx + xorl 4(%edi),%edx + xorl (%edi),%ebp + movl %ebx,12(%esp) + movl %ecx,8(%esp) + movl %edx,4(%esp) + movl %ebp,(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L003x86_loop +.align 16 +.L003x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L004x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L003x86_loop +.align 16 +.L004x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 112(%esp),%edi + leal 16(%edi),%edi + cmpl 116(%esp),%edi + movl %edi,112(%esp) + jb .L002x86_outer_loop + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin +.globl gcm_gmult_4bit_mmx +.hidden gcm_gmult_4bit_mmx +.type gcm_gmult_4bit_mmx,@function +.align 16 +gcm_gmult_4bit_mmx: +.L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call .L005pic_point +.L005pic_point: + popl %eax + leal .Lrem_4bit-.L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx + xorl %ecx,%ecx + movl %ebx,%edx + movb %dl,%cl + movl $14,%ebp + shlb $4,%cl + andl $240,%edx + movq 8(%esi,%ecx,1),%mm0 + movq (%esi,%ecx,1),%mm1 + movd %mm0,%ebx + jmp .L006mmx_loop +.align 16 +.L006mmx_loop: + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + movb (%edi,%ebp,1),%cl + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + decl %ebp + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + movl %ecx,%edx + pxor %mm2,%mm0 + js .L007mmx_break + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + jmp .L006mmx_loop +.align 16 +.L007mmx_break: + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + pxor %mm2,%mm0 + psrlq $32,%mm0 + movd %mm1,%edx + psrlq $32,%mm1 + movd %mm0,%ecx + movd %mm1,%ebp + bswap %ebx + bswap %edx + bswap %ecx + bswap %ebp + emms + movl %ebx,12(%edi) + movl %edx,4(%edi) + movl %ecx,8(%edi) + movl %ebp,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin +.globl gcm_ghash_4bit_mmx +.hidden gcm_ghash_4bit_mmx +.type gcm_ghash_4bit_mmx,@function +.align 16 +gcm_ghash_4bit_mmx: +.L_gcm_ghash_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call .L008pic_point +.L008pic_point: + popl %esi + leal .Lrem_8bit-.L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx +.align 16 +.L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne .L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin +.globl gcm_init_clmul +.hidden gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call .L010pic +.L010pic: + popl %ecx + leal .Lbswap-.L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.size gcm_init_clmul,.-.L_gcm_init_clmul_begin +.globl gcm_gmult_clmul +.hidden gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call .L011pic +.L011pic: + popl %ecx + leal .Lbswap-.L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin +.globl gcm_ghash_clmul +.hidden gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: +.L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call .L012pic +.L012pic: + popl %ecx + leal .Lbswap-.L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz .L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe .L014even_tail + jmp .L015mod_loop +.align 32 +.L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja .L015mod_loop +.L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz .L016done + movups (%edx),%xmm2 +.L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin +.align 64 +.Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.align 64 +.Lrem_8bit: +.value 0,450,900,582,1800,1738,1164,1358 +.value 3600,4050,3476,3158,2328,2266,2716,2910 +.value 7200,7650,8100,7782,6952,6890,6316,6510 +.value 4656,5106,4532,4214,5432,5370,5820,6014 +.value 14400,14722,15300,14854,16200,16010,15564,15630 +.value 13904,14226,13780,13334,12632,12442,13020,13086 +.value 9312,9634,10212,9766,9064,8874,8428,8494 +.value 10864,11186,10740,10294,11640,11450,12028,12094 +.value 28800,28994,29444,29382,30600,30282,29708,30158 +.value 32400,32594,32020,31958,31128,30810,31260,31710 +.value 27808,28002,28452,28390,27560,27242,26668,27118 +.value 25264,25458,24884,24822,26040,25722,26172,26622 +.value 18624,18690,19268,19078,20424,19978,19532,19854 +.value 18128,18194,17748,17558,16856,16410,16988,17310 +.value 21728,21794,22372,22182,21480,21034,20588,20910 +.value 23280,23346,22900,22710,24056,23610,24188,24510 +.value 57600,57538,57988,58182,58888,59338,58764,58446 +.value 61200,61138,60564,60758,59416,59866,60316,59998 +.value 64800,64738,65188,65382,64040,64490,63916,63598 +.value 62256,62194,61620,61814,62520,62970,63420,63102 +.value 55616,55426,56004,56070,56904,57226,56780,56334 +.value 55120,54930,54484,54550,53336,53658,54236,53790 +.value 50528,50338,50916,50982,49768,50090,49644,49198 +.value 52080,51890,51444,51510,52344,52666,53244,52798 +.value 37248,36930,37380,37830,38536,38730,38156,38094 +.value 40848,40530,39956,40406,39064,39258,39708,39646 +.value 36256,35938,36388,36838,35496,35690,35116,35054 +.value 33712,33394,32820,33270,33976,34170,34620,34558 +.value 43456,43010,43588,43910,44744,44810,44364,44174 +.value 42960,42514,42068,42390,41176,41242,41820,41630 +.value 46560,46114,46692,47014,45800,45866,45420,45230 +.value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 64 +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 +#endif diff --git a/third_party/boringssl/linux-x86/crypto/rc4/rc4-586.S b/third_party/boringssl/linux-x86/crypto/rc4/rc4-586.S new file mode 100644 index 00000000000..a5cce47c09d --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/rc4/rc4-586.S @@ -0,0 +1,385 @@ +#if defined(__i386__) +.file "rc4-586.S" +.text +.globl asm_RC4 +.hidden asm_RC4 +.type asm_RC4,@function +.align 16 +asm_RC4: +.L_asm_RC4_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebp + xorl %eax,%eax + xorl %ebx,%ebx + cmpl $0,%edx + je .L000abort + movb (%edi),%al + movb 4(%edi),%bl + addl $8,%edi + leal (%esi,%edx,1),%ecx + subl %esi,%ebp + movl %ecx,24(%esp) + incb %al + cmpl $-1,256(%edi) + je .L001RC4_CHAR + movl (%edi,%eax,4),%ecx + andl $-4,%edx + jz .L002loop1 + movl %ebp,32(%esp) + testl $-8,%edx + jz .L003go4loop4 + call .L004PIC_me_up +.L004PIC_me_up: + popl %ebp + leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp + btl $26,(%ebp) + jnc .L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp .L005loop_mmx_enter +.align 16 +.L006loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +.L005loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb .L006loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je .L007done + jmp .L002loop1 +.align 16 +.L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +.L008loop4: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%eax,4),%ecx + movl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl 32(%esp),%ecx + orl (%edi,%edx,4),%ebp + rorl $8,%ebp + xorl (%esi),%ebp + cmpl 28(%esp),%esi + movl %ebp,(%ecx,%esi,1) + leal 4(%esi),%esi + movl (%edi,%eax,4),%ecx + jb .L008loop4 + cmpl 24(%esp),%esi + je .L007done + movl 32(%esp),%ebp +.align 16 +.L002loop1: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%edx,4),%edx + xorb (%esi),%dl + leal 1(%esi),%esi + movl (%edi,%eax,4),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb .L002loop1 + jmp .L007done +.align 16 +.L001RC4_CHAR: + movzbl (%edi,%eax,1),%ecx +.L009cloop1: + addb %cl,%bl + movzbl (%edi,%ebx,1),%edx + movb %cl,(%edi,%ebx,1) + movb %dl,(%edi,%eax,1) + addb %cl,%dl + movzbl (%edi,%edx,1),%edx + addb $1,%al + xorb (%esi),%dl + leal 1(%esi),%esi + movzbl (%edi,%eax,1),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb .L009cloop1 +.L007done: + decb %al + movl %ebx,-4(%edi) + movb %al,-8(%edi) +.L000abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_RC4,.-.L_asm_RC4_begin +.globl asm_RC4_set_key +.hidden asm_RC4_set_key +.type asm_RC4_set_key,@function +.align 16 +asm_RC4_set_key: +.L_asm_RC4_set_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%esi + call .L010PIC_me_up +.L010PIC_me_up: + popl %edx + leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx + leal 8(%edi),%edi + leal (%esi,%ebp,1),%esi + negl %ebp + xorl %eax,%eax + movl %ebp,-4(%edi) + btl $20,(%edx) + jc .L011c1stloop +.align 16 +.L012w1stloop: + movl %eax,(%edi,%eax,4) + addb $1,%al + jnc .L012w1stloop + xorl %ecx,%ecx + xorl %edx,%edx +.align 16 +.L013w2ndloop: + movl (%edi,%ecx,4),%eax + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movl (%edi,%edx,4),%ebx + jnz .L014wnowrap + movl -4(%edi),%ebp +.L014wnowrap: + movl %eax,(%edi,%edx,4) + movl %ebx,(%edi,%ecx,4) + addb $1,%cl + jnc .L013w2ndloop + jmp .L015exit +.align 16 +.L011c1stloop: + movb %al,(%edi,%eax,1) + addb $1,%al + jnc .L011c1stloop + xorl %ecx,%ecx + xorl %edx,%edx + xorl %ebx,%ebx +.align 16 +.L016c2ndloop: + movb (%edi,%ecx,1),%al + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movb (%edi,%edx,1),%bl + jnz .L017cnowrap + movl -4(%edi),%ebp +.L017cnowrap: + movb %al,(%edi,%edx,1) + movb %bl,(%edi,%ecx,1) + addb $1,%cl + jnc .L016c2ndloop + movl $-1,256(%edi) +.L015exit: + xorl %eax,%eax + movl %eax,-8(%edi) + movl %eax,-4(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size asm_RC4_set_key,.-.L_asm_RC4_set_key_begin +.globl RC4_options +.hidden RC4_options +.type RC4_options,@function +.align 16 +RC4_options: +.L_RC4_options_begin: + call .L018pic_point +.L018pic_point: + popl %eax + leal .L019opts-.L018pic_point(%eax),%eax + call .L020PIC_me_up +.L020PIC_me_up: + popl %edx + leal OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx + movl (%edx),%edx + btl $20,%edx + jc .L0211xchar + btl $26,%edx + jnc .L022ret + addl $25,%eax + ret +.L0211xchar: + addl $12,%eax +.L022ret: + ret +.align 64 +.L019opts: +.byte 114,99,52,40,52,120,44,105,110,116,41,0 +.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.size RC4_options,.-.L_RC4_options_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S b/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S new file mode 100644 index 00000000000..808ccac5178 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/sha/sha1-586.S @@ -0,0 +1,2799 @@ +#if defined(__i386__) +.file "sha1-586.S" +.text +.globl sha1_block_data_order +.hidden sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: +.L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi + leal .LK_XX_XX-.L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz .L001x86 + movl 8(%esi),%ecx + testl $16777216,%eax + jz .L001x86 + testl $536870912,%ecx + jnz .Lshaext_shortcut + jmp .Lssse3_shortcut +.align 16 +.L001x86: + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L002loop +.align 16 +.L002loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L002loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order,.-.L_sha1_block_data_order_begin +.hidden _sha1_block_data_order_shaext +.type _sha1_block_data_order_shaext,@function +.align 16 +_sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal .LK_XX_XX-.L003pic_point(%ebp),%ebp +.Lshaext_shortcut: + movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp .L004loop_shaext +.align 16 +.L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz .L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext +.hidden _sha1_block_data_order_ssse3 +.type _sha1_block_data_order_ssse3,@function +.align 16 +_sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L005pic_point +.L005pic_point: + popl %ebp + leal .LK_XX_XX-.L005pic_point(%ebp),%ebp +.Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp .L006loop +.align 16 +.L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp .L006loop +.align 16 +.L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.align 64 +.LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/linux-x86/crypto/sha/sha256-586.S b/third_party/boringssl/linux-x86/crypto/sha/sha256-586.S new file mode 100644 index 00000000000..08d948432bc --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/sha/sha256-586.S @@ -0,0 +1,4579 @@ +#if defined(__i386__) +.file "sha512-586.S" +.text +.globl sha256_block_data_order +.hidden sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: +.L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz .L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz .L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + testl $512,%ebx + jnz .L005SSSE3 +.L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae .L006unrolled + jmp .L002loop +.align 16 +.L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 16 +.L00700_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00700_15 + movl 156(%esp),%ecx + jmp .L00816_63 +.align 16 +.L00816_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00816_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L006unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L009grand_loop +.align 16 +.L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .L010loop_shaext +.align 16 +.L010loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz .L010loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L005SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp .L011grand_ssse3 +.align 16 +.L011grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp .L012ssse3_00_47 +.align 16 +.L012ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L012ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L011grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +#endif diff --git a/third_party/boringssl/linux-x86/crypto/sha/sha512-586.S b/third_party/boringssl/linux-x86/crypto/sha/sha512-586.S new file mode 100644 index 00000000000..a9284000b35 --- /dev/null +++ b/third_party/boringssl/linux-x86/crypto/sha/sha512-586.S @@ -0,0 +1,2831 @@ +#if defined(__i386__) +.file "sha512-586.S" +.text +.globl sha512_block_data_order +.hidden sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P-.L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz .L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je .L003SSSE3 + subl $80,%esp + jmp .L004loop_sse2 +.align 16 +.L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp .L00500_14_sse2 +.align 16 +.L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz .L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp .L00616_79_sse2 +.align 16 +.L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz .L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb .L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 32 +.L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp .L00800_47_ssse3 +.align 32 +.L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz .L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb .L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00900_15_x86 +.align 16 +.L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.size sha512_block_data_order,.-.L_sha512_block_data_order_begin +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/aes/aes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/aes/aes-x86_64.S new file mode 100644 index 00000000000..5f4b057ffad --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/aes/aes-x86_64.S @@ -0,0 +1,2536 @@ +#if defined(__x86_64__) +.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $2155905152,%r12d + roll $24,%eax + movl $2155905152,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl %r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +.type asm_AES_encrypt,@function +.hidden asm_AES_encrypt +asm_AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.size asm_AES_encrypt,.-asm_AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +.type asm_AES_decrypt,@function +.hidden asm_AES_decrypt +asm_AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.size asm_AES_decrypt,.-asm_AES_decrypt +.align 16 +.globl asm_AES_set_encrypt_key +.hidden asm_AES_set_encrypt_key +.type asm_AES_set_encrypt_key,@function +asm_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Lenc_key_epilogue: + .byte 0xf3,0xc3 +.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.align 4 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.align 4 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.align 4 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: +.byte 0xf3,0xc3 +.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key +.align 16 +.globl asm_AES_set_decrypt_key +.hidden asm_AES_set_decrypt_key +.type asm_AES_set_decrypt_key,@function +asm_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.align 4 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.align 4 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + + roll $16,%r9d + + roll $16,%r12d + + roll $16,%r8d + + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax +.Labort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Ldec_key_epilogue: + .byte 0xf3,0xc3 +.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key +.align 16 +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +.type asm_AES_cbc_encrypt,@function +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + cmpq $0,%r9 + jne .Lcbc_picked_te + leaq .LAES_Td(%rip),%r14 +.Lcbc_picked_te: + + movl OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $28,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.align 4 +.Lcbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.align 4 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.align 4 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.align 4 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.align 16 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.align 4 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.align 16 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.align 4 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop +.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.align 4 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.align 16 +.Lcbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.align 4 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.align 16 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.align 4 +.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.align 16 +.Lcbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lcbc_popfq: + popfq +.Lcbc_epilogue: + .byte 0xf3,0xc3 +.size asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt +.align 64 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.align 64 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S new file mode 100644 index 00000000000..1d51d5b50ef --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/aes/aesni-x86_64.S @@ -0,0 +1,3565 @@ +#if defined(__x86_64__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.globl aesni_encrypt +.hidden aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.hidden aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.Lenc_loop6_enter: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.Ldec_loop6_enter: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop8_inner +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.Lenc_loop8_inner: +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.Lenc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop8_inner +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.Ldec_loop8_inner: +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.Ldec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.hidden aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movq %r11,%rcx + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movl %r10d,%eax + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + +.Lecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + .byte 0xf3,0xc3 +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.hidden aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm6 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + shll $4,%eax + movl $16,%r10d + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movq %r10,%rax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%r11),%xmm0 + +.Lccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm9,%xmm6 + decq %rdx +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) +.byte 102,15,56,0,215 + leaq 16(%rsi),%rsi + jnz .Lccm64_enc_outer + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.hidden aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm6 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm6,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,15,56,0,247 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 +.byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 + leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + movq %r10,%rax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups 32(%r11),%xmm0 + jmp .Lccm64_dec2_loop +.align 16 +.Lccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movl 240(%r11),%eax + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 +.byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.hidden aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne .Lctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + + + + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %r11d,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb .Lctr32_tail + + subq $6,%rdx + cmpl $4194304,%r10d + je .Lctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp .Lctr32_loop8 + +.align 16 +.Lctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call .Lenc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp .Lctr32_tail + +.align 32 +.Lctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %r11d,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc .Lctr32_loop8 + + addq $8,%rdx + jz .Lctr32_done + leaq -128(%rcx),%rcx + +.Lctr32_tail: + + + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb .Lctr32_loop3 + je .Lctr32_loop4 + + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call .Lenc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb .Lctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je .Lctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz .Lctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz .Lctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb .Lctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je .Lctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + +.Lctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +.Lctr32_epilogue: + .byte 0xf3,0xc3 +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.hidden aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_8: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_enc_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_enc_grandloop + +.align 32 +.Lxts_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_enc_loop6 +.align 32 +.Lxts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_enc_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,220,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,220,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_enc_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +.Lxts_enc_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + addq $96,%rdx + jz .Lxts_enc_done + + pxor %xmm0,%xmm11 + cmpq $32,%rdx + jb .Lxts_enc_one + pxor %xmm0,%xmm12 + je .Lxts_enc_two + + pxor %xmm0,%xmm13 + cmpq $64,%rdx + jb .Lxts_enc_three + pxor %xmm0,%xmm14 + je .Lxts_enc_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.hidden aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_11: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 +.byte 102,15,56,221,209 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_dec_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_dec_grandloop + +.align 32 +.Lxts_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,222,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_dec_loop6 +.align 32 +.Lxts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_dec_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,222,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,222,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_dec_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +.Lxts_dec_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 + addq $96,%rdx + jz .Lxts_dec_done + + pxor %xmm0,%xmm12 + cmpq $32,%rdx + jb .Lxts_dec_one + pxor %xmm0,%xmm13 + je .Lxts_dec_two + + pxor %xmm0,%xmm14 + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm14,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_cbc_encrypt +.hidden aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + cmpq $16,%rdx + jne .Lcbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $80,%rdx + jbe .Lcbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe .Lcbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je .Lcbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $112,%rdx + jle .Lcbc_dec_clear_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe .Lcbc_dec_tail + + movaps %xmm11,%xmm2 +.Lcbc_dec_six_or_seven: + cmpq $96,%rdx + ja .Lcbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +.Lcbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja .Lcbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle .Lcbc_dec_clear_tail_collected + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + subq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $16,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $16,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $16,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_17: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_17 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leaq 16(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 + leaq 32(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 + leaq 48(%rsi),%rsi + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 +.Lcbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + movdqa %xmm2,(%rsp) + +.Lcbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + leaq (%rbp),%rsp + popq %rbp +.Lcbc_ret: + .byte 0xf3,0xc3 +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.hidden aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + pxor %xmm1,%xmm1 + movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 +.Ldec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.hidden aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movl $268437504,%r10d + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + andl OPENSSL_ia32cap_P+4(%rip),%r10d + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + cmpl $268435456,%r10d + je .L10rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp .Loop_key128 + +.align 16 +.Loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + cmpl $268435456,%r10d + je .L12rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz .Loop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + cmpl $268435456,%r10d + je .L14rounds_alt + + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp .Loop_key256 + +.align 16 +.Loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz .Ldone_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 +.Lincrement1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: +.long 1,1,1,1 +.Lkey_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/aes/bsaes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/aes/bsaes-x86_64.S new file mode 100644 index 00000000000..8cfa4df5ba3 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/aes/bsaes-x86_64.S @@ -0,0 +1,2507 @@ +#if defined(__x86_64__) +.text + +.extern asm_AES_encrypt +.hidden asm_AES_encrypt +.extern asm_AES_decrypt +.hidden asm_AES_decrypt + +.type _bsaes_encrypt8,@function +.align 64 +_bsaes_encrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.align 16 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Lenc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.align 16 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,@function +.align 64 +_bsaes_decrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.align 16 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.align 16 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +.type _bsaes_key_convert,@function +.align 16 +_bsaes_key_convert: + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.align 16 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +.globl bsaes_cbc_encrypt +.hidden bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,@function +.align 16 +bsaes_cbc_encrypt: + cmpl $0,%r9d + jne asm_AES_cbc_encrypt + cmpq $128,%rdx + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lcbc_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.hidden bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,@function +.align 16 +bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq .LBS0(%rip),%r11 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lctr_enc_epilogue: + .byte 0xf3,0xc3 +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.hidden bsaes_xts_encrypt +.type bsaes_xts_encrypt,@function +.align 16 +bsaes_xts_encrypt: + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $128,%r14 + jz .Lxts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.hidden bsaes_xts_decrypt +.type bsaes_xts_decrypt,@function +.align 16 +bsaes_xts_decrypt: + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $128,%r14 + jz .Lxts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +.type _bsaes_const,@object +.align 64 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.align 64 +.size _bsaes_const,.-_bsaes_const +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/aes/vpaes-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/aes/vpaes-x86_64.S new file mode 100644 index 00000000000..1d124246af9 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/aes/vpaes-x86_64.S @@ -0,0 +1,834 @@ +#if defined(__x86_64__) +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.hidden vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.hidden vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.hidden vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.hidden vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.hidden vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + .byte 0xf3,0xc3 +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 64 +.size _vpaes_consts,.-_vpaes_consts +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-avx2.S b/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-avx2.S new file mode 100644 index 00000000000..cd334d95a68 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-avx2.S @@ -0,0 +1,34 @@ +#if defined(__x86_64__) +.text + +.globl rsaz_avx2_eligible +.hidden rsaz_avx2_eligible +.type rsaz_avx2_eligible,@function +rsaz_avx2_eligible: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.globl rsaz_1024_sqr_avx2 +.hidden rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.hidden rsaz_1024_mul_avx2 +.globl rsaz_1024_norm2red_avx2 +.hidden rsaz_1024_norm2red_avx2 +.globl rsaz_1024_red2norm_avx2 +.hidden rsaz_1024_red2norm_avx2 +.globl rsaz_1024_scatter5_avx2 +.hidden rsaz_1024_scatter5_avx2 +.globl rsaz_1024_gather5_avx2 +.hidden rsaz_1024_gather5_avx2 +.type rsaz_1024_sqr_avx2,@function +rsaz_1024_sqr_avx2: +rsaz_1024_mul_avx2: +rsaz_1024_norm2red_avx2: +rsaz_1024_red2norm_avx2: +rsaz_1024_scatter5_avx2: +rsaz_1024_gather5_avx2: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-x86_64.S new file mode 100644 index 00000000000..dd3d3106d16 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/bn/rsaz-x86_64.S @@ -0,0 +1,1127 @@ +#if defined(__x86_64__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl rsaz_512_sqr +.hidden rsaz_512_sqr +.type rsaz_512_sqr,@function +.align 32 +rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lsqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + jmp .Loop_sqr + +.align 32 +.Loop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq %r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqr + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lsqr_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_sqr,.-rsaz_512_sqr +.globl rsaz_512_mul +.hidden rsaz_512_mul +.type rsaz_512_mul,@function +.align 32 +rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul,.-rsaz_512_mul +.globl rsaz_512_mul_gather4 +.hidden rsaz_512_mul_gather4 +.type rsaz_512_mul_gather4,@function +.align 32 +rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_gather4_body: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp .Loop_mul_gather + +.align 32 +.Loop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_gather4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 +.globl rsaz_512_mul_scatter4 +.hidden rsaz_512_mul_scatter4 +.type rsaz_512_mul_scatter4,@function +.align 32 +rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_scatter4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 +.globl rsaz_512_mul_by_one +.hidden rsaz_512_mul_by_one +.type rsaz_512_mul_by_one,@function +.align 32 +rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_by_one_body: + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + call __rsaz_512_reduce + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_by_one_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one +.type __rsaz_512_reduce,@function +.align 32 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .Lreduction_loop + +.align 32 +.Lreduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne .Lreduction_loop + + .byte 0xf3,0xc3 +.size __rsaz_512_reduce,.-__rsaz_512_reduce +.type __rsaz_512_subtract,@function +.align 32 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_subtract,.-__rsaz_512_subtract +.type __rsaz_512_mul,@function +.align 32 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp .Loop_mul + +.align 32 +.Loop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_mul,.-__rsaz_512_mul +.globl rsaz_512_scatter4 +.hidden rsaz_512_scatter4 +.type rsaz_512_scatter4,@function +.align 16 +rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp .Loop_scatter +.align 16 +.Loop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz .Loop_scatter + .byte 0xf3,0xc3 +.size rsaz_512_scatter4,.-rsaz_512_scatter4 + +.globl rsaz_512_gather4 +.hidden rsaz_512_gather4 +.type rsaz_512_gather4,@function +.align 16 +rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp .Loop_gather +.align 16 +.Loop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz .Loop_gather + .byte 0xf3,0xc3 +.size rsaz_512_gather4,.-rsaz_512_gather4 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S new file mode 100644 index 00000000000..4d401c6743f --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont.S @@ -0,0 +1,728 @@ +#if defined(__x86_64__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl bn_mul_mont +.hidden bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + cmpq %rsi,%rdx + jne .Lmul4x_enter + testl $7,%r9d + jz .Lsqr8x_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 2(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + movq %r9,%r15 +.align 16 +.Lcopy: + movq (%rsp,%r14,8),%rsi + movq (%rdi,%r14,8),%rcx + xorq %rcx,%rsi + andq %rax,%rsi + xorq %rcx,%rsi + movq %r14,(%rsp,%r14,8) + movq %rsi,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.Lmul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb .Louter4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp .Lsub4x +.align 16 +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rax,%xmm0 + punpcklqdq %xmm0,%xmm0 + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + + movq %r9,%r15 + pxor %xmm5,%xmm5 + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqu (%rsp,%r14,1),%xmm2 + movdqu 16(%rsp,%r14,1),%xmm4 + movdqu (%rdi,%r14,1),%xmm1 + movdqu 16(%rdi,%r14,1),%xmm3 + pxor %xmm1,%xmm2 + pxor %xmm3,%xmm4 + pand %xmm0,%xmm2 + pand %xmm0,%xmm4 + pxor %xmm1,%xmm2 + pxor %xmm3,%xmm4 + movdqu %xmm2,(%rdi,%r14,1) + movdqu %xmm4,16(%rdi,%r14,1) + movdqa %xmm5,(%rsp,%r14,1) + movdqa %xmm5,16(%rsp,%r14,1) + + leaq 32(%r14),%r14 + decq %r15 + jnz .Lcopy4x + + shlq $2,%r9 + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont,.-bn_mul4x_mont +.extern bn_sqr8x_internal +.hidden bn_sqr8x_internal + +.type bn_sqr8x_mont,@function +.align 32 +bn_sqr8x_mont: +.Lsqr8x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lsqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lsqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lsqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl OPENSSL_ia32cap_P+8(%rip),%eax + jmp .Lsqr8x_copy_n + +.align 32 +.Lsqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 + leaq 32(%rcx),%rcx + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz .Lsqr8x_copy_n + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + call bn_sqr8x_internal + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp .Lsqr8x_zero + +.align 32 +.Lsqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz .Lsqr8x_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lsqr8x_epilogue: + .byte 0xf3,0xc3 +.size bn_sqr8x_mont,.-bn_sqr8x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S new file mode 100644 index 00000000000..02edc69b369 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/bn/x86_64-mont5.S @@ -0,0 +1,1823 @@ +#if defined(__x86_64__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl bn_mul_mont_gather5 +.hidden bn_mul_mont_gather5 +.type bn_mul_mont_gather5,@function +.align 64 +bn_mul_mont_gather5: + testl $7,%r9d + jnz .Lmul_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + movl %r9d,%r9d + movq %rsp,%rax + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq 2(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + movq %r9,%r15 +.align 16 +.Lcopy: + movq (%rsp,%r14,8),%rsi + movq (%rdi,%r14,8),%rcx + xorq %rcx,%rsi + andq %rax,%rsi + xorq %rcx,%rsi + movq %r14,(%rsp,%r14,8) + movq %rsi,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.type bn_mul4x_mont_gather5,@function +.align 32 +bn_mul4x_mont_gather5: +.Lmul4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lmul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) +.Lmul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,@function +.align 32 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 +.byte 0x67 + por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 + pand %xmm7,%xmm3 +.byte 0x67 + por %xmm2,%xmm0 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 + por %xmm3,%xmm0 + movq 32(%r14),%xmm3 + +.byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 + + imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 + movq %rdx,%r11 + + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp .L1st4x + +.align 32 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp .Louter4x + +.align 32 +.Louter4x: + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + + imulq %r10,%rbp +.byte 0x67 + movq %rdx,%r11 + movq %rdi,(%r14) + + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp .Linner4x + +.align 32 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -16(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb .Louter4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp .Lsqr4x_sub +.size mul4x_internal,.-mul4x_internal +.globl bn_power5 +.hidden bn_power5 +.type bn_power5,@function +.align 32 +bn_power5: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lpwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lpower5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lpower5_epilogue: + .byte 0xf3,0xc3 +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,@function +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +.L8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb .L8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp .Lsqr4x_sub + +.align 32 +.Lsqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz .Lsqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 +.size bn_sqr8x_internal,.-bn_sqr8x_internal +.globl bn_from_montgomery +.hidden bn_from_montgomery +.type bn_from_montgomery,@function +.align 32 +bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 +.size bn_from_montgomery,.-bn_from_montgomery + +.type bn_from_mont8x,@function +.align 32 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lfrom_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lfrom_sp_done + +.align 32 +.Lfrom_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lfrom_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lfrom_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp .Lmul_by_1 + +.align 32 +.Lmul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz .Lmul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz .Lfrom_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lfrom_epilogue: + .byte 0xf3,0xc3 +.size bn_from_mont8x,.-bn_from_mont8x +.globl bn_scatter5 +.hidden bn_scatter5 +.type bn_scatter5,@function +.align 16 +bn_scatter5: + cmpl $0,%esi + jz .Lscatter_epilogue + leaq (%rdx,%rcx,8),%rdx +.Lscatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz .Lscatter +.Lscatter_epilogue: + .byte 0xf3,0xc3 +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.hidden bn_gather5 +.type bn_gather5,@function +.align 16 +bn_gather5: + movl %ecx,%r11d + shrl $3,%ecx + andq $7,%r11 + notl %ecx + leaq .Lmagic_masks(%rip),%rax + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx + movq 0(%rax,%rcx,8),%xmm4 + movq 8(%rax,%rcx,8),%xmm5 + movq 16(%rax,%rcx,8),%xmm6 + movq 24(%rax,%rcx,8),%xmm7 + jmp .Lgather +.align 16 +.Lgather: + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 + pand %xmm4,%xmm0 + movq 0(%rdx),%xmm2 + pand %xmm5,%xmm1 + movq 64(%rdx),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 +.byte 0x67,0x67 + por %xmm2,%xmm0 + leaq 256(%rdx),%rdx + por %xmm3,%xmm0 + + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz .Lgather + .byte 0xf3,0xc3 +.LSEH_end_bn_gather5: +.size bn_gather5,.-bn_gather5 +.align 64 +.Lmagic_masks: +.long 0,0, 0,0, 0,0, -1,-1 +.long 0,0, 0,0, 0,0, 0,0 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/cpu-x86_64-asm.S b/third_party/boringssl/linux-x86_64/crypto/cpu-x86_64-asm.S new file mode 100644 index 00000000000..9eef154e9e7 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/cpu-x86_64-asm.S @@ -0,0 +1,143 @@ +#if defined(__x86_64__) +.text + +.globl OPENSSL_ia32_cpuid +.hidden OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: + + + movq %rdi,%rdi + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz .Lintel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz .Lintel + + + + + movl $2147483648,%eax + cpuid + + + cmpl $2147483649,%eax + jb .Lintel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + + + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb .Lintel + + movl $2147483656,%eax + cpuid + + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + + btl $28,%edx + jnc .Lgeneric + shrl $16,%ebx + cmpb %r10b,%bl + ja .Lgeneric + andl $4026531839,%edx + jmp .Lgeneric + +.Lintel: + cmpl $4,%r11d + movl $-1,%r10d + jb .Lnocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + + cmpl $7,%r11d + jb .Lnocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +.Lnocacheinfo: + movl $1,%eax + cpuid + + andl $3220176895,%edx + cmpl $0,%r9d + jne .Lnotintel + orl $1073741824,%edx +.Lnotintel: + btl $28,%edx + jnc .Lgeneric + andl $4026531839,%edx + cmpl $0,%r10d + je .Lgeneric + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .Lgeneric + andl $4026531839,%edx +.Lgeneric: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +.Ldone: + movl %r9d,4(%rdi) + movl %r10d,0(%rdi) + movq %r8,%rbx + .byte 0xf3,0xc3 +.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid + +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/md5/md5-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/md5/md5-x86_64.S new file mode 100644 index 00000000000..7644689644b --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/md5/md5-x86_64.S @@ -0,0 +1,671 @@ +#if defined(__x86_64__) +.text +.align 16 + +.globl md5_block_asm_data_order +.hidden md5_block_asm_data_order +.type md5_block_asm_data_order,@function +md5_block_asm_data_order: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r14 + pushq %r15 +.Lprologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je .Lend + + +.Lloop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb .Lloop + + +.Lend: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r12 + movq 24(%rsp),%rbx + movq 32(%rsp),%rbp + addq $40,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size md5_block_asm_data_order,.-md5_block_asm_data_order +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/modes/aesni-gcm-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/modes/aesni-gcm-x86_64.S new file mode 100644 index 00000000000..f01692e0b1a --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/modes/aesni-gcm-x86_64.S @@ -0,0 +1,19 @@ +#if defined(__x86_64__) +.text + +.globl aesni_gcm_encrypt +.hidden aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +aesni_gcm_encrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt + +.globl aesni_gcm_decrypt +.hidden aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +aesni_gcm_decrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/modes/ghash-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/modes/ghash-x86_64.S new file mode 100644 index 00000000000..1db7d69d4bb --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/modes/ghash-x86_64.S @@ -0,0 +1,1329 @@ +#if defined(__x86_64__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl gcm_gmult_4bit +.hidden gcm_gmult_4bit +.type gcm_gmult_4bit,@function +.align 16 +gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp .Loop1 + +.align 16 +.Loop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.align 16 +.Lbreak1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +.Lgmult_epilogue: + .byte 0xf3,0xc3 +.size gcm_gmult_4bit,.-gcm_gmult_4bit +.globl gcm_ghash_4bit +.hidden gcm_ghash_4bit +.type gcm_ghash_4bit,@function +.align 16 +gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.align 16 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lghash_epilogue: + .byte 0xf3,0xc3 +.size gcm_ghash_4bit,.-gcm_ghash_4bit +.globl gcm_init_clmul +.hidden gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + .byte 0xf3,0xc3 +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.hidden gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.hidden gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 32 +gcm_ghash_clmul: +.L_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $16,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm6 + movl OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb .Lskip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je .Lskip4x + + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jc .Ltail4x + + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc .Lmod4_loop + +.Ltail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz .Ldone + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz .Lodd_tail +.Lskip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $32,%rcx + jbe .Leven_tail + nop + jmp .Lmod_loop + +.align 32 +.Lmod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $32,%rcx + ja .Lmod_loop + +.Leven_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.Ldone: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.globl gcm_init_avx +.hidden gcm_init_avx +.type gcm_init_avx,@function +.align 32 +gcm_init_avx: + jmp .L_init_clmul +.size gcm_init_avx,.-gcm_init_avx +.globl gcm_gmult_avx +.hidden gcm_gmult_avx +.type gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: + jmp .L_gmult_clmul +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.hidden gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: + jmp .L_ghash_clmul +.size gcm_ghash_avx,.-gcm_ghash_avx +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.L7_mask_poly: +.long 7,0,450,0 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/rand/rdrand-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/rand/rdrand-x86_64.S new file mode 100644 index 00000000000..94aab9c19b7 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/rand/rdrand-x86_64.S @@ -0,0 +1,48 @@ +#if defined(__x86_64__) +.text + + + + +.globl CRYPTO_rdrand +.hidden CRYPTO_rdrand +.type CRYPTO_rdrand,@function +.align 16 +CRYPTO_rdrand: + xorq %rax,%rax + + +.byte 0x48, 0x0f, 0xc7, 0xf1 + + adcq %rax,%rax + movq %rcx,0(%rdi) + .byte 0xf3,0xc3 + + + + + +.globl CRYPTO_rdrand_multiple8_buf +.hidden CRYPTO_rdrand_multiple8_buf +.type CRYPTO_rdrand_multiple8_buf,@function +.align 16 +CRYPTO_rdrand_multiple8_buf: + testq %rsi,%rsi + jz .Lout + movq $8,%rdx +.Lloop: + + +.byte 0x48, 0x0f, 0xc7, 0xf1 + jnc .Lerr + movq %rcx,0(%rdi) + addq %rdx,%rdi + subq %rdx,%rsi + jnz .Lloop +.Lout: + movq $1,%rax + .byte 0xf3,0xc3 +.Lerr: + xorq %rax,%rax + .byte 0xf3,0xc3 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S new file mode 100644 index 00000000000..06c8d672ab1 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-md5-x86_64.S @@ -0,0 +1,1262 @@ +#if defined(__x86_64__) +.text +.align 16 + +.globl rc4_md5_enc +.hidden rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: + cmpq $0,%r9 + je .Labort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +.Lbody: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp .Loop + +.align 16 +.Loop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb .Loop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +.Lepilogue: +.Labort: + .byte 0xf3,0xc3 +.size rc4_md5_enc,.-rc4_md5_enc +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-x86_64.S new file mode 100644 index 00000000000..c4d10024ade --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/rc4/rc4-x86_64.S @@ -0,0 +1,596 @@ +#if defined(__x86_64__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl asm_RC4 +.hidden asm_RC4 +.type asm_RC4,@function +.align 16 +asm_RC4: + orq %rsi,%rsi + jne .Lentry + .byte 0xf3,0xc3 +.Lentry: + pushq %rbx + pushq %r12 + pushq %r13 +.Lprologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl + cmpl $-1,256(%rdi) + je .LRC4_CHAR + movl OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx + incb %r10b + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz .Lloop1 + btl $30,%r8d + jc .Lintel + andq $7,%rbx + leaq 1(%r10),%rsi + jz .Loop8 + subq %rbx,%r11 +.Loop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop8_warmup + + leaq 1(%r10),%rsi + jmp .Loop8 +.align 16 +.Loop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r12,%r13,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz .Loop8 + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + testq $-32,%r11 + jz .Lloop1 + andq $15,%rbx + jz .Loop16_is_hot + subq %rbx,%r11 +.Loop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +.Loop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp .Loop16_enter +.align 16 +.Loop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 +.Loop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz .Loop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lloop1: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %r11 + jnz .Lloop1 + jmp .Lexit + +.align 16 +.LRC4_CHAR: + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 + jz .Lcloop1 + jmp .Lcloop8 +.align 16 +.Lcloop8: + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov0 + movq %rax,%rbx +.Lcmov0: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov1 + movq %rbx,%rax +.Lcmov1: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov2 + movq %rax,%rbx +.Lcmov2: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov3 + movq %rbx,%rax +.Lcmov3: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov4 + movq %rax,%rbx +.Lcmov4: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov5 + movq %rbx,%rax +.Lcmov5: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov6 + movq %rax,%rbx +.Lcmov6: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov7 + movq %rbx,%rax +.Lcmov7: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 + jnz .Lcloop8 + cmpq $0,%r11 + jne .Lcloop1 + jmp .Lexit +.align 16 +.Lcloop1: + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 + jnz .Lcloop1 + jmp .Lexit + +.align 16 +.Lexit: + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) + + movq (%rsp),%r13 + movq 8(%rsp),%r12 + movq 16(%rsp),%rbx + addq $24,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size asm_RC4,.-asm_RC4 +.globl asm_RC4_set_key +.hidden asm_RC4_set_key +.type asm_RC4_set_key,@function +.align 16 +asm_RC4_set_key: + leaq 8(%rdi),%rdi + leaq (%rdx,%rsi,1),%rdx + negq %rsi + movq %rsi,%rcx + xorl %eax,%eax + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + + movl OPENSSL_ia32cap_P(%rip),%r8d + btl $20,%r8d + jc .Lc1stloop + jmp .Lw1stloop + +.align 16 +.Lw1stloop: + movl %eax,(%rdi,%rax,4) + addb $1,%al + jnc .Lw1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lw2ndloop: + movl (%rdi,%r9,4),%r10d + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movl (%rdi,%r8,4),%r11d + cmovzq %rcx,%rsi + movl %r10d,(%rdi,%r8,4) + movl %r11d,(%rdi,%r9,4) + addb $1,%r9b + jnc .Lw2ndloop + jmp .Lexit_key + +.align 16 +.Lc1stloop: + movb %al,(%rdi,%rax,1) + addb $1,%al + jnc .Lc1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lc2ndloop: + movb (%rdi,%r9,1),%r10b + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movb (%rdi,%r8,1),%r11b + jnz .Lcnowrap + movq %rcx,%rsi +.Lcnowrap: + movb %r10b,(%rdi,%r8,1) + movb %r11b,(%rdi,%r9,1) + addb $1,%r9b + jnc .Lc2ndloop + movl $-1,256(%rdi) + +.align 16 +.Lexit_key: + xorl %eax,%eax + movl %eax,-8(%rdi) + movl %eax,-4(%rdi) + .byte 0xf3,0xc3 +.size asm_RC4_set_key,.-asm_RC4_set_key +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/sha/sha1-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/sha/sha1-x86_64.S new file mode 100644 index 00000000000..7668c2b1f59 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/sha/sha1-x86_64.S @@ -0,0 +1,2426 @@ +#if defined(__x86_64__) +.text +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl sha1_block_data_order +.hidden sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + movl OPENSSL_ia32cap_P+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/sha/sha256-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/sha/sha256-x86_64.S new file mode 100644 index 00000000000..f526de51ad0 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/sha/sha256-x86_64.S @@ -0,0 +1,2844 @@ +#if defined(__x86_64__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.globl sha256_block_data_order +.hidden sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $512,%r10d + jnz .Lssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.Lssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +#endif diff --git a/third_party/boringssl/linux-x86_64/crypto/sha/sha512-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/sha/sha512-x86_64.S new file mode 100644 index 00000000000..ca3a3a16442 --- /dev/null +++ b/third_party/boringssl/linux-x86_64/crypto/sha/sha512-x86_64.S @@ -0,0 +1,1787 @@ +#if defined(__x86_64__) +.text + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.globl sha512_block_data_order +.hidden sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/aes/aes-586.S b/third_party/boringssl/mac-x86/crypto/aes/aes-586.S new file mode 100644 index 00000000000..d3dc6beb6ac --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/aes/aes-586.S @@ -0,0 +1,3221 @@ +#if defined(__i386__) +.file "aes-586.S" +.text +.private_extern __x86_AES_encrypt_compact +.align 4 +__x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 4,0x90 +L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ecx,%edi + xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%ecx + andl %edx,%ebp + leal (%edx,%edx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %edx,%edi + xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%edx + andl %eax,%ebp + leal (%eax,%eax,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %eax,%edi + xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi + roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%eax + andl %ebx,%ebp + leal (%ebx,%ebx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ebx,%edi + xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ebx + xorl %edi,%esi + xorl %esi,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.private_extern __sse_AES_encrypt_compact +.align 4 +__sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 4,0x90 +L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $8,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $8,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + movd %mm2,%eax + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + movd %mm6,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $8,%esi + shrl $16,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + andl $255,%ebx + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx + shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi + orl %eax,%edx + shll $8,%esi + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp L001loop +.align 4,0x90 +L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.private_extern __x86_AES_encrypt +.align 4 +__x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 4,0x90 +L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 6,0x90 +LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.globl _asm_AES_encrypt +.private_extern _asm_AES_encrypt +.align 4 +_asm_AES_encrypt: +L_asm_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call L004pic_point +L004pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004pic_point(%ebp),%eax + leal LAES_Te-L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call __sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call __x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __x86_AES_decrypt_compact +.align 4 +__x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 4,0x90 +L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $24,%eax + xorl %ebp,%ecx + roll $16,%ebx + xorl %eax,%ecx + roll $8,%ebp + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $24,%ebx + xorl %ebp,%edx + roll $16,%ecx + xorl %ebx,%edx + roll $8,%ebp + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%eax + roll $16,%edx + xorl %ecx,%eax + roll $8,%ebp + xorl %edx,%eax + xorl %ebp,%eax + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%ebp + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.private_extern __sse_AES_decrypt_compact +.align 4 +__sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 4,0x90 +L007loop: + pshufw $12,%mm0,%mm1 + pshufw $9,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $3,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movd %mm2,%eax + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi + movd %mm6,%ebx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $16,%esi + shrl $16,%eax + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shrl $16,%ebx + shll $8,%esi + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx + andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + orl %ebx,%edx + shll $16,%esi + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx + shll $24,%eax + orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp L007loop +.align 4,0x90 +L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.private_extern __x86_AES_decrypt +.align 4 +__x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 4,0x90 +L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 6,0x90 +LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.globl _asm_AES_decrypt +.private_extern _asm_AES_decrypt +.align 4 +_asm_AES_decrypt: +L_asm_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call L010pic_point +L010pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010pic_point(%ebp),%eax + leal LAES_Td-L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call __sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call __x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _asm_AES_cbc_encrypt +.private_extern _asm_AES_cbc_encrypt +.align 4 +_asm_AES_cbc_encrypt: +L_asm_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je L012drop_out + call L013pic_point +L013pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L013pic_point(%ebp),%eax + cmpl $0,40(%esp) + leal LAES_Te-L013pic_point(%ebp),%ebp + jne L014picked_te + leal LAES_Td-LAES_Te(%ebp),%ebp +L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb L015slow_way + testl $15,%ecx + jnz L015slow_way + btl $28,(%eax) + jc L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp L017tbl_ok +.align 2,0x90 +L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 2,0x90 +L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb L018do_copy + cmpl $3852,%ebx + jb L019skip_copy +.align 2,0x90 +L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +L019skip_copy: + movl $16,%edi +.align 2,0x90 +L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4,0x90 +L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2884892297 +L023skip_ezero: + movl 28(%esp),%esp + popfl +L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L021fast_decrypt: + cmpl 36(%esp),%esi + je L024fast_dec_in_place + movl %edi,52(%esp) +.align 2,0x90 +.align 4,0x90 +L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp L026fast_dec_out +.align 4,0x90 +L024fast_dec_in_place: +L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call __x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L027fast_dec_in_place_loop +.align 2,0x90 +L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2884892297 +L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb L030slow_enc_tail + btl $25,52(%esp) + jnc L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 4,0x90 +L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call __sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae L032slow_enc_loop_sse + testl $15,%ecx + jnz L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 2,0x90 +L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae L033slow_enc_loop_x86 + testl $15,%ecx + jnz L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je L034enc_in_place +.align 2,0x90 +.long 2767451785 + jmp L035enc_skip_in_place +L034enc_in_place: + leal (%edi,%ecx,1),%edi +L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp L033slow_enc_loop_x86 +.align 4,0x90 +L029slow_decrypt: + btl $25,52(%esp) + jnc L036slow_dec_loop_x86 +.align 2,0x90 +L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call __sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 2,0x90 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call __x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 2,0x90 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __x86_AES_set_encrypt_key +.align 4 +__x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz L040badpointer + testl $-1,%edi + jz L040badpointer + call L041pic_point +L041pic_point: + popl %ebp + leal LAES_Te-L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je L04210rounds + cmpl $192,%ecx + je L04312rounds + cmpl $256,%ecx + je L04414rounds + movl $-2,%eax + jmp L045exit +L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp L04610shortcut +.align 2,0x90 +L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp L045exit +L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp L04812shortcut +.align 2,0x90 +L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp L04912loop +L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp L045exit +L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp L05114shortcut +.align 2,0x90 +L05214loop: + movl 28(%edi),%edx +L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp L05214loop +L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp L045exit +L040badpointer: + movl $-1,%eax +L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _asm_AES_set_encrypt_key +.private_extern _asm_AES_set_encrypt_key +.align 4 +_asm_AES_set_encrypt_key: +L_asm_AES_set_encrypt_key_begin: + call __x86_AES_set_encrypt_key + ret +.globl _asm_AES_set_decrypt_key +.private_extern _asm_AES_set_decrypt_key +.align 4 +_asm_AES_set_decrypt_key: +L_asm_AES_set_decrypt_key_begin: + call __x86_AES_set_encrypt_key + cmpl $0,%eax + je L054proceed + ret +L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 2,0x90 +L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 2,0x90 +L056permute: + addl $16,%edi + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S new file mode 100644 index 00000000000..07719ba7ae8 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/aes/aesni-x86.S @@ -0,0 +1,2433 @@ +#if defined(__i386__) +.file "src/crypto/aes/asm/aesni-x86.S" +.text +.globl _aesni_encrypt +.private_extern _aesni_encrypt +.align 4 +_aesni_encrypt: +L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L000enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.globl _aesni_decrypt +.private_extern _aesni_decrypt +.align 4 +_aesni_decrypt: +L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L001dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.private_extern __aesni_encrypt2 +.align 4 +__aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.private_extern __aesni_decrypt2 +.align 4 +__aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.private_extern __aesni_encrypt3 +.align 4 +__aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.private_extern __aesni_decrypt3 +.align 4 +__aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.private_extern __aesni_encrypt4 +.align 4 +__aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.private_extern __aesni_decrypt4 +.align 4 +__aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.private_extern __aesni_encrypt6 +.align 4 +__aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp L008_aesni_encrypt6_inner +.align 4,0x90 +L009enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +L008_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L009enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.private_extern __aesni_decrypt6 +.align 4 +__aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp L010_aesni_decrypt6_inner +.align 4,0x90 +L011dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +L010_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L011dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.globl _aesni_ecb_encrypt +.private_extern _aesni_ecb_encrypt +.align 4 +_aesni_ecb_encrypt: +L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz L012ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz L013ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L014ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L015ecb_enc_loop6_enter +.align 4,0x90 +L016ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L015ecb_enc_loop6_enter: + call __aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L016ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L012ecb_ret +L014ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L017ecb_enc_one + movups 16(%esi),%xmm3 + je L018ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L019ecb_enc_three + movups 48(%esi),%xmm5 + je L020ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L012ecb_ret +.align 4,0x90 +L017ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L021enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L021enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp L012ecb_ret +.align 4,0x90 +L018ecb_enc_two: + call __aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L012ecb_ret +.align 4,0x90 +L019ecb_enc_three: + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L012ecb_ret +.align 4,0x90 +L020ecb_enc_four: + call __aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp L012ecb_ret +.align 4,0x90 +L013ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L022ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L023ecb_dec_loop6_enter +.align 4,0x90 +L024ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L023ecb_dec_loop6_enter: + call __aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L024ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L012ecb_ret +L022ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L025ecb_dec_one + movups 16(%esi),%xmm3 + je L026ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L027ecb_dec_three + movups 48(%esi),%xmm5 + je L028ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L012ecb_ret +.align 4,0x90 +L025ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L029dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L029dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp L012ecb_ret +.align 4,0x90 +L026ecb_dec_two: + call __aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L012ecb_ret +.align 4,0x90 +L027ecb_dec_three: + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L012ecb_ret +.align 4,0x90 +L028ecb_dec_four: + call __aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_encrypt_blocks +.private_extern _aesni_ccm64_encrypt_blocks +.align 4 +_aesni_ccm64_encrypt_blocks: +L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +L030ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +L031ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L031ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz L030ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_decrypt_blocks +.private_extern _aesni_ccm64_decrypt_blocks +.align 4 +_aesni_ccm64_decrypt_blocks: +L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L032enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L032enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp L033ccm64_dec_outer +.align 4,0x90 +L033ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz L034ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +L035ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L035ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp L033ccm64_dec_outer +.align 4,0x90 +L034ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +L036enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L036enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ctr32_encrypt_blocks +.private_extern _aesni_ctr32_encrypt_blocks +.align 4 +_aesni_ctr32_encrypt_blocks: +L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je L037ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb L038ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp L039ctr32_loop6 +.align 4,0x90 +L039ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc L039ctr32_loop6 + addl $6,%eax + jz L040ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +L038ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb L041ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je L042ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb L043ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je L044ctr32_four + por %xmm7,%xmm6 + call __aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L040ctr32_ret +.align 4,0x90 +L037ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +L041ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L045enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L045enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp L040ctr32_ret +.align 4,0x90 +L042ctr32_two: + call __aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L040ctr32_ret +.align 4,0x90 +L043ctr32_three: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L040ctr32_ret +.align 4,0x90 +L044ctr32_four: + call __aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_encrypt +.private_extern _aesni_xts_encrypt +.align 4 +_aesni_xts_encrypt: +L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L046enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L046enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc L047xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L048xts_enc_loop6 +.align 4,0x90 +L048xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L048xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L047xts_enc_short: + addl $96,%eax + jz L049xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L050xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L051xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L052xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L053xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L054xts_enc_done +.align 4,0x90 +L050xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L055enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L055enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L054xts_enc_done +.align 4,0x90 +L051xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L054xts_enc_done +.align 4,0x90 +L052xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L054xts_enc_done +.align 4,0x90 +L053xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L054xts_enc_done +.align 4,0x90 +L049xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L056xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp L057xts_enc_steal +.align 4,0x90 +L054xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L056xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +L057xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L057xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L058enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L058enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_decrypt +.private_extern _aesni_xts_decrypt +.align 4 +_aesni_xts_decrypt: +L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L059enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L059enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc L060xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L061xts_dec_loop6 +.align 4,0x90 +L061xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L061xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L060xts_dec_short: + addl $96,%eax + jz L062xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L063xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L064xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L065xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L066xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L067xts_dec_done +.align 4,0x90 +L063xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L068dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L068dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L067xts_dec_done +.align 4,0x90 +L064xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L067xts_dec_done +.align 4,0x90 +L065xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L067xts_dec_done +.align 4,0x90 +L066xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L067xts_dec_done +.align 4,0x90 +L062xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L069xts_dec_ret + movl %eax,112(%esp) + jmp L070xts_dec_only_one_more +.align 4,0x90 +L067xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L069xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +L070xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L071dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L071dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +L072xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L072xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L073dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L073dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_cbc_encrypt +.private_extern _aesni_cbc_encrypt +.align 4 +_aesni_cbc_encrypt: +L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz L074cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je L075cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb L076cbc_enc_tail + subl $16,%eax + jmp L077cbc_enc_loop +.align 4,0x90 +L077cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +L078enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L078enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc L077cbc_enc_loop + addl $16,%eax + jnz L076cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp L079cbc_ret +L076cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp L077cbc_enc_loop +.align 4,0x90 +L075cbc_decrypt: + cmpl $80,%eax + jbe L080cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp L081cbc_dec_loop6_enter +.align 4,0x90 +L082cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +L081cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja L082cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle L083cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +L080cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe L084cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe L085cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe L086cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe L087cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L084cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L089dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L089dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L085cbc_dec_two: + call __aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L086cbc_dec_three: + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L087cbc_dec_four: + call __aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +L088cbc_dec_tail_collected: + andl $15,%eax + jnz L090cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp L079cbc_ret +.align 4,0x90 +L090cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +L079cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +L074cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __aesni_set_encrypt_key +.align 4 +__aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz L091bad_pointer + testl %edx,%edx + jz L091bad_pointer + call L092pic +L092pic: + popl %ebx + leal Lkey_const-L092pic(%ebx),%ebx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je L09314rounds + cmpl $192,%ecx + je L09412rounds + cmpl $128,%ecx + jne L095bad_keybits +.align 4,0x90 +L09610rounds: + cmpl $268435456,%ebp + je L09710rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call L098key_128_cold +.byte 102,15,58,223,200,2 + call L099key_128 +.byte 102,15,58,223,200,4 + call L099key_128 +.byte 102,15,58,223,200,8 + call L099key_128 +.byte 102,15,58,223,200,16 + call L099key_128 +.byte 102,15,58,223,200,32 + call L099key_128 +.byte 102,15,58,223,200,64 + call L099key_128 +.byte 102,15,58,223,200,128 + call L099key_128 +.byte 102,15,58,223,200,27 + call L099key_128 +.byte 102,15,58,223,200,54 + call L099key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp L100good_key +.align 4,0x90 +L099key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +L098key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp L100good_key +.align 4,0x90 +L09412rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je L10212rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call L103key_192a_cold +.byte 102,15,58,223,202,2 + call L104key_192b +.byte 102,15,58,223,202,4 + call L105key_192a +.byte 102,15,58,223,202,8 + call L104key_192b +.byte 102,15,58,223,202,16 + call L105key_192a +.byte 102,15,58,223,202,32 + call L104key_192b +.byte 102,15,58,223,202,64 + call L105key_192a +.byte 102,15,58,223,202,128 + call L104key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp L100good_key +.align 4,0x90 +L105key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 4,0x90 +L103key_192a_cold: + movaps %xmm2,%xmm5 +L106key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 4,0x90 +L104key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp L106key_192b_warm +.align 4,0x90 +L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp L100good_key +.align 4,0x90 +L09314rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je L10814rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call L109key_256a_cold +.byte 102,15,58,223,200,1 + call L110key_256b +.byte 102,15,58,223,202,2 + call L111key_256a +.byte 102,15,58,223,200,2 + call L110key_256b +.byte 102,15,58,223,202,4 + call L111key_256a +.byte 102,15,58,223,200,4 + call L110key_256b +.byte 102,15,58,223,202,8 + call L111key_256a +.byte 102,15,58,223,200,8 + call L110key_256b +.byte 102,15,58,223,202,16 + call L111key_256a +.byte 102,15,58,223,200,16 + call L110key_256b +.byte 102,15,58,223,202,32 + call L111key_256a +.byte 102,15,58,223,200,32 + call L110key_256b +.byte 102,15,58,223,202,64 + call L111key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp L100good_key +.align 4,0x90 +L111key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +L109key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L110key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 4,0x90 +L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp L112loop_key256 +L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 2,0x90 +L091bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 2,0x90 +L095bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.globl _aesni_set_encrypt_key +.private_extern _aesni_set_encrypt_key +.align 4 +_aesni_set_encrypt_key: +L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + ret +.globl _aesni_set_decrypt_key +.private_extern _aesni_set_decrypt_key +.align 4 +_aesni_set_decrypt_key: +L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz L114dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +L115dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja L115dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +L114dec_key_ret: + ret +.align 6,0x90 +Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/aes/vpaes-x86.S b/third_party/boringssl/mac-x86/crypto/aes/vpaes-x86.S new file mode 100644 index 00000000000..8b85709dee0 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/aes/vpaes-x86.S @@ -0,0 +1,650 @@ +#if defined(__i386__) +.file "vpaes-x86.S" +.text +.align 6,0x90 +L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 6,0x90 +.private_extern __vpaes_preheat +.align 4 +__vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.private_extern __vpaes_encrypt_core +.align 4 +__vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp L000enc_entry +.align 4,0x90 +L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.private_extern __vpaes_decrypt_core +.align 4 +__vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp L002dec_entry +.align 4,0x90 +L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.private_extern __vpaes_schedule_core +.align 4 +__vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call __vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp L005schedule_go +L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +L005schedule_go: + cmpl $192,%eax + ja L006schedule_256 + je L007schedule_192 +L008schedule_128: + movl $10,%eax +L009loop_schedule_128: + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + jmp L009loop_schedule_128 +.align 4,0x90 +L007schedule_192: + movdqu 8(%esi),%xmm0 + call __vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +L011loop_schedule_192: + call __vpaes_schedule_round +.byte 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp L011loop_schedule_192 +.align 4,0x90 +L006schedule_256: + movdqu 16(%esi),%xmm0 + call __vpaes_schedule_transform + movl $7,%eax +L012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp L012loop_schedule_256 +.align 4,0x90 +L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call __vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.private_extern __vpaes_schedule_192_smear +.align 4 +__vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.private_extern __vpaes_schedule_round +.align 4 +__vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.private_extern __vpaes_schedule_transform +.align 4 +__vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.private_extern __vpaes_schedule_mangle +.align 4 +__vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp L015schedule_mangle_both +.align 4,0x90 +L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.globl _vpaes_set_encrypt_key +.private_extern _vpaes_set_encrypt_key +.align 4 +_vpaes_set_encrypt_key: +L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal L_vpaes_consts+0x30-L016pic_point,%ebp + call __vpaes_schedule_core +L016pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_set_decrypt_key +.private_extern _vpaes_set_decrypt_key +.align 4 +_vpaes_set_decrypt_key: +L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal L_vpaes_consts+0x30-L017pic_point,%ebp + call __vpaes_schedule_core +L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_encrypt +.private_extern _vpaes_encrypt +.align 4 +_vpaes_encrypt: +L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal L_vpaes_consts+0x30-L018pic_point,%ebp + call __vpaes_preheat +L018pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_decrypt +.private_extern _vpaes_decrypt +.align 4 +_vpaes_decrypt: +L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal L_vpaes_consts+0x30-L019pic_point,%ebp + call __vpaes_preheat +L019pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_cbc_encrypt +.private_extern _vpaes_cbc_encrypt +.align 4 +_vpaes_cbc_encrypt: +L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc L020cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal L_vpaes_consts+0x30-L021pic_point,%ebp + call __vpaes_preheat +L021pic_point: + cmpl $0,%ecx + je L022cbc_dec_loop + jmp L023cbc_enc_loop +.align 4,0x90 +L023cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call __vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L023cbc_enc_loop + jmp L024cbc_done +.align 4,0x90 +L022cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call __vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L022cbc_dec_loop +L024cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +L020cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +#endif diff --git a/third_party/boringssl/mac-x86/crypto/bn/bn-586.S b/third_party/boringssl/mac-x86/crypto/bn/bn-586.S new file mode 100644 index 00000000000..0f0a94ece3d --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/bn/bn-586.S @@ -0,0 +1,1528 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/bn-586.S" +.text +.globl _bn_mul_add_words +.private_extern _bn_mul_add_words +.align 4 +_bn_mul_add_words: +L_bn_mul_add_words_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp L002maw_sse2_entry +.align 4,0x90 +L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz L004maw_sse2_exit +L002maw_sse2_entry: + testl $4294967288,%ecx + jnz L003maw_sse2_unrolled +.align 2,0x90 +L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L005maw_sse2_loop +L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 4,0x90 +L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz L006maw_finish +.align 4,0x90 +L007maw_loop: + # Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz L007maw_loop +L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz L008maw_finish2 + jmp L009maw_end +L008maw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_mul_words +.private_extern _bn_mul_words +.align 4 +_bn_mul_words: +L_bn_mul_words_begin: + call L010PIC_me_up +L010PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 4,0x90 +L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 4,0x90 +L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz L013mw_finish +L014mw_loop: + # Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz L013mw_finish + jmp L014mw_loop +L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz L015mw_finish2 + jmp L016mw_end +L015mw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sqr_words +.private_extern _bn_sqr_words +.align 4 +_bn_sqr_words: +L_bn_sqr_words_begin: + call L017PIC_me_up +L017PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 4,0x90 +L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz L019sqr_sse2_loop + emms + ret +.align 4,0x90 +L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz L020sw_finish +L021sw_loop: + # Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + # Round 4 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + # Round 8 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + # Round 12 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + # Round 16 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + # Round 20 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + # Round 24 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + # Round 28 + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz L021sw_loop +L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz L022sw_end + # Tail Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz L022sw_end + # Tail Round 1 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz L022sw_end + # Tail Round 2 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz L022sw_end + # Tail Round 3 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz L022sw_end + # Tail Round 4 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz L022sw_end + # Tail Round 5 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz L022sw_end + # Tail Round 6 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_div_words +.private_extern _bn_div_words +.align 4 +_bn_div_words: +L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.globl _bn_add_words +.private_extern _bn_add_words +.align 4 +_bn_add_words: +L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L023aw_finish +L024aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L024aw_loop +L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L025aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L025aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L025aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L025aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L025aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L025aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L025aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_words +.private_extern _bn_sub_words +.align 4 +_bn_sub_words: +L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L026aw_finish +L027aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L027aw_loop +L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L028aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L028aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L028aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L028aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L028aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L028aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L028aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_part_words +.private_extern _bn_sub_part_words +.align 4 +_bn_sub_part_words: +L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L029aw_finish +L030aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L030aw_loop +L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L031aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 1 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 2 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 3 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 4 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 5 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 6 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +L031aw_end: + cmpl $0,36(%esp) + je L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je L032pw_end + jge L033pw_pos + # pw_neg + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz L034pw_neg_finish +L035pw_neg_loop: + # dl<0 Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # dl<0 Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # dl<0 Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # dl<0 Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # dl<0 Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # dl<0 Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # dl<0 Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # dl<0 Round 7 + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L035pw_neg_loop +L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz L032pw_end + # dl<0 Tail Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L032pw_end + # dl<0 Tail Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L032pw_end + # dl<0 Tail Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L032pw_end + # dl<0 Tail Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L032pw_end + # dl<0 Tail Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L032pw_end + # dl<0 Tail Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L032pw_end + # dl<0 Tail Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp L032pw_end +L033pw_pos: + andl $4294967288,%ebp + jz L036pw_pos_finish +L037pw_pos_loop: + # dl>0 Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L038pw_nc0 + # dl>0 Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L039pw_nc1 + # dl>0 Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L040pw_nc2 + # dl>0 Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L041pw_nc3 + # dl>0 Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L042pw_nc4 + # dl>0 Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L043pw_nc5 + # dl>0 Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L044pw_nc6 + # dl>0 Round 7 + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc L045pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L037pw_pos_loop +L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz L032pw_end + # dl>0 Tail Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L046pw_tail_nc0 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L047pw_tail_nc1 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L048pw_tail_nc2 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L049pw_tail_nc3 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L050pw_tail_nc4 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L051pw_tail_nc5 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L052pw_tail_nc6 + movl $1,%eax + jmp L032pw_end +L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +L046pw_tail_nc0: + decl %ebp + jz L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L047pw_tail_nc1: + decl %ebp + jz L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L048pw_tail_nc2: + decl %ebp + jz L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L049pw_tail_nc3: + decl %ebp + jz L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L050pw_tail_nc4: + decl %ebp + jz L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L051pw_tail_nc5: + decl %ebp + jz L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L052pw_tail_nc6: +L054pw_nc_end: + movl $0,%eax +L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/bn/co-586.S b/third_party/boringssl/mac-x86/crypto/bn/co-586.S new file mode 100644 index 00000000000..7ce8e794250 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/bn/co-586.S @@ -0,0 +1,1252 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/co-586.S" +.text +.globl _bn_mul_comba8 +.private_extern _bn_mul_comba8 +.align 4 +_bn_mul_comba8: +L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[4]*b[0] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[5]*b[0] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[1] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[4] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[6]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[1] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[2] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[4] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[5] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + # saved r[6] + # ################## Calculate word 7 + xorl %ebx,%ebx + # mul a[7]*b[0] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[2] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[3] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[4] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[5] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[6] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + # saved r[7] + # ################## Calculate word 8 + xorl %ecx,%ecx + # mul a[7]*b[1] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[3] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[4] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[5] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[6] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + # saved r[8] + # ################## Calculate word 9 + xorl %ebp,%ebp + # mul a[7]*b[2] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[4] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[5] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[6] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + # saved r[9] + # ################## Calculate word 10 + xorl %ebx,%ebx + # mul a[7]*b[3] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[5] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[6] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + # saved r[10] + # ################## Calculate word 11 + xorl %ecx,%ecx + # mul a[7]*b[4] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[6] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + # saved r[11] + # ################## Calculate word 12 + xorl %ebp,%ebp + # mul a[7]*b[5] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + # saved r[12] + # ################## Calculate word 13 + xorl %ebx,%ebx + # mul a[7]*b[6] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + # saved r[13] + # ################## Calculate word 14 + xorl %ecx,%ecx + # mul a[7]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + # saved r[14] + # save r[15] + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_mul_comba4 +.private_extern _bn_mul_comba4 +.align 4 +_bn_mul_comba4: +L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + # saved r[6] + # save r[7] + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba8 +.private_extern _bn_sqr_comba8 +.align 4 +_bn_sqr_comba8: +L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[4]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[5]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + # sqr a[4]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[6]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[5]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + # sqr a[4]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + # saved r[6] + # ############### Calculate word 7 + xorl %ebx,%ebx + # sqr a[7]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[6]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + # sqr a[5]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + # sqr a[4]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + # saved r[7] + # ############### Calculate word 8 + xorl %ecx,%ecx + # sqr a[7]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[6]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + # sqr a[5]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + # sqr a[4]*a[4] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + # saved r[8] + # ############### Calculate word 9 + xorl %ebp,%ebp + # sqr a[7]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + # sqr a[6]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + # sqr a[5]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + # saved r[9] + # ############### Calculate word 10 + xorl %ebx,%ebx + # sqr a[7]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + # sqr a[6]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + # sqr a[5]*a[5] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + # saved r[10] + # ############### Calculate word 11 + xorl %ecx,%ecx + # sqr a[7]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + # sqr a[6]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + # saved r[11] + # ############### Calculate word 12 + xorl %ebp,%ebp + # sqr a[7]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + # sqr a[6]*a[6] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + # saved r[12] + # ############### Calculate word 13 + xorl %ebx,%ebx + # sqr a[7]*a[6] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + # saved r[13] + # ############### Calculate word 14 + xorl %ecx,%ecx + # sqr a[7]*a[7] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + # saved r[14] + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba4 +.private_extern _bn_sqr_comba4 +.align 4 +_bn_sqr_comba4: +L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + # saved r[6] + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +#endif diff --git a/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S new file mode 100644 index 00000000000..234034b0a08 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/bn/x86-mont.S @@ -0,0 +1,462 @@ +#if defined(__i386__) +.file "src/crypto/bn/asm/x86-mont.S" +.text +.globl _bn_mul_mont +.private_extern _bn_mul_mont +.align 4 +_bn_mul_mont: +L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + movl %esp,%ebp + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%esp + negl %edi + movl %esp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%esp + xorl %esp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%esp + andl $-64,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %ebp,24(%esp) + call L001PIC_me_up +L001PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L002non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 4,0x90 +L0031st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl L0031st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +L004outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +L005inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz L005inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle L004outer + emms + jmp L006common_tail +.align 4,0x90 +L002non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz L007bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 4,0x90 +L008mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L008mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp L0092ndmadd +.align 4,0x90 +L0101stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L0101stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 4,0x90 +L0092ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0092ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je L006common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp L0101stmadd +.align 4,0x90 +L007bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 4,0x90 +L011sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl L011sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 4,0x90 +L0123rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0123rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je L006common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je L013sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 4,0x90 +L014sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle L014sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +L013sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp L0123rdmadd +.align 4,0x90 +L006common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 4,0x90 +L015sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge L015sub + sbbl $0,%eax +.align 4,0x90 +L016copy: + movl (%esi,%ebx,4),%edx + movl (%edi,%ebx,4),%ebp + xorl %ebp,%edx + andl %eax,%edx + xorl %ebp,%edx + movl %ecx,(%esi,%ebx,4) + movl %edx,(%edi,%ebx,4) + decl %ebx + jge L016copy + movl 24(%esp),%esp + movl $1,%eax +L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/cpu-x86-asm.S b/third_party/boringssl/mac-x86/crypto/cpu-x86-asm.S new file mode 100644 index 00000000000..bfb292c8938 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/cpu-x86-asm.S @@ -0,0 +1,309 @@ +#if defined(__i386__) +.file "crypto/cpu-x86-asm.S" +.text +.globl _OPENSSL_ia32_cpuid +.private_extern _OPENSSL_ia32_cpuid +.align 4 +_OPENSSL_ia32_cpuid: +L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja L002generic + andl $4026531839,%edx + jmp L002generic +L001intel: + cmpl $7,%edi + jb L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne L005notintel + orl $1073741824,%edx +L005notintel: + btl $28,%edx + jnc L002generic + andl $4026531839,%edx + cmpl $0,%edi + je L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L002generic + andl $4026531839,%edx +L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je L007done + cmpl $2,%eax + je L006clear_avx +L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +L007done: + movl %esi,%eax + movl %ebp,%edx +L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_rdtsc +.private_extern _OPENSSL_rdtsc +.align 4 +_OPENSSL_rdtsc: +L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + call L009PIC_me_up +L009PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L009PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L010notsc + .byte 0x0f,0x31 +L010notsc: + ret +.globl _OPENSSL_instrument_halt +.private_extern _OPENSSL_instrument_halt +.align 4 +_OPENSSL_instrument_halt: +L_OPENSSL_instrument_halt_begin: + call L011PIC_me_up +L011PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L011PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L012nohalt +.long 2421723150 + andl $3,%eax + jnz L012nohalt + pushfl + popl %eax + btl $9,%eax + jnc L012nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +L012nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_far_spin +.private_extern _OPENSSL_far_spin +.align 4 +_OPENSSL_far_spin: +L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc L013nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp L014spin +.align 4,0x90 +L014spin: + incl %eax + cmpl (%ecx),%edx + je L014spin +.long 529567888 + ret +L013nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_wipe_cpu +.private_extern _OPENSSL_wipe_cpu +.align 4 +_OPENSSL_wipe_cpu: +L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + call L015PIC_me_up +L015PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L015PIC_me_up(%ecx),%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc L016no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne L017no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +L017no_sse2: +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +L016no_x87: + leal 4(%esp),%eax + ret +.globl _OPENSSL_atomic_add +.private_extern _OPENSSL_atomic_add +.align 4 +_OPENSSL_atomic_add: +L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +L018spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne L018spin + movl %ebx,%eax + popl %ebx + ret +.globl _OPENSSL_indirect_call +.private_extern _OPENSSL_indirect_call +.align 4 +_OPENSSL_indirect_call: +L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.globl _OPENSSL_ia32_rdrand +.private_extern _OPENSSL_ia32_rdrand +.align 4 +_OPENSSL_ia32_rdrand: +L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +L019loop: +.byte 15,199,240 + jc L020break + loop L019loop +L020break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/md5/md5-586.S b/third_party/boringssl/mac-x86/crypto/md5/md5-586.S new file mode 100644 index 00000000000..6830b16410a --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/md5/md5-586.S @@ -0,0 +1,680 @@ +#if defined(__i386__) +.file "src/crypto/md5/asm/md5-586.S" +.text +.globl _md5_block_asm_data_order +.private_extern _md5_block_asm_data_order +.align 4 +_md5_block_asm_data_order: +L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +L000start: + + # R0 section + movl %ecx,%edi + movl (%esi),%ebp + # R0 0 + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + # R0 1 + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + # R0 2 + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + # R0 3 + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + # R0 4 + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + # R0 5 + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + # R0 6 + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + # R0 7 + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + # R0 8 + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + # R0 9 + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + # R0 10 + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + # R0 11 + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + # R0 12 + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + # R0 13 + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + # R0 14 + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + # R0 15 + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + # R1 section + # R1 16 + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 17 + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 18 + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 19 + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 20 + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 21 + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 22 + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 23 + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 24 + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 25 + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 26 + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 27 + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 28 + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 29 + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 30 + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 31 + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + # R2 section + # R2 32 + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + # R2 33 + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 34 + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + # R2 35 + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 36 + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + # R2 37 + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 38 + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + # R2 39 + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 40 + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + # R2 41 + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 42 + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + # R2 43 + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 44 + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + # R2 45 + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 46 + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + # R2 47 + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + # R3 section + # R3 48 + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 49 + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 50 + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 51 + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 52 + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 53 + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 54 + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 55 + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 56 + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 57 + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 58 + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 59 + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 60 + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 61 + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 62 + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 63 + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +#endif diff --git a/third_party/boringssl/mac-x86/crypto/modes/ghash-x86.S b/third_party/boringssl/mac-x86/crypto/modes/ghash-x86.S new file mode 100644 index 00000000000..8693b82a134 --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/modes/ghash-x86.S @@ -0,0 +1,1260 @@ +#if defined(__i386__) +.file "ghash-x86.S" +.text +.globl _gcm_gmult_4bit_x86 +.private_extern _gcm_gmult_4bit_x86 +.align 4 +_gcm_gmult_4bit_x86: +L_gcm_gmult_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%edi + movl 108(%esp),%esi + movl (%edi),%ebp + movl 4(%edi),%edx + movl 8(%edi),%ecx + movl 12(%edi),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) + movl %ebp,(%esp) + movl %edx,4(%esp) + movl %ecx,8(%esp) + movl %ebx,12(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp L000x86_loop +.align 4,0x90 +L000x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js L001x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp L000x86_loop +.align 4,0x90 +L001x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_ghash_4bit_x86 +.private_extern _gcm_ghash_4bit_x86 +.align 4 +_gcm_ghash_4bit_x86: +L_gcm_ghash_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%ebx + movl 108(%esp),%esi + movl 112(%esp),%edi + movl 116(%esp),%ecx + addl %edi,%ecx + movl %ecx,116(%esp) + movl (%ebx),%ebp + movl 4(%ebx),%edx + movl 8(%ebx),%ecx + movl 12(%ebx),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) +.align 4,0x90 +L002x86_outer_loop: + xorl 12(%edi),%ebx + xorl 8(%edi),%ecx + xorl 4(%edi),%edx + xorl (%edi),%ebp + movl %ebx,12(%esp) + movl %ecx,8(%esp) + movl %edx,4(%esp) + movl %ebp,(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp L003x86_loop +.align 4,0x90 +L003x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js L004x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp L003x86_loop +.align 4,0x90 +L004x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 112(%esp),%edi + leal 16(%edi),%edi + cmpl 116(%esp),%edi + movl %edi,112(%esp) + jb L002x86_outer_loop + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_gmult_4bit_mmx +.private_extern _gcm_gmult_4bit_mmx +.align 4 +_gcm_gmult_4bit_mmx: +L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call L005pic_point +L005pic_point: + popl %eax + leal Lrem_4bit-L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx + xorl %ecx,%ecx + movl %ebx,%edx + movb %dl,%cl + movl $14,%ebp + shlb $4,%cl + andl $240,%edx + movq 8(%esi,%ecx,1),%mm0 + movq (%esi,%ecx,1),%mm1 + movd %mm0,%ebx + jmp L006mmx_loop +.align 4,0x90 +L006mmx_loop: + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + movb (%edi,%ebp,1),%cl + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + decl %ebp + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + movl %ecx,%edx + pxor %mm2,%mm0 + js L007mmx_break + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + jmp L006mmx_loop +.align 4,0x90 +L007mmx_break: + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + pxor %mm2,%mm0 + psrlq $32,%mm0 + movd %mm1,%edx + psrlq $32,%mm1 + movd %mm0,%ecx + movd %mm1,%ebp + bswap %ebx + bswap %edx + bswap %ecx + bswap %ebp + emms + movl %ebx,12(%edi) + movl %edx,4(%edi) + movl %ecx,8(%edi) + movl %ebp,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_ghash_4bit_mmx +.private_extern _gcm_ghash_4bit_mmx +.align 4 +_gcm_ghash_4bit_mmx: +L_gcm_ghash_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call L008pic_point +L008pic_point: + popl %esi + leal Lrem_8bit-L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx +.align 4,0x90 +L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_init_clmul +.private_extern _gcm_init_clmul +.align 4 +_gcm_init_clmul: +L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call L010pic +L010pic: + popl %ecx + leal Lbswap-L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.globl _gcm_gmult_clmul +.private_extern _gcm_gmult_clmul +.align 4 +_gcm_gmult_clmul: +L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call L011pic +L011pic: + popl %ecx + leal Lbswap-L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.globl _gcm_ghash_clmul +.private_extern _gcm_ghash_clmul +.align 4 +_gcm_ghash_clmul: +L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call L012pic +L012pic: + popl %ecx + leal Lbswap-L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe L014even_tail + jmp L015mod_loop +.align 5,0x90 +L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja L015mod_loop +L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz L016done + movups (%edx),%xmm2 +L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.align 6,0x90 +Lrem_8bit: +.value 0,450,900,582,1800,1738,1164,1358 +.value 3600,4050,3476,3158,2328,2266,2716,2910 +.value 7200,7650,8100,7782,6952,6890,6316,6510 +.value 4656,5106,4532,4214,5432,5370,5820,6014 +.value 14400,14722,15300,14854,16200,16010,15564,15630 +.value 13904,14226,13780,13334,12632,12442,13020,13086 +.value 9312,9634,10212,9766,9064,8874,8428,8494 +.value 10864,11186,10740,10294,11640,11450,12028,12094 +.value 28800,28994,29444,29382,30600,30282,29708,30158 +.value 32400,32594,32020,31958,31128,30810,31260,31710 +.value 27808,28002,28452,28390,27560,27242,26668,27118 +.value 25264,25458,24884,24822,26040,25722,26172,26622 +.value 18624,18690,19268,19078,20424,19978,19532,19854 +.value 18128,18194,17748,17558,16856,16410,16988,17310 +.value 21728,21794,22372,22182,21480,21034,20588,20910 +.value 23280,23346,22900,22710,24056,23610,24188,24510 +.value 57600,57538,57988,58182,58888,59338,58764,58446 +.value 61200,61138,60564,60758,59416,59866,60316,59998 +.value 64800,64738,65188,65382,64040,64490,63916,63598 +.value 62256,62194,61620,61814,62520,62970,63420,63102 +.value 55616,55426,56004,56070,56904,57226,56780,56334 +.value 55120,54930,54484,54550,53336,53658,54236,53790 +.value 50528,50338,50916,50982,49768,50090,49644,49198 +.value 52080,51890,51444,51510,52344,52666,53244,52798 +.value 37248,36930,37380,37830,38536,38730,38156,38094 +.value 40848,40530,39956,40406,39064,39258,39708,39646 +.value 36256,35938,36388,36838,35496,35690,35116,35054 +.value 33712,33394,32820,33270,33976,34170,34620,34558 +.value 43456,43010,43588,43910,44744,44810,44364,44174 +.value 42960,42514,42068,42390,41176,41242,41820,41630 +.value 46560,46114,46692,47014,45800,45866,45420,45230 +.value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 6,0x90 +Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/rc4/rc4-586.S b/third_party/boringssl/mac-x86/crypto/rc4/rc4-586.S new file mode 100644 index 00000000000..faecdfa9f2b --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/rc4/rc4-586.S @@ -0,0 +1,383 @@ +#if defined(__i386__) +.file "rc4-586.S" +.text +.globl _asm_RC4 +.private_extern _asm_RC4 +.align 4 +_asm_RC4: +L_asm_RC4_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebp + xorl %eax,%eax + xorl %ebx,%ebx + cmpl $0,%edx + je L000abort + movb (%edi),%al + movb 4(%edi),%bl + addl $8,%edi + leal (%esi,%edx,1),%ecx + subl %esi,%ebp + movl %ecx,24(%esp) + incb %al + cmpl $-1,256(%edi) + je L001RC4_CHAR + movl (%edi,%eax,4),%ecx + andl $-4,%edx + jz L002loop1 + movl %ebp,32(%esp) + testl $-8,%edx + jz L003go4loop4 + call L004PIC_me_up +L004PIC_me_up: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp + btl $26,(%ebp) + jnc L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp L005loop_mmx_enter +.align 4,0x90 +L006loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +L005loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb L006loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je L007done + jmp L002loop1 +.align 4,0x90 +L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +L008loop4: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%eax,4),%ecx + movl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl 32(%esp),%ecx + orl (%edi,%edx,4),%ebp + rorl $8,%ebp + xorl (%esi),%ebp + cmpl 28(%esp),%esi + movl %ebp,(%ecx,%esi,1) + leal 4(%esi),%esi + movl (%edi,%eax,4),%ecx + jb L008loop4 + cmpl 24(%esp),%esi + je L007done + movl 32(%esp),%ebp +.align 4,0x90 +L002loop1: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%edx,4),%edx + xorb (%esi),%dl + leal 1(%esi),%esi + movl (%edi,%eax,4),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb L002loop1 + jmp L007done +.align 4,0x90 +L001RC4_CHAR: + movzbl (%edi,%eax,1),%ecx +L009cloop1: + addb %cl,%bl + movzbl (%edi,%ebx,1),%edx + movb %cl,(%edi,%ebx,1) + movb %dl,(%edi,%eax,1) + addb %cl,%dl + movzbl (%edi,%edx,1),%edx + addb $1,%al + xorb (%esi),%dl + leal 1(%esi),%esi + movzbl (%edi,%eax,1),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb L009cloop1 +L007done: + decb %al + movl %ebx,-4(%edi) + movb %al,-8(%edi) +L000abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _asm_RC4_set_key +.private_extern _asm_RC4_set_key +.align 4 +_asm_RC4_set_key: +L_asm_RC4_set_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%esi + call L010PIC_me_up +L010PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx + leal 8(%edi),%edi + leal (%esi,%ebp,1),%esi + negl %ebp + xorl %eax,%eax + movl %ebp,-4(%edi) + btl $20,(%edx) + jc L011c1stloop +.align 4,0x90 +L012w1stloop: + movl %eax,(%edi,%eax,4) + addb $1,%al + jnc L012w1stloop + xorl %ecx,%ecx + xorl %edx,%edx +.align 4,0x90 +L013w2ndloop: + movl (%edi,%ecx,4),%eax + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movl (%edi,%edx,4),%ebx + jnz L014wnowrap + movl -4(%edi),%ebp +L014wnowrap: + movl %eax,(%edi,%edx,4) + movl %ebx,(%edi,%ecx,4) + addb $1,%cl + jnc L013w2ndloop + jmp L015exit +.align 4,0x90 +L011c1stloop: + movb %al,(%edi,%eax,1) + addb $1,%al + jnc L011c1stloop + xorl %ecx,%ecx + xorl %edx,%edx + xorl %ebx,%ebx +.align 4,0x90 +L016c2ndloop: + movb (%edi,%ecx,1),%al + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movb (%edi,%edx,1),%bl + jnz L017cnowrap + movl -4(%edi),%ebp +L017cnowrap: + movb %al,(%edi,%edx,1) + movb %bl,(%edi,%ecx,1) + addb $1,%cl + jnc L016c2ndloop + movl $-1,256(%edi) +L015exit: + xorl %eax,%eax + movl %eax,-8(%edi) + movl %eax,-4(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _RC4_options +.private_extern _RC4_options +.align 4 +_RC4_options: +L_RC4_options_begin: + call L018pic_point +L018pic_point: + popl %eax + leal L019opts-L018pic_point(%eax),%eax + call L020PIC_me_up +L020PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L020PIC_me_up(%edx),%edx + movl (%edx),%edx + btl $20,%edx + jc L0211xchar + btl $26,%edx + jnc L022ret + addl $25,%eax + ret +L0211xchar: + addl $12,%eax +L022ret: + ret +.align 6,0x90 +L019opts: +.byte 114,99,52,40,52,120,44,105,110,116,41,0 +.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 6,0x90 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/sha/sha1-586.S b/third_party/boringssl/mac-x86/crypto/sha/sha1-586.S new file mode 100644 index 00000000000..97aafbf198a --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/sha/sha1-586.S @@ -0,0 +1,2797 @@ +#if defined(__i386__) +.file "sha1-586.S" +.text +.globl _sha1_block_data_order +.private_extern _sha1_block_data_order +.align 4 +_sha1_block_data_order: +L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L000pic_point +L000pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi + leal LK_XX_XX-L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz L001x86 + movl 8(%esi),%ecx + testl $16777216,%eax + jz L001x86 + testl $536870912,%ecx + jnz Lshaext_shortcut + jmp Lssse3_shortcut +.align 4,0x90 +L001x86: + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp L002loop +.align 4,0x90 +L002loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + # 00_15 0 + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 1 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 2 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 3 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 4 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 5 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 6 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 7 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 8 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 9 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 10 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 11 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 12 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 13 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 14 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 15 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + # 16_19 16 + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 16_19 17 + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 16_19 18 + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 16_19 19 + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 20 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 21 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 22 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 23 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 24 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 25 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 26 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 27 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 28 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 29 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 30 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 31 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + # 20_39 32 + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + # 20_39 33 + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + # 20_39 34 + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + # 20_39 35 + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + # 20_39 36 + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + # 20_39 37 + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + # 20_39 38 + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + # 20_39 39 + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + # 40_59 40 + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + # 40_59 41 + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + # 40_59 42 + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + # 40_59 43 + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + # 40_59 44 + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + # 40_59 45 + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + # 40_59 46 + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + # 40_59 47 + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + # 40_59 48 + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + # 40_59 49 + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + # 40_59 50 + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + # 40_59 51 + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + # 40_59 52 + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + # 40_59 53 + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + # 40_59 54 + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + # 40_59 55 + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + # 40_59 56 + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + # 40_59 57 + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + # 40_59 58 + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + # 40_59 59 + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + # 20_39 60 + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + # 20_39 61 + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + # 20_39 62 + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + # 20_39 63 + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + # 20_39 64 + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 20_39 65 + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 20_39 66 + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 20_39 67 + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 68 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 69 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 70 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 71 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 72 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 73 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 74 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 75 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 76 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 77 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 78 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 79 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb L002loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __sha1_block_data_order_shaext +.align 4 +__sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L003pic_point +L003pic_point: + popl %ebp + leal LK_XX_XX-L003pic_point(%ebp),%ebp +Lshaext_shortcut: + movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp L004loop_shaext +.align 4,0x90 +L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.private_extern __sha1_block_data_order_ssse3 +.align 4 +__sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L005pic_point +L005pic_point: + popl %ebp + leal LK_XX_XX-L005pic_point(%ebp),%ebp +Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp L006loop +.align 4,0x90 +L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp L006loop +.align 4,0x90 +L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/sha/sha256-586.S b/third_party/boringssl/mac-x86/crypto/sha/sha256-586.S new file mode 100644 index 00000000000..f0ba612fabb --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/sha/sha256-586.S @@ -0,0 +1,4581 @@ +#if defined(__i386__) +.file "sha512-586.S" +.text +.globl _sha256_block_data_order +.private_extern _sha256_block_data_order +.align 4 +_sha256_block_data_order: +L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K256-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + testl $512,%ebx + jnz L005SSSE3 +L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae L006unrolled + jmp L002loop +.align 4,0x90 +L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 4,0x90 +L00700_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne L00700_15 + movl 156(%esp),%ecx + jmp L00816_63 +.align 4,0x90 +L00816_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne L00816_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 4,0x90 +L006unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp L009grand_loop +.align 4,0x90 +L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L010loop_shaext +.align 4,0x90 +L010loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz L010loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L005SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp L011grand_ssse3 +.align 4,0x90 +L011grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp L012ssse3_00_47 +.align 4,0x90 +L012ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L012ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L011grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86/crypto/sha/sha512-586.S b/third_party/boringssl/mac-x86/crypto/sha/sha512-586.S new file mode 100644 index 00000000000..3066100e6ff --- /dev/null +++ b/third_party/boringssl/mac-x86/crypto/sha/sha512-586.S @@ -0,0 +1,2833 @@ +#if defined(__i386__) +.file "sha512-586.S" +.text +.globl _sha512_block_data_order +.private_extern _sha512_block_data_order +.align 4 +_sha512_block_data_order: +L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K512-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je L003SSSE3 + subl $80,%esp + jmp L004loop_sse2 +.align 4,0x90 +L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp L00500_14_sse2 +.align 4,0x90 +L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp L00616_79_sse2 +.align 4,0x90 +L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 5,0x90 +L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp L00800_47_ssse3 +.align 5,0x90 +L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 4,0x90 +L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne L00900_15_x86 +.align 4,0x90 +L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/aes/aes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/aes/aes-x86_64.S new file mode 100644 index 00000000000..02f378bd671 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/aes/aes-x86_64.S @@ -0,0 +1,2535 @@ +#if defined(__x86_64__) +.text + +.p2align 4 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$enc_loop +.p2align 4 +L$enc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz L$enc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$enc_loop_compact +.p2align 4 +L$enc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$enc_compact_done + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $2155905152,%r12d + roll $24,%eax + movl $2155905152,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl %r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp L$enc_loop_compact +.p2align 4 +L$enc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 + +.p2align 4 +.globl _asm_AES_encrypt +.private_extern _asm_AES_encrypt + +.private_extern _asm_AES_encrypt +_asm_AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$enc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$enc_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$dec_loop +.p2align 4 +L$dec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz L$dec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$dec_loop_compact + +.p2align 4 +L$dec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$dec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp L$dec_loop_compact +.p2align 4 +L$dec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 + +.p2align 4 +.globl _asm_AES_decrypt +.private_extern _asm_AES_decrypt + +.private_extern _asm_AES_decrypt +_asm_AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$dec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$dec_epilogue: + .byte 0xf3,0xc3 + +.p2align 4 +.globl _asm_AES_set_encrypt_key +.private_extern _asm_AES_set_encrypt_key + +_asm_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +L$enc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$enc_key_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 4 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz L$badpointer + testq $-1,%rdi + jz L$badpointer + + leaq L$AES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je L$10rounds + cmpl $192,%ecx + je L$12rounds + cmpl $256,%ecx + je L$14rounds + movq $-2,%rax + jmp L$exit + +L$10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$10shortcut +.p2align 2 +L$10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +L$10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl L$10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp L$exit + +L$12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$12shortcut +.p2align 2 +L$12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +L$12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je L$12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp L$12loop +L$12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp L$exit + +L$14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$14shortcut +.p2align 2 +L$14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +L$14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je L$14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp L$14loop +L$14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp L$exit + +L$badpointer: + movq $-1,%rax +L$exit: +.byte 0xf3,0xc3 + +.p2align 4 +.globl _asm_AES_set_decrypt_key +.private_extern _asm_AES_set_decrypt_key + +_asm_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +L$dec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne L$abort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.p2align 2 +L$invert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne L$invert + + leaq L$AES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.p2align 2 +L$permute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + + roll $16,%r9d + + roll $16,%r12d + + roll $16,%r8d + + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz L$permute + + xorq %rax,%rax +L$abort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$dec_key_epilogue: + .byte 0xf3,0xc3 + +.p2align 4 +.globl _asm_AES_cbc_encrypt +.private_extern _asm_AES_cbc_encrypt + + +.private_extern _asm_AES_cbc_encrypt +_asm_AES_cbc_encrypt: + cmpq $0,%rdx + je L$cbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +L$cbc_prologue: + + cld + movl %r9d,%r9d + + leaq L$AES_Te(%rip),%r14 + cmpq $0,%r9 + jne L$cbc_picked_te + leaq L$AES_Td(%rip),%r14 +L$cbc_picked_te: + + movl _OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb L$cbc_slow_prologue + testq $15,%rdx + jnz L$cbc_slow_prologue + btl $28,%r10d + jc L$cbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb L$cbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp L$cbc_te_ok +L$cbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.p2align 2 +L$cbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +L$cbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb L$cbc_do_ecopy + cmpq $4096-248,%r10 + jb L$cbc_skip_ecopy +.p2align 2 +L$cbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +L$cbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.p2align 2 +L$cbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz L$cbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je L$FAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.p2align 2 +L$cbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz L$cbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_fast_cleanup + + +.p2align 4 +L$FAST_DECRYPT: + cmpq %r8,%r9 + je L$cbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.p2align 2 +L$cbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz L$cbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp L$cbc_fast_cleanup + +.p2align 4 +L$cbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.p2align 2 +L$cbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz L$cbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp L$cbc_fast_dec_in_place_loop +L$cbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.p2align 2 +L$cbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je L$cbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp L$cbc_exit + + +.p2align 4 +L$cbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +L$cbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je L$SLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz L$cbc_slow_enc_tail + +.p2align 2 +L$cbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz L$cbc_slow_enc_loop + testq $15,%r10 + jnz L$cbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp L$cbc_slow_enc_loop + +.p2align 4 +L$SLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.p2align 2 +L$cbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc L$cbc_slow_dec_partial + jz L$cbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp L$cbc_slow_dec_loop +L$cbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp L$cbc_exit + +.p2align 4 +L$cbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$cbc_popfq: + popfq +L$cbc_epilogue: + .byte 0xf3,0xc3 + +.p2align 6 +L$AES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.p2align 6 +L$AES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S new file mode 100644 index 00000000000..69b22c26b93 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/aes/aesni-x86_64.S @@ -0,0 +1,3564 @@ +#if defined(__x86_64__) +.text + +.globl _aesni_encrypt +.private_extern _aesni_encrypt + +.p2align 4 +_aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_enc1_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 + + +.globl _aesni_decrypt +.private_extern _aesni_decrypt + +.p2align 4 +_aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_dec1_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$enc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$dec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop6_enter +.p2align 4 +L$enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +L$enc_loop6_enter: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop6_enter +.p2align 4 +L$dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +L$dec_loop6_enter: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop8_inner +.p2align 4 +L$enc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +L$enc_loop8_inner: +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +L$enc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop8_inner +.p2align 4 +L$dec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +L$dec_loop8_inner: +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +L$dec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 + +.globl _aesni_ecb_encrypt +.private_extern _aesni_ecb_encrypt + +.p2align 4 +_aesni_ecb_encrypt: + andq $-16,%rdx + jz L$ecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz L$ecb_decrypt + + cmpq $128,%rdx + jb L$ecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_enc_loop8_enter +.p2align 4 +L$ecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc L$ecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_enc_one + movups 16(%rdi),%xmm3 + je L$ecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_enc_three + movups 48(%rdi),%xmm5 + je L$ecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_enc_five + movups 80(%rdi),%xmm7 + je L$ecb_enc_six + movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp L$ecb_ret + +.p2align 4 +L$ecb_decrypt: + cmpq $128,%rdx + jb L$ecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_dec_loop8_enter +.p2align 4 +L$ecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc L$ecb_dec_loop8 + + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movq %r11,%rcx + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movl %r10d,%eax + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_dec_one + movups 16(%rdi),%xmm3 + je L$ecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_dec_three + movups 48(%rdi),%xmm5 + je L$ecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_dec_five + movups 80(%rdi),%xmm7 + je L$ecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + +L$ecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_encrypt_blocks +.private_extern _aesni_ccm64_encrypt_blocks + +.p2align 4 +_aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm6 + movdqa L$increment64(%rip),%xmm9 + movdqa L$bswap_mask(%rip),%xmm7 + + shll $4,%eax + movl $16,%r10d + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 + jmp L$ccm64_enc_outer +.p2align 4 +L$ccm64_enc_outer: + movups (%r11),%xmm0 + movq %r10,%rax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%r11),%xmm0 + +L$ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm9,%xmm6 + decq %rdx +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) +.byte 102,15,56,0,215 + leaq 16(%rsi),%rsi + jnz L$ccm64_enc_outer + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_decrypt_blocks +.private_extern _aesni_ccm64_decrypt_blocks + +.p2align 4 +_aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm6 + movdqu (%r9),%xmm3 + movdqa L$increment64(%rip),%xmm9 + movdqa L$bswap_mask(%rip),%xmm7 + + movaps %xmm6,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,15,56,0,247 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_5 +.byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 + leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 + jmp L$ccm64_dec_outer +.p2align 4 +L$ccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz L$ccm64_dec_break + + movups (%r11),%xmm0 + movq %r10,%rax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups 32(%r11),%xmm0 + jmp L$ccm64_dec2_loop +.p2align 4 +L$ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$ccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leaq 16(%rdi),%rdi + jmp L$ccm64_dec_outer + +.p2align 4 +L$ccm64_dec_break: + + movl 240(%r11),%eax + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +L$oop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz L$oop_enc1_6 +.byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 + +.globl _aesni_ctr32_encrypt_blocks +.private_extern _aesni_ctr32_encrypt_blocks + +.p2align 4 +_aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne L$ctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp L$ctr32_epilogue + +.p2align 4 +L$ctr32_bulk: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + + + + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %r11d,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb L$ctr32_tail + + subq $6,%rdx + cmpl $4194304,%r10d + je L$ctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp L$ctr32_loop8 + +.p2align 4 +L$ctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 + jmp L$ctr32_loop6 + +.p2align 4 +L$ctr32_loop6: + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call L$enc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc L$ctr32_loop6 + + addq $6,%rdx + jz L$ctr32_done + + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp L$ctr32_tail + +.p2align 5 +L$ctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %r11d,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp L$ctr32_enc_done + +.p2align 4 +L$ctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc L$ctr32_loop8 + + addq $8,%rdx + jz L$ctr32_done + leaq -128(%rcx),%rcx + +L$ctr32_tail: + + + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb L$ctr32_loop3 + je L$ctr32_loop4 + + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call L$enc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb L$ctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je L$ctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz L$ctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz L$ctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb L$ctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je L$ctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + +L$ctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +L$ctr32_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_xts_encrypt +.private_extern _aesni_xts_encrypt + +.p2align 4 +_aesni_xts_encrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +L$oop_enc1_8: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_8 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa L$xts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc L$xts_enc_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 + jmp L$xts_enc_grandloop + +.p2align 5 +L$xts_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_enc_loop6 +.p2align 5 +L$xts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz L$xts_enc_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,220,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,220,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc L$xts_enc_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +L$xts_enc_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + addq $96,%rdx + jz L$xts_enc_done + + pxor %xmm0,%xmm11 + cmpq $32,%rdx + jb L$xts_enc_one + pxor %xmm0,%xmm12 + je L$xts_enc_two + + pxor %xmm0,%xmm13 + cmpq $64,%rdx + jb L$xts_enc_three + pxor %xmm0,%xmm14 + je L$xts_enc_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_done: + andq $15,%r9 + jz L$xts_enc_ret + movq %r9,%rdx + +L$xts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +L$xts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +L$xts_enc_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_xts_decrypt +.private_extern _aesni_xts_decrypt + +.p2align 4 +_aesni_xts_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +L$oop_enc1_11: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_11 +.byte 102,15,56,221,209 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa L$xts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc L$xts_dec_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 + jmp L$xts_dec_grandloop + +.p2align 5 +L$xts_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,222,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_dec_loop6 +.p2align 5 +L$xts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz L$xts_dec_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,222,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,222,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc L$xts_dec_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +L$xts_dec_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 + addq $96,%rdx + jz L$xts_dec_done + + pxor %xmm0,%xmm12 + cmpq $32,%rdx + jb L$xts_dec_one + pxor %xmm0,%xmm13 + je L$xts_dec_two + + pxor %xmm0,%xmm14 + cmpq $64,%rdx + jb L$xts_dec_three + je L$xts_dec_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz L$xts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp L$xts_dec_done2 + +.p2align 4 +L$xts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm14,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_done: + andq $15,%r9 + jz L$xts_dec_ret +L$xts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_14 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq (%rbp),%rsp + popq %rbp +L$xts_dec_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_cbc_encrypt +.private_extern _aesni_cbc_encrypt + +.p2align 4 +_aesni_cbc_encrypt: + testq %rdx,%rdx + jz L$cbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz L$cbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb L$cbc_enc_tail + subq $16,%rdx + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +L$oop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc L$cbc_enc_loop + addq $16,%rdx + jnz L$cbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + jmp L$cbc_ret + +L$cbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp L$cbc_enc_loop + +.p2align 4 +L$cbc_decrypt: + cmpq $16,%rdx + jne L$cbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_ret +.p2align 4 +L$cbc_decrypt_bulk: + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $80,%rdx + jbe L$cbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl _OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe L$cbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je L$cbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx + jmp L$cbc_dec_loop8_enter +.p2align 4 +L$cbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $128,%rdx + ja L$cbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $112,%rdx + jle L$cbc_dec_clear_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe L$cbc_dec_tail + + movaps %xmm11,%xmm2 +L$cbc_dec_six_or_seven: + cmpq $96,%rdx + ja L$cbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +L$cbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja L$cbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle L$cbc_dec_clear_tail_collected + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + +L$cbc_dec_tail: + movups (%rdi),%xmm2 + subq $16,%rdx + jbe L$cbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $16,%rdx + jbe L$cbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $16,%rdx + jbe L$cbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $16,%rdx + jbe L$cbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + subq $16,%rdx + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_17: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_17 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leaq 16(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 + leaq 32(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 + leaq 48(%rsi),%rsi + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 +L$cbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz L$cbc_dec_tail_partial + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_dec_ret +.p2align 4 +L$cbc_dec_tail_partial: + movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + movdqa %xmm2,(%rsp) + +L$cbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + leaq (%rbp),%rsp + popq %rbp +L$cbc_ret: + .byte 0xf3,0xc3 + +.globl _aesni_set_decrypt_key +.private_extern _aesni_set_decrypt_key + +.p2align 4 +_aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz L$dec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +L$dec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja L$dec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + pxor %xmm1,%xmm1 + movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 +L$dec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_decrypt_key: + +.globl _aesni_set_encrypt_key +.private_extern _aesni_set_encrypt_key + +.p2align 4 +_aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + movq $-1,%rax + testq %rdi,%rdi + jz L$enc_key_ret + testq %rdx,%rdx + jz L$enc_key_ret + + movl $268437504,%r10d + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + andl _OPENSSL_ia32cap_P+4(%rip),%r10d + leaq 16(%rdx),%rax + cmpl $256,%esi + je L$14rounds + cmpl $192,%esi + je L$12rounds + cmpl $128,%esi + jne L$bad_keybits + +L$10rounds: + movl $9,%esi + cmpl $268435456,%r10d + je L$10rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call L$key_expansion_128_cold +.byte 102,15,58,223,200,2 + call L$key_expansion_128 +.byte 102,15,58,223,200,4 + call L$key_expansion_128 +.byte 102,15,58,223,200,8 + call L$key_expansion_128 +.byte 102,15,58,223,200,16 + call L$key_expansion_128 +.byte 102,15,58,223,200,32 + call L$key_expansion_128 +.byte 102,15,58,223,200,64 + call L$key_expansion_128 +.byte 102,15,58,223,200,128 + call L$key_expansion_128 +.byte 102,15,58,223,200,27 + call L$key_expansion_128 +.byte 102,15,58,223,200,54 + call L$key_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$10rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa L$key_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp L$oop_key128 + +.p2align 4 +L$oop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz L$oop_key128 + + movdqa L$key_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + cmpl $268435456,%r10d + je L$12rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_192a_cold +.byte 102,15,58,223,202,2 + call L$key_expansion_192b +.byte 102,15,58,223,202,4 + call L$key_expansion_192a +.byte 102,15,58,223,202,8 + call L$key_expansion_192b +.byte 102,15,58,223,202,16 + call L$key_expansion_192a +.byte 102,15,58,223,202,32 + call L$key_expansion_192b +.byte 102,15,58,223,202,64 + call L$key_expansion_192a +.byte 102,15,58,223,202,128 + call L$key_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds_alt: + movdqa L$key_rotate192(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp L$oop_key192 + +.p2align 4 +L$oop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz L$oop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + cmpl $268435456,%r10d + je L$14rounds_alt + + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_256a_cold +.byte 102,15,58,223,200,1 + call L$key_expansion_256b +.byte 102,15,58,223,202,2 + call L$key_expansion_256a +.byte 102,15,58,223,200,2 + call L$key_expansion_256b +.byte 102,15,58,223,202,4 + call L$key_expansion_256a +.byte 102,15,58,223,200,4 + call L$key_expansion_256b +.byte 102,15,58,223,202,8 + call L$key_expansion_256a +.byte 102,15,58,223,200,8 + call L$key_expansion_256b +.byte 102,15,58,223,202,16 + call L$key_expansion_256a +.byte 102,15,58,223,200,16 + call L$key_expansion_256b +.byte 102,15,58,223,202,32 + call L$key_expansion_256a +.byte 102,15,58,223,200,32 + call L$key_expansion_256b +.byte 102,15,58,223,202,64 + call L$key_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp L$oop_key256 + +.p2align 4 +L$oop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz L$done_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp L$oop_key256 + +L$done_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$bad_keybits: + movq $-2,%rax +L$enc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_encrypt_key: + +.p2align 4 +L$key_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_192a_cold: + movaps %xmm2,%xmm5 +L$key_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp L$key_expansion_192b_warm + +.p2align 4 +L$key_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +L$key_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 + + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$increment32: +.long 6,6,6,0 +L$increment64: +.long 1,0,0,0 +L$xts_magic: +.long 0x87,0,1,0 +L$increment1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$key_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +L$key_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +L$key_rcon1: +.long 1,1,1,1 +L$key_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/aes/bsaes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/aes/bsaes-x86_64.S new file mode 100644 index 00000000000..c2d04776a9b --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/aes/bsaes-x86_64.S @@ -0,0 +1,2504 @@ +#if defined(__x86_64__) +.text + + + + + +.p2align 6 +_bsaes_encrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$enc_sbox +.p2align 4 +L$enc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +L$enc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl L$enc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz L$enc_loop + movdqa 64(%r11),%xmm7 + jmp L$enc_loop +.p2align 4 +L$enc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 + + + +.p2align 6 +_bsaes_decrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$dec_sbox +.p2align 4 +L$dec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +L$dec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl L$dec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz L$dec_loop + movdqa -32(%r11),%xmm7 + jmp L$dec_loop +.p2align 4 +L$dec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 + + +.p2align 4 +_bsaes_key_convert: + leaq L$masks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp L$key_loop +.p2align 4 +L$key_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz L$key_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 + + +.globl _bsaes_cbc_encrypt +.private_extern _bsaes_cbc_encrypt + +.p2align 4 +_bsaes_cbc_encrypt: + cmpl $0,%r9d + jne _asm_AES_cbc_encrypt + cmpq $128,%rdx + jb _asm_AES_cbc_encrypt + + movq %rsp,%rax +L$cbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +L$cbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc L$cbc_dec_loop + + addq $8,%r14 + jz L$cbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb L$cbc_dec_one + movdqu 16(%r12),%xmm0 + je L$cbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb L$cbc_dec_three + movdqu 48(%r12),%xmm2 + je L$cbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb L$cbc_dec_five + movdqu 80(%r12),%xmm4 + je L$cbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +L$cbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$cbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$cbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$cbc_dec_epilogue: + .byte 0xf3,0xc3 + + +.globl _bsaes_ctr32_encrypt_blocks +.private_extern _bsaes_ctr32_encrypt_blocks + +.p2align 4 +_bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +L$ctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb L$ctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq L$ADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp L$ctr_enc_loop +.p2align 4 +L$ctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq L$BS0(%rip),%r11 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc L$ctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq L$ADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz L$ctr_enc_loop + + jmp L$ctr_enc_done +.p2align 4 +L$ctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb L$ctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je L$ctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb L$ctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je L$ctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb L$ctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je L$ctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp L$ctr_enc_done + +.p2align 4 +L$ctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz L$ctr_enc_short + +L$ctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$ctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$ctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$ctr_enc_epilogue: + .byte 0xf3,0xc3 + +.globl _bsaes_xts_encrypt +.private_extern _bsaes_xts_encrypt + +.p2align 4 +_bsaes_xts_encrypt: + movq %rsp,%rax +L$xts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_enc_short + jmp L$xts_enc_loop + +.p2align 4 +L$xts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_enc_loop + +L$xts_enc_short: + addq $128,%r14 + jz L$xts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_enc_done: + andl $15,%ebx + jz L$xts_enc_ret + movq %r13,%rdx + +L$xts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +L$xts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_enc_epilogue: + .byte 0xf3,0xc3 + + +.globl _bsaes_xts_decrypt +.private_extern _bsaes_xts_decrypt + +.p2align 4 +_bsaes_xts_decrypt: + movq %rsp,%rax +L$xts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_dec_short + jmp L$xts_dec_loop + +.p2align 4 +L$xts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_dec_loop + +L$xts_dec_short: + addq $128,%r14 + jz L$xts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_dec_done: + andl $15,%ebx + jz L$xts_dec_ret + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +L$xts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +L$xts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_dec_epilogue: + .byte 0xf3,0xc3 + + +.p2align 6 +_bsaes_const: +L$M0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +L$ISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +L$ISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +L$BS0: +.quad 0x5555555555555555, 0x5555555555555555 +L$BS1: +.quad 0x3333333333333333, 0x3333333333333333 +L$BS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +L$SR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +L$SRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +L$M0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +L$SWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +L$SWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +L$ADD1: +.quad 0x0000000000000000, 0x0000000100000000 +L$ADD2: +.quad 0x0000000000000000, 0x0000000200000000 +L$ADD3: +.quad 0x0000000000000000, 0x0000000300000000 +L$ADD4: +.quad 0x0000000000000000, 0x0000000400000000 +L$ADD5: +.quad 0x0000000000000000, 0x0000000500000000 +L$ADD6: +.quad 0x0000000000000000, 0x0000000600000000 +L$ADD7: +.quad 0x0000000000000000, 0x0000000700000000 +L$ADD8: +.quad 0x0000000000000000, 0x0000000800000000 +L$xts_magic: +.long 0x87,0,1,0 +L$masks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +L$M0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +L$63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.p2align 6 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/aes/vpaes-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/aes/vpaes-x86_64.S new file mode 100644 index 00000000000..711ea437265 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/aes/vpaes-x86_64.S @@ -0,0 +1,834 @@ +#if defined(__x86_64__) +.text + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq L$k_mc_backward(%rip),%r10 + jmp L$enc_entry + +.p2align 4 +L$enc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +L$enc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz L$enc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq L$k_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa L$k_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp L$dec_entry + +.p2align 4 +L$dec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +L$dec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz L$dec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa L$k_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq L$k_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq L$k_sr(%rip),%r10 + testq %rcx,%rcx + jnz L$schedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp L$schedule_go + +L$schedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +L$schedule_go: + cmpl $192,%esi + ja L$schedule_256 + je L$schedule_192 + + + + + + + + + + +L$schedule_128: + movl $10,%esi + +L$oop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp L$oop_schedule_128 + + + + + + + + + + + + + + + + +.p2align 4 +L$schedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +L$oop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp L$oop_schedule_192 + + + + + + + + + + + +.p2align 4 +L$schedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp L$oop_schedule_256 + + + + + + + + + + + + +.p2align 4 +L$schedule_mangle_last: + + leaq L$k_deskew(%rip),%r11 + testq %rcx,%rcx + jnz L$schedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq L$k_opt(%rip),%r11 + addq $32,%rdx + +L$schedule_mangle_last_dec: + addq $-16,%rdx + pxor L$k_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor L$k_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa L$k_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz L$schedule_mangle_dec + + + addq $16,%rdx + pxor L$k_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp L$schedule_mangle_both +.p2align 4 +L$schedule_mangle_dec: + + leaq L$k_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +L$schedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 + + + + + +.globl _vpaes_set_encrypt_key +.private_extern _vpaes_set_encrypt_key + +.p2align 4 +_vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _vpaes_set_decrypt_key +.private_extern _vpaes_set_decrypt_key + +.p2align 4 +_vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _vpaes_encrypt +.private_extern _vpaes_encrypt + +.p2align 4 +_vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 + + +.globl _vpaes_decrypt +.private_extern _vpaes_decrypt + +.p2align 4 +_vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 + +.globl _vpaes_cbc_encrypt +.private_extern _vpaes_cbc_encrypt + +.p2align 4 +_vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc L$cbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je L$cbc_dec_loop + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_enc_loop + jmp L$cbc_done +.p2align 4 +L$cbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_dec_loop +L$cbc_done: + movdqu %xmm6,(%r8) +L$cbc_abort: + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_preheat: + leaq L$k_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 + + + + + + + +.p2align 6 +_vpaes_consts: +L$k_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +L$k_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +L$k_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +L$k_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +L$k_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +L$k_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +L$k_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +L$k_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +L$k_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +L$k_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +L$k_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +L$k_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +L$k_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +L$k_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +L$k_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +L$k_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +L$k_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +L$k_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +L$k_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +L$k_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +L$k_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +L$k_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +L$k_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.p2align 6 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-avx2.S b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-avx2.S new file mode 100644 index 00000000000..8ba2019a1cf --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-avx2.S @@ -0,0 +1,34 @@ +#if defined(__x86_64__) +.text + +.globl _rsaz_avx2_eligible +.private_extern _rsaz_avx2_eligible + +_rsaz_avx2_eligible: + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _rsaz_1024_sqr_avx2 +.private_extern _rsaz_1024_sqr_avx2 +.globl _rsaz_1024_mul_avx2 +.private_extern _rsaz_1024_mul_avx2 +.globl _rsaz_1024_norm2red_avx2 +.private_extern _rsaz_1024_norm2red_avx2 +.globl _rsaz_1024_red2norm_avx2 +.private_extern _rsaz_1024_red2norm_avx2 +.globl _rsaz_1024_scatter5_avx2 +.private_extern _rsaz_1024_scatter5_avx2 +.globl _rsaz_1024_gather5_avx2 +.private_extern _rsaz_1024_gather5_avx2 + +_rsaz_1024_sqr_avx2: +_rsaz_1024_mul_avx2: +_rsaz_1024_norm2red_avx2: +_rsaz_1024_red2norm_avx2: +_rsaz_1024_scatter5_avx2: +_rsaz_1024_gather5_avx2: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S new file mode 100644 index 00000000000..5e9e82feb02 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/bn/rsaz-x86_64.S @@ -0,0 +1,1126 @@ +#if defined(__x86_64__) +.text + + + +.globl _rsaz_512_sqr +.private_extern _rsaz_512_sqr + +.p2align 5 +_rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$sqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + jmp L$oop_sqr + +.p2align 5 +L$oop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq %r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz L$oop_sqr + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$sqr_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul +.private_extern _rsaz_512_mul + +.p2align 5 +_rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$mul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_gather4 +.private_extern _rsaz_512_mul_gather4 + +.p2align 5 +_rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_gather4_body: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp L$oop_mul_gather + +.p2align 5 +L$oop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_gather4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_scatter4 +.private_extern _rsaz_512_mul_scatter4 + +.p2align 5 +_rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_scatter4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_by_one +.private_extern _rsaz_512_mul_by_one + +.p2align 5 +_rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$mul_by_one_body: + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + call __rsaz_512_reduce + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_by_one_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$reduction_loop + +.p2align 5 +L$reduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne L$reduction_loop + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp L$oop_mul + +.p2align 5 +L$oop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + +.globl _rsaz_512_scatter4 +.private_extern _rsaz_512_scatter4 + +.p2align 4 +_rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp L$oop_scatter +.p2align 4 +L$oop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz L$oop_scatter + .byte 0xf3,0xc3 + + +.globl _rsaz_512_gather4 +.private_extern _rsaz_512_gather4 + +.p2align 4 +_rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp L$oop_gather +.p2align 4 +L$oop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz L$oop_gather + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S new file mode 100644 index 00000000000..6b9bc05bf4e --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont.S @@ -0,0 +1,726 @@ +#if defined(__x86_64__) +.text + + + +.globl _bn_mul_mont +.private_extern _bn_mul_mont + +.p2align 4 +_bn_mul_mont: + testl $3,%r9d + jnz L$mul_enter + cmpl $8,%r9d + jb L$mul_enter + cmpq %rsi,%rdx + jne L$mul4x_enter + testl $7,%r9d + jz L$sqr8x_enter + jmp L$mul4x_enter + +.p2align 4 +L$mul_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 2(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +L$mul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp L$sub +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + xorq %r14,%r14 + movq %r9,%r15 +.p2align 4 +L$copy: + movq (%rsp,%r14,8),%rsi + movq (%rdi,%r14,8),%rcx + xorq %rcx,%rsi + andq %rax,%rsi + xorq %rcx,%rsi + movq %r14,(%rsp,%r14,8) + movq %rsi,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +bn_mul4x_mont: +L$mul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +L$mul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$1st4x +.p2align 4 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.p2align 2 +L$outer4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$inner4x +.p2align 4 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb L$outer4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp L$sub4x +.p2align 4 +L$sub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz L$sub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rax,%xmm0 + punpcklqdq %xmm0,%xmm0 + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + + movq %r9,%r15 + pxor %xmm5,%xmm5 + jmp L$copy4x +.p2align 4 +L$copy4x: + movdqu (%rsp,%r14,1),%xmm2 + movdqu 16(%rsp,%r14,1),%xmm4 + movdqu (%rdi,%r14,1),%xmm1 + movdqu 16(%rdi,%r14,1),%xmm3 + pxor %xmm1,%xmm2 + pxor %xmm3,%xmm4 + pand %xmm0,%xmm2 + pand %xmm0,%xmm4 + pxor %xmm1,%xmm2 + pxor %xmm3,%xmm4 + movdqu %xmm2,(%rdi,%r14,1) + movdqu %xmm4,16(%rdi,%r14,1) + movdqa %xmm5,(%rsp,%r14,1) + movdqa %xmm5,16(%rsp,%r14,1) + + leaq 32(%r14),%r14 + decq %r15 + jnz L$copy4x + + shlq $2,%r9 + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +bn_sqr8x_mont: +L$sqr8x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$sqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp L$sqr8x_sp_done + +.p2align 5 +L$sqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$sqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$sqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl _OPENSSL_ia32cap_P+8(%rip),%eax + jmp L$sqr8x_copy_n + +.p2align 5 +L$sqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 + leaq 32(%rcx),%rcx + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz L$sqr8x_copy_n + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + call _bn_sqr8x_internal + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp L$sqr8x_zero + +.p2align 5 +L$sqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz L$sqr8x_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$sqr8x_epilogue: + .byte 0xf3,0xc3 + +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 4 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S new file mode 100644 index 00000000000..2e8f469c14d --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/bn/x86_64-mont5.S @@ -0,0 +1,1822 @@ +#if defined(__x86_64__) +.text + + + +.globl _bn_mul_mont_gather5 +.private_extern _bn_mul_mont_gather5 + +.p2align 6 +_bn_mul_mont_gather5: + testl $7,%r9d + jnz L$mul_enter + jmp L$mul4x_enter + +.p2align 4 +L$mul_enter: + movl %r9d,%r9d + movq %rsp,%rax + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq 2(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +L$mul_body: + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq L$magic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp L$sub +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + xorq %r14,%r14 + movq %r9,%r15 +.p2align 4 +L$copy: + movq (%rsp,%r14,8),%rsi + movq (%rdi,%r14,8),%rcx + xorq %rcx,%rsi + andq %rax,%rsi + xorq %rcx,%rsi + movq %r14,(%rsp,%r14,8) + movq %rsi,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +bn_mul4x_mont_gather5: +L$mul4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$mul4xsp_done + +.p2align 5 +L$mul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$mul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) +L$mul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 5 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq L$magic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 +.byte 0x67 + por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 + pand %xmm7,%xmm3 +.byte 0x67 + por %xmm2,%xmm0 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 + por %xmm3,%xmm0 + movq 32(%r14),%xmm3 + +.byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 + + imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 + movq %rdx,%r11 + + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp L$1st4x + +.p2align 5 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp L$outer4x + +.p2align 5 +L$outer4x: + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + + imulq %r10,%rbp +.byte 0x67 + movq %rdx,%r11 + movq %rdi,(%r14) + + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp L$inner4x + +.p2align 5 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -16(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb L$outer4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp L$sqr4x_sub + +.globl _bn_power5 +.private_extern _bn_power5 + +.p2align 5 +_bn_power5: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$pwr_sp_done + +.p2align 5 +L$pwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$pwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$power5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$power5_epilogue: + .byte 0xf3,0xc3 + + +.globl _bn_sqr8x_internal +.private_extern _bn_sqr8x_internal +.private_extern _bn_sqr8x_internal + +.p2align 5 +_bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp L$sqr4x_1st + +.p2align 5 +L$sqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp L$sqr4x_outer + +.p2align 5 +L$sqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp L$sqr4x_inner + +.p2align 5 +L$sqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz L$sqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp L$sqr4x_shift_n_add + +.p2align 5 +L$sqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz L$sqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp L$8x_reduction_loop + +.p2align 5 +L$8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$8x_reduce + +.p2align 5 +L$8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp L$8x_tail + +.p2align 5 +L$8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp L$8x_tail + +.p2align 5 +L$8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +L$8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb L$8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp L$sqr4x_sub + +.p2align 5 +L$sqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz L$sqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 + +.globl _bn_from_montgomery +.private_extern _bn_from_montgomery + +.p2align 5 +_bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 + + + +.p2align 5 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$from_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$from_sp_done + +.p2align 5 +L$from_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$from_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$from_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp L$mul_by_1 + +.p2align 5 +L$mul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz L$mul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp L$from_mont_zero + +.p2align 5 +L$from_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz L$from_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$from_epilogue: + .byte 0xf3,0xc3 + +.globl _bn_scatter5 +.private_extern _bn_scatter5 + +.p2align 4 +_bn_scatter5: + cmpl $0,%esi + jz L$scatter_epilogue + leaq (%rdx,%rcx,8),%rdx +L$scatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz L$scatter +L$scatter_epilogue: + .byte 0xf3,0xc3 + + +.globl _bn_gather5 +.private_extern _bn_gather5 + +.p2align 4 +_bn_gather5: + movl %ecx,%r11d + shrl $3,%ecx + andq $7,%r11 + notl %ecx + leaq L$magic_masks(%rip),%rax + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx + movq 0(%rax,%rcx,8),%xmm4 + movq 8(%rax,%rcx,8),%xmm5 + movq 16(%rax,%rcx,8),%xmm6 + movq 24(%rax,%rcx,8),%xmm7 + jmp L$gather +.p2align 4 +L$gather: + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 + pand %xmm4,%xmm0 + movq 0(%rdx),%xmm2 + pand %xmm5,%xmm1 + movq 64(%rdx),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 +.byte 0x67,0x67 + por %xmm2,%xmm0 + leaq 256(%rdx),%rdx + por %xmm3,%xmm0 + + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz L$gather + .byte 0xf3,0xc3 +L$SEH_end_bn_gather5: + +.p2align 6 +L$magic_masks: +.long 0,0, 0,0, 0,0, -1,-1 +.long 0,0, 0,0, 0,0, 0,0 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/cpu-x86_64-asm.S b/third_party/boringssl/mac-x86_64/crypto/cpu-x86_64-asm.S new file mode 100644 index 00000000000..0dde04d8fcb --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/cpu-x86_64-asm.S @@ -0,0 +1,143 @@ +#if defined(__x86_64__) +.text + +.globl _OPENSSL_ia32_cpuid +.private_extern _OPENSSL_ia32_cpuid + +.p2align 4 +_OPENSSL_ia32_cpuid: + + + movq %rdi,%rdi + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz L$intel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz L$intel + + + + + movl $2147483648,%eax + cpuid + + + cmpl $2147483649,%eax + jb L$intel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + + + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb L$intel + + movl $2147483656,%eax + cpuid + + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + + btl $28,%edx + jnc L$generic + shrl $16,%ebx + cmpb %r10b,%bl + ja L$generic + andl $4026531839,%edx + jmp L$generic + +L$intel: + cmpl $4,%r11d + movl $-1,%r10d + jb L$nocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + + cmpl $7,%r11d + jb L$nocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +L$nocacheinfo: + movl $1,%eax + cpuid + + andl $3220176895,%edx + cmpl $0,%r9d + jne L$notintel + orl $1073741824,%edx +L$notintel: + btl $28,%edx + jnc L$generic + andl $4026531839,%edx + cmpl $0,%r10d + je L$generic + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L$generic + andl $4026531839,%edx +L$generic: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc L$clear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je L$done +L$clear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +L$done: + movl %r9d,4(%rdi) + movl %r10d,0(%rdi) + movq %r8,%rbx + .byte 0xf3,0xc3 + + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/md5/md5-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/md5/md5-x86_64.S new file mode 100644 index 00000000000..1e61479bc47 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/md5/md5-x86_64.S @@ -0,0 +1,671 @@ +#if defined(__x86_64__) +.text +.p2align 4 + +.globl _md5_block_asm_data_order +.private_extern _md5_block_asm_data_order + +_md5_block_asm_data_order: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r14 + pushq %r15 +L$prologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je L$end + + +L$loop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb L$loop + + +L$end: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r12 + movq 24(%rsp),%rbx + movq 32(%rsp),%rbp + addq $40,%rsp +L$epilogue: + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/modes/aesni-gcm-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/modes/aesni-gcm-x86_64.S new file mode 100644 index 00000000000..21d5ad67e00 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/modes/aesni-gcm-x86_64.S @@ -0,0 +1,19 @@ +#if defined(__x86_64__) +.text + +.globl _aesni_gcm_encrypt +.private_extern _aesni_gcm_encrypt + +_aesni_gcm_encrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _aesni_gcm_decrypt +.private_extern _aesni_gcm_decrypt + +_aesni_gcm_decrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/modes/ghash-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/modes/ghash-x86_64.S new file mode 100644 index 00000000000..305a91cb60c --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/modes/ghash-x86_64.S @@ -0,0 +1,1328 @@ +#if defined(__x86_64__) +.text + + +.globl _gcm_gmult_4bit +.private_extern _gcm_gmult_4bit + +.p2align 4 +_gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +L$gmult_prologue: + + movzbq 15(%rdi),%r8 + leaq L$rem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp L$oop1 + +.p2align 4 +L$oop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js L$break1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp L$oop1 + +.p2align 4 +L$break1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +L$gmult_epilogue: + .byte 0xf3,0xc3 + +.globl _gcm_ghash_4bit +.private_extern _gcm_ghash_4bit + +.p2align 4 +_gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +L$ghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq L$rem_8bit(%rip),%r11 + jmp L$outer_loop +.p2align 4 +L$outer_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb L$outer_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$ghash_epilogue: + .byte 0xf3,0xc3 + +.globl _gcm_init_clmul +.private_extern _gcm_init_clmul + +.p2align 4 +_gcm_init_clmul: +L$_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand L$0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_gmult_clmul +.private_extern _gcm_gmult_clmul + +.p2align 4 +_gcm_gmult_clmul: +L$_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa L$bswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_ghash_clmul +.private_extern _gcm_ghash_clmul + +.p2align 5 +_gcm_ghash_clmul: +L$_ghash_clmul: + movdqa L$bswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $16,%rcx + jz L$odd_tail + + movdqu 16(%rsi),%xmm6 + movl _OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb L$skip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je L$skip4x + + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jc L$tail4x + + jmp L$mod4_loop +.p2align 5 +L$mod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa L$7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc L$mod4_loop + +L$tail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz L$done + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz L$odd_tail +L$skip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $32,%rcx + jbe L$even_tail + nop + jmp L$mod_loop + +.p2align 5 +L$mod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $32,%rcx + ja L$mod_loop + +L$even_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz L$done + +L$odd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L$done: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_init_avx +.private_extern _gcm_init_avx + +.p2align 5 +_gcm_init_avx: + jmp L$_init_clmul + +.globl _gcm_gmult_avx +.private_extern _gcm_gmult_avx + +.p2align 5 +_gcm_gmult_avx: + jmp L$_gmult_clmul + +.globl _gcm_ghash_avx +.private_extern _gcm_ghash_avx + +.p2align 5 +_gcm_ghash_avx: + jmp L$_ghash_clmul + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$7_mask: +.long 7,0,7,0 +L$7_mask_poly: +.long 7,0,450,0 +.p2align 6 + +L$rem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +L$rem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/rand/rdrand-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/rand/rdrand-x86_64.S new file mode 100644 index 00000000000..f0df296e1a3 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/rand/rdrand-x86_64.S @@ -0,0 +1,48 @@ +#if defined(__x86_64__) +.text + + + + +.globl _CRYPTO_rdrand +.private_extern _CRYPTO_rdrand + +.p2align 4 +_CRYPTO_rdrand: + xorq %rax,%rax + + +.byte 0x48, 0x0f, 0xc7, 0xf1 + + adcq %rax,%rax + movq %rcx,0(%rdi) + .byte 0xf3,0xc3 + + + + + +.globl _CRYPTO_rdrand_multiple8_buf +.private_extern _CRYPTO_rdrand_multiple8_buf + +.p2align 4 +_CRYPTO_rdrand_multiple8_buf: + testq %rsi,%rsi + jz L$out + movq $8,%rdx +L$loop: + + +.byte 0x48, 0x0f, 0xc7, 0xf1 + jnc L$err + movq %rcx,0(%rdi) + addq %rdx,%rdi + subq %rdx,%rsi + jnz L$loop +L$out: + movq $1,%rax + .byte 0xf3,0xc3 +L$err: + xorq %rax,%rax + .byte 0xf3,0xc3 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-md5-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-md5-x86_64.S new file mode 100644 index 00000000000..31ee7d26dff --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-md5-x86_64.S @@ -0,0 +1,1262 @@ +#if defined(__x86_64__) +.text +.p2align 4 + +.globl _rc4_md5_enc +.private_extern _rc4_md5_enc + +_rc4_md5_enc: + cmpq $0,%r9 + je L$abort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $40,%rsp +L$body: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp L$oop + +.p2align 4 +L$oop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb L$oop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 + movq 48(%rsp),%r14 + movq 56(%rsp),%r13 + movq 64(%rsp),%r12 + movq 72(%rsp),%rbp + movq 80(%rsp),%rbx + leaq 88(%rsp),%rsp +L$epilogue: +L$abort: + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-x86_64.S new file mode 100644 index 00000000000..780818476c5 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/rc4/rc4-x86_64.S @@ -0,0 +1,595 @@ +#if defined(__x86_64__) +.text + + +.globl _asm_RC4 +.private_extern _asm_RC4 + +.p2align 4 +_asm_RC4: + orq %rsi,%rsi + jne L$entry + .byte 0xf3,0xc3 +L$entry: + pushq %rbx + pushq %r12 + pushq %r13 +L$prologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl + cmpl $-1,256(%rdi) + je L$RC4_CHAR + movl _OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx + incb %r10b + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz L$loop1 + btl $30,%r8d + jc L$intel + andq $7,%rbx + leaq 1(%r10),%rsi + jz L$oop8 + subq %rbx,%r11 +L$oop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz L$oop8_warmup + + leaq 1(%r10),%rsi + jmp L$oop8 +.p2align 4 +L$oop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r12,%r13,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz L$oop8 + cmpq $0,%r11 + jne L$loop1 + jmp L$exit + +.p2align 4 +L$intel: + testq $-32,%r11 + jz L$loop1 + andq $15,%rbx + jz L$oop16_is_hot + subq %rbx,%r11 +L$oop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz L$oop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +L$oop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp L$oop16_enter +.p2align 4 +L$oop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 +L$oop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz L$oop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 + jne L$loop1 + jmp L$exit + +.p2align 4 +L$loop1: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %r11 + jnz L$loop1 + jmp L$exit + +.p2align 4 +L$RC4_CHAR: + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 + jz L$cloop1 + jmp L$cloop8 +.p2align 4 +L$cloop8: + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne L$cmov0 + movq %rax,%rbx +L$cmov0: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne L$cmov1 + movq %rbx,%rax +L$cmov1: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne L$cmov2 + movq %rax,%rbx +L$cmov2: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne L$cmov3 + movq %rbx,%rax +L$cmov3: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne L$cmov4 + movq %rax,%rbx +L$cmov4: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne L$cmov5 + movq %rbx,%rax +L$cmov5: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne L$cmov6 + movq %rax,%rbx +L$cmov6: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne L$cmov7 + movq %rbx,%rax +L$cmov7: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 + jnz L$cloop8 + cmpq $0,%r11 + jne L$cloop1 + jmp L$exit +.p2align 4 +L$cloop1: + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 + jnz L$cloop1 + jmp L$exit + +.p2align 4 +L$exit: + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) + + movq (%rsp),%r13 + movq 8(%rsp),%r12 + movq 16(%rsp),%rbx + addq $24,%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.globl _asm_RC4_set_key +.private_extern _asm_RC4_set_key + +.p2align 4 +_asm_RC4_set_key: + leaq 8(%rdi),%rdi + leaq (%rdx,%rsi,1),%rdx + negq %rsi + movq %rsi,%rcx + xorl %eax,%eax + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + + movl _OPENSSL_ia32cap_P(%rip),%r8d + btl $20,%r8d + jc L$c1stloop + jmp L$w1stloop + +.p2align 4 +L$w1stloop: + movl %eax,(%rdi,%rax,4) + addb $1,%al + jnc L$w1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.p2align 4 +L$w2ndloop: + movl (%rdi,%r9,4),%r10d + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movl (%rdi,%r8,4),%r11d + cmovzq %rcx,%rsi + movl %r10d,(%rdi,%r8,4) + movl %r11d,(%rdi,%r9,4) + addb $1,%r9b + jnc L$w2ndloop + jmp L$exit_key + +.p2align 4 +L$c1stloop: + movb %al,(%rdi,%rax,1) + addb $1,%al + jnc L$c1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.p2align 4 +L$c2ndloop: + movb (%rdi,%r9,1),%r10b + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movb (%rdi,%r8,1),%r11b + jnz L$cnowrap + movq %rcx,%rsi +L$cnowrap: + movb %r10b,(%rdi,%r8,1) + movb %r11b,(%rdi,%r9,1) + addb $1,%r9b + jnc L$c2ndloop + movl $-1,256(%rdi) + +.p2align 4 +L$exit_key: + xorl %eax,%eax + movl %eax,-8(%rdi) + movl %eax,-4(%rdi) + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/sha/sha1-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/sha/sha1-x86_64.S new file mode 100644 index 00000000000..044dc5b7cd0 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/sha/sha1-x86_64.S @@ -0,0 +1,2425 @@ +#if defined(__x86_64__) +.text + + +.globl _sha1_block_data_order +.private_extern _sha1_block_data_order + +.p2align 4 +_sha1_block_data_order: + movl _OPENSSL_ia32cap_P+0(%rip),%r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r8d + movl _OPENSSL_ia32cap_P+8(%rip),%r10d + testl $512,%r8d + jz L$ialu + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/sha/sha256-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/sha/sha256-x86_64.S new file mode 100644 index 00000000000..da02d4c2dc9 --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/sha/sha256-x86_64.S @@ -0,0 +1,2843 @@ +#if defined(__x86_64__) +.text + + +.globl _sha256_block_data_order +.private_extern _sha256_block_data_order + +.p2align 4 +_sha256_block_data_order: + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $512,%r10d + jnz L$ssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz L$rounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.p2align 6 +sha256_block_data_order_ssse3: +L$ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +#endif diff --git a/third_party/boringssl/mac-x86_64/crypto/sha/sha512-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/sha/sha512-x86_64.S new file mode 100644 index 00000000000..2f5d912425c --- /dev/null +++ b/third_party/boringssl/mac-x86_64/crypto/sha/sha512-x86_64.S @@ -0,0 +1,1786 @@ +#if defined(__x86_64__) +.text + + +.globl _sha512_block_data_order +.private_extern _sha512_block_data_order + +.p2align 4 +_sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop + +.p2align 4 +L$loop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz L$rounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +#endif diff --git a/third_party/boringssl/win-x86/crypto/aes/aes-586.asm b/third_party/boringssl/win-x86/crypto/aes/aes-586.asm new file mode 100644 index 00000000000..42ca0267e7a --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/aes/aes-586.asm @@ -0,0 +1,3219 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +align 16 +__x86_AES_encrypt_compact: + mov DWORD [20+esp],edi + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi + mov edi,DWORD [ebp-128] + mov esi,DWORD [ebp-96] + mov edi,DWORD [ebp-64] + mov esi,DWORD [ebp-32] + mov edi,DWORD [ebp] + mov esi,DWORD [32+ebp] + mov edi,DWORD [64+ebp] + mov esi,DWORD [96+ebp] +align 16 +L$000loop: + mov esi,eax + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,ch + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,eax + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + and edx,255 + movzx edx,BYTE [edx*1+ebp-128] + movzx eax,ah + movzx eax,BYTE [eax*1+ebp-128] + shl eax,8 + xor edx,eax + mov eax,DWORD [4+esp] + and ebx,255 + movzx ebx,BYTE [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD [8+esp] + movzx ecx,BYTE [ecx*1+ebp-128] + shl ecx,24 + xor edx,ecx + mov ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edi,[ecx*1+ecx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,ecx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,ecx + xor ecx,esi + ror edi,24 + xor esi,ebp + rol ecx,24 + xor esi,edi + mov ebp,2155905152 + xor ecx,esi + and ebp,edx + lea edi,[edx*1+edx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,edx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,edx + xor edx,esi + ror edi,24 + xor esi,ebp + rol edx,24 + xor esi,edi + mov ebp,2155905152 + xor edx,esi + and ebp,eax + lea edi,[eax*1+eax] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,eax + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,eax + xor eax,esi + ror edi,24 + xor esi,ebp + rol eax,24 + xor esi,edi + mov ebp,2155905152 + xor eax,esi + and ebp,ebx + lea edi,[ebx*1+ebx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,ebx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,ebx + xor ebx,esi + ror edi,24 + xor esi,ebp + rol ebx,24 + xor esi,edi + xor ebx,esi + mov edi,DWORD [20+esp] + mov ebp,DWORD [28+esp] + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + cmp edi,DWORD [24+esp] + mov DWORD [20+esp],edi + jb NEAR L$000loop + mov esi,eax + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,ch + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,eax + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov edi,DWORD [20+esp] + and edx,255 + movzx edx,BYTE [edx*1+ebp-128] + movzx eax,ah + movzx eax,BYTE [eax*1+ebp-128] + shl eax,8 + xor edx,eax + mov eax,DWORD [4+esp] + and ebx,255 + movzx ebx,BYTE [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD [8+esp] + movzx ecx,BYTE [ecx*1+ebp-128] + shl ecx,24 + xor edx,ecx + mov ecx,esi + xor eax,DWORD [16+edi] + xor ebx,DWORD [20+edi] + xor ecx,DWORD [24+edi] + xor edx,DWORD [28+edi] + ret +align 16 +__sse_AES_encrypt_compact: + pxor mm0,[edi] + pxor mm4,[8+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi + mov eax,454761243 + mov DWORD [8+esp],eax + mov DWORD [12+esp],eax + mov eax,DWORD [ebp-128] + mov ebx,DWORD [ebp-96] + mov ecx,DWORD [ebp-64] + mov edx,DWORD [ebp-32] + mov eax,DWORD [ebp] + mov ebx,DWORD [32+ebp] + mov ecx,DWORD [64+ebp] + mov edx,DWORD [96+ebp] +align 16 +L$001loop: + pshufw mm1,mm0,8 + pshufw mm5,mm4,13 + movd eax,mm1 + movd ebx,mm5 + mov DWORD [20+esp],edi + movzx esi,al + movzx edx,ah + pshufw mm2,mm0,13 + movzx ecx,BYTE [esi*1+ebp-128] + movzx edi,bl + movzx edx,BYTE [edx*1+ebp-128] + shr eax,16 + shl edx,8 + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,16 + pshufw mm6,mm4,8 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,ah + shl esi,24 + shr ebx,16 + or edx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,8 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,al + shl esi,24 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bl + movd eax,mm2 + movd mm0,ecx + movzx ecx,BYTE [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 + movd ebx,mm6 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bl + shl esi,8 + shr ebx,16 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,al + shr eax,16 + movd mm1,ecx + movzx ecx,BYTE [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 + and eax,255 + or ecx,esi + punpckldq mm0,mm1 + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + and ebx,255 + movzx eax,BYTE [eax*1+ebp-128] + or ecx,esi + shl eax,16 + movzx esi,BYTE [edi*1+ebp-128] + or edx,eax + shl esi,8 + movzx ebx,BYTE [ebx*1+ebp-128] + or ecx,esi + or edx,ebx + mov edi,DWORD [20+esp] + movd mm4,ecx + movd mm5,edx + punpckldq mm4,mm5 + add edi,16 + cmp edi,DWORD [24+esp] + ja NEAR L$002out + movq mm2,[8+esp] + pxor mm3,mm3 + pxor mm7,mm7 + movq mm1,mm0 + movq mm5,mm4 + pcmpgtb mm3,mm0 + pcmpgtb mm7,mm4 + pand mm3,mm2 + pand mm7,mm2 + pshufw mm2,mm0,177 + pshufw mm6,mm4,177 + paddb mm0,mm0 + paddb mm4,mm4 + pxor mm0,mm3 + pxor mm4,mm7 + pshufw mm3,mm2,177 + pshufw mm7,mm6,177 + pxor mm1,mm0 + pxor mm5,mm4 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm2,mm3 + movq mm6,mm7 + pslld mm3,8 + pslld mm7,8 + psrld mm2,24 + psrld mm6,24 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm3,mm1 + movq mm7,mm5 + movq mm2,[edi] + movq mm6,[8+edi] + psrld mm1,8 + psrld mm5,8 + mov eax,DWORD [ebp-128] + pslld mm3,24 + pslld mm7,24 + mov ebx,DWORD [ebp-64] + pxor mm0,mm1 + pxor mm4,mm5 + mov ecx,DWORD [ebp] + pxor mm0,mm3 + pxor mm4,mm7 + mov edx,DWORD [64+ebp] + pxor mm0,mm2 + pxor mm4,mm6 + jmp NEAR L$001loop +align 16 +L$002out: + pxor mm0,[edi] + pxor mm4,[8+edi] + ret +align 16 +__x86_AES_encrypt: + mov DWORD [20+esp],edi + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi +align 16 +L$003loop: + mov esi,eax + and esi,255 + mov esi,DWORD [esi*8+ebp] + movzx edi,bh + xor esi,DWORD [3+edi*8+ebp] + mov edi,ecx + shr edi,16 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + mov edi,edx + shr edi,24 + xor esi,DWORD [1+edi*8+ebp] + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + mov esi,DWORD [esi*8+ebp] + movzx edi,ch + xor esi,DWORD [3+edi*8+ebp] + mov edi,edx + shr edi,16 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + mov edi,eax + shr edi,24 + xor esi,DWORD [1+edi*8+ebp] + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + mov esi,DWORD [esi*8+ebp] + movzx edi,dh + xor esi,DWORD [3+edi*8+ebp] + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + movzx edi,bh + xor esi,DWORD [1+edi*8+ebp] + mov edi,DWORD [20+esp] + mov edx,DWORD [edx*8+ebp] + movzx eax,ah + xor edx,DWORD [3+eax*8+ebp] + mov eax,DWORD [4+esp] + and ebx,255 + xor edx,DWORD [2+ebx*8+ebp] + mov ebx,DWORD [8+esp] + xor edx,DWORD [1+ecx*8+ebp] + mov ecx,esi + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + cmp edi,DWORD [24+esp] + mov DWORD [20+esp],edi + jb NEAR L$003loop + mov esi,eax + and esi,255 + mov esi,DWORD [2+esi*8+ebp] + and esi,255 + movzx edi,bh + mov edi,DWORD [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + mov edi,DWORD [edi*8+ebp] + and edi,16711680 + xor esi,edi + mov edi,edx + shr edi,24 + mov edi,DWORD [2+edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + mov esi,DWORD [2+esi*8+ebp] + and esi,255 + movzx edi,ch + mov edi,DWORD [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + mov edi,DWORD [edi*8+ebp] + and edi,16711680 + xor esi,edi + mov edi,eax + shr edi,24 + mov edi,DWORD [2+edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + mov esi,DWORD [2+esi*8+ebp] + and esi,255 + movzx edi,dh + mov edi,DWORD [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + mov edi,DWORD [edi*8+ebp] + and edi,16711680 + xor esi,edi + movzx edi,bh + mov edi,DWORD [2+edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov edi,DWORD [20+esp] + and edx,255 + mov edx,DWORD [2+edx*8+ebp] + and edx,255 + movzx eax,ah + mov eax,DWORD [eax*8+ebp] + and eax,65280 + xor edx,eax + mov eax,DWORD [4+esp] + and ebx,255 + mov ebx,DWORD [ebx*8+ebp] + and ebx,16711680 + xor edx,ebx + mov ebx,DWORD [8+esp] + mov ecx,DWORD [2+ecx*8+ebp] + and ecx,4278190080 + xor edx,ecx + mov ecx,esi + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + ret +align 64 +L$AES_Te: +dd 2774754246,2774754246 +dd 2222750968,2222750968 +dd 2574743534,2574743534 +dd 2373680118,2373680118 +dd 234025727,234025727 +dd 3177933782,3177933782 +dd 2976870366,2976870366 +dd 1422247313,1422247313 +dd 1345335392,1345335392 +dd 50397442,50397442 +dd 2842126286,2842126286 +dd 2099981142,2099981142 +dd 436141799,436141799 +dd 1658312629,1658312629 +dd 3870010189,3870010189 +dd 2591454956,2591454956 +dd 1170918031,1170918031 +dd 2642575903,2642575903 +dd 1086966153,1086966153 +dd 2273148410,2273148410 +dd 368769775,368769775 +dd 3948501426,3948501426 +dd 3376891790,3376891790 +dd 200339707,200339707 +dd 3970805057,3970805057 +dd 1742001331,1742001331 +dd 4255294047,4255294047 +dd 3937382213,3937382213 +dd 3214711843,3214711843 +dd 4154762323,4154762323 +dd 2524082916,2524082916 +dd 1539358875,1539358875 +dd 3266819957,3266819957 +dd 486407649,486407649 +dd 2928907069,2928907069 +dd 1780885068,1780885068 +dd 1513502316,1513502316 +dd 1094664062,1094664062 +dd 49805301,49805301 +dd 1338821763,1338821763 +dd 1546925160,1546925160 +dd 4104496465,4104496465 +dd 887481809,887481809 +dd 150073849,150073849 +dd 2473685474,2473685474 +dd 1943591083,1943591083 +dd 1395732834,1395732834 +dd 1058346282,1058346282 +dd 201589768,201589768 +dd 1388824469,1388824469 +dd 1696801606,1696801606 +dd 1589887901,1589887901 +dd 672667696,672667696 +dd 2711000631,2711000631 +dd 251987210,251987210 +dd 3046808111,3046808111 +dd 151455502,151455502 +dd 907153956,907153956 +dd 2608889883,2608889883 +dd 1038279391,1038279391 +dd 652995533,652995533 +dd 1764173646,1764173646 +dd 3451040383,3451040383 +dd 2675275242,2675275242 +dd 453576978,453576978 +dd 2659418909,2659418909 +dd 1949051992,1949051992 +dd 773462580,773462580 +dd 756751158,756751158 +dd 2993581788,2993581788 +dd 3998898868,3998898868 +dd 4221608027,4221608027 +dd 4132590244,4132590244 +dd 1295727478,1295727478 +dd 1641469623,1641469623 +dd 3467883389,3467883389 +dd 2066295122,2066295122 +dd 1055122397,1055122397 +dd 1898917726,1898917726 +dd 2542044179,2542044179 +dd 4115878822,4115878822 +dd 1758581177,1758581177 +dd 0,0 +dd 753790401,753790401 +dd 1612718144,1612718144 +dd 536673507,536673507 +dd 3367088505,3367088505 +dd 3982187446,3982187446 +dd 3194645204,3194645204 +dd 1187761037,1187761037 +dd 3653156455,3653156455 +dd 1262041458,1262041458 +dd 3729410708,3729410708 +dd 3561770136,3561770136 +dd 3898103984,3898103984 +dd 1255133061,1255133061 +dd 1808847035,1808847035 +dd 720367557,720367557 +dd 3853167183,3853167183 +dd 385612781,385612781 +dd 3309519750,3309519750 +dd 3612167578,3612167578 +dd 1429418854,1429418854 +dd 2491778321,2491778321 +dd 3477423498,3477423498 +dd 284817897,284817897 +dd 100794884,100794884 +dd 2172616702,2172616702 +dd 4031795360,4031795360 +dd 1144798328,1144798328 +dd 3131023141,3131023141 +dd 3819481163,3819481163 +dd 4082192802,4082192802 +dd 4272137053,4272137053 +dd 3225436288,3225436288 +dd 2324664069,2324664069 +dd 2912064063,2912064063 +dd 3164445985,3164445985 +dd 1211644016,1211644016 +dd 83228145,83228145 +dd 3753688163,3753688163 +dd 3249976951,3249976951 +dd 1977277103,1977277103 +dd 1663115586,1663115586 +dd 806359072,806359072 +dd 452984805,452984805 +dd 250868733,250868733 +dd 1842533055,1842533055 +dd 1288555905,1288555905 +dd 336333848,336333848 +dd 890442534,890442534 +dd 804056259,804056259 +dd 3781124030,3781124030 +dd 2727843637,2727843637 +dd 3427026056,3427026056 +dd 957814574,957814574 +dd 1472513171,1472513171 +dd 4071073621,4071073621 +dd 2189328124,2189328124 +dd 1195195770,1195195770 +dd 2892260552,2892260552 +dd 3881655738,3881655738 +dd 723065138,723065138 +dd 2507371494,2507371494 +dd 2690670784,2690670784 +dd 2558624025,2558624025 +dd 3511635870,3511635870 +dd 2145180835,2145180835 +dd 1713513028,1713513028 +dd 2116692564,2116692564 +dd 2878378043,2878378043 +dd 2206763019,2206763019 +dd 3393603212,3393603212 +dd 703524551,703524551 +dd 3552098411,3552098411 +dd 1007948840,1007948840 +dd 2044649127,2044649127 +dd 3797835452,3797835452 +dd 487262998,487262998 +dd 1994120109,1994120109 +dd 1004593371,1004593371 +dd 1446130276,1446130276 +dd 1312438900,1312438900 +dd 503974420,503974420 +dd 3679013266,3679013266 +dd 168166924,168166924 +dd 1814307912,1814307912 +dd 3831258296,3831258296 +dd 1573044895,1573044895 +dd 1859376061,1859376061 +dd 4021070915,4021070915 +dd 2791465668,2791465668 +dd 2828112185,2828112185 +dd 2761266481,2761266481 +dd 937747667,937747667 +dd 2339994098,2339994098 +dd 854058965,854058965 +dd 1137232011,1137232011 +dd 1496790894,1496790894 +dd 3077402074,3077402074 +dd 2358086913,2358086913 +dd 1691735473,1691735473 +dd 3528347292,3528347292 +dd 3769215305,3769215305 +dd 3027004632,3027004632 +dd 4199962284,4199962284 +dd 133494003,133494003 +dd 636152527,636152527 +dd 2942657994,2942657994 +dd 2390391540,2390391540 +dd 3920539207,3920539207 +dd 403179536,403179536 +dd 3585784431,3585784431 +dd 2289596656,2289596656 +dd 1864705354,1864705354 +dd 1915629148,1915629148 +dd 605822008,605822008 +dd 4054230615,4054230615 +dd 3350508659,3350508659 +dd 1371981463,1371981463 +dd 602466507,602466507 +dd 2094914977,2094914977 +dd 2624877800,2624877800 +dd 555687742,555687742 +dd 3712699286,3712699286 +dd 3703422305,3703422305 +dd 2257292045,2257292045 +dd 2240449039,2240449039 +dd 2423288032,2423288032 +dd 1111375484,1111375484 +dd 3300242801,3300242801 +dd 2858837708,2858837708 +dd 3628615824,3628615824 +dd 84083462,84083462 +dd 32962295,32962295 +dd 302911004,302911004 +dd 2741068226,2741068226 +dd 1597322602,1597322602 +dd 4183250862,4183250862 +dd 3501832553,3501832553 +dd 2441512471,2441512471 +dd 1489093017,1489093017 +dd 656219450,656219450 +dd 3114180135,3114180135 +dd 954327513,954327513 +dd 335083755,335083755 +dd 3013122091,3013122091 +dd 856756514,856756514 +dd 3144247762,3144247762 +dd 1893325225,1893325225 +dd 2307821063,2307821063 +dd 2811532339,2811532339 +dd 3063651117,3063651117 +dd 572399164,572399164 +dd 2458355477,2458355477 +dd 552200649,552200649 +dd 1238290055,1238290055 +dd 4283782570,4283782570 +dd 2015897680,2015897680 +dd 2061492133,2061492133 +dd 2408352771,2408352771 +dd 4171342169,4171342169 +dd 2156497161,2156497161 +dd 386731290,386731290 +dd 3669999461,3669999461 +dd 837215959,837215959 +dd 3326231172,3326231172 +dd 3093850320,3093850320 +dd 3275833730,3275833730 +dd 2962856233,2962856233 +dd 1999449434,1999449434 +dd 286199582,286199582 +dd 3417354363,3417354363 +dd 4233385128,4233385128 +dd 3602627437,3602627437 +dd 974525996,974525996 +db 99,124,119,123,242,107,111,197 +db 48,1,103,43,254,215,171,118 +db 202,130,201,125,250,89,71,240 +db 173,212,162,175,156,164,114,192 +db 183,253,147,38,54,63,247,204 +db 52,165,229,241,113,216,49,21 +db 4,199,35,195,24,150,5,154 +db 7,18,128,226,235,39,178,117 +db 9,131,44,26,27,110,90,160 +db 82,59,214,179,41,227,47,132 +db 83,209,0,237,32,252,177,91 +db 106,203,190,57,74,76,88,207 +db 208,239,170,251,67,77,51,133 +db 69,249,2,127,80,60,159,168 +db 81,163,64,143,146,157,56,245 +db 188,182,218,33,16,255,243,210 +db 205,12,19,236,95,151,68,23 +db 196,167,126,61,100,93,25,115 +db 96,129,79,220,34,42,144,136 +db 70,238,184,20,222,94,11,219 +db 224,50,58,10,73,6,36,92 +db 194,211,172,98,145,149,228,121 +db 231,200,55,109,141,213,78,169 +db 108,86,244,234,101,122,174,8 +db 186,120,37,46,28,166,180,198 +db 232,221,116,31,75,189,139,138 +db 112,62,181,102,72,3,246,14 +db 97,53,87,185,134,193,29,158 +db 225,248,152,17,105,217,142,148 +db 155,30,135,233,206,85,40,223 +db 140,161,137,13,191,230,66,104 +db 65,153,45,15,176,84,187,22 +db 99,124,119,123,242,107,111,197 +db 48,1,103,43,254,215,171,118 +db 202,130,201,125,250,89,71,240 +db 173,212,162,175,156,164,114,192 +db 183,253,147,38,54,63,247,204 +db 52,165,229,241,113,216,49,21 +db 4,199,35,195,24,150,5,154 +db 7,18,128,226,235,39,178,117 +db 9,131,44,26,27,110,90,160 +db 82,59,214,179,41,227,47,132 +db 83,209,0,237,32,252,177,91 +db 106,203,190,57,74,76,88,207 +db 208,239,170,251,67,77,51,133 +db 69,249,2,127,80,60,159,168 +db 81,163,64,143,146,157,56,245 +db 188,182,218,33,16,255,243,210 +db 205,12,19,236,95,151,68,23 +db 196,167,126,61,100,93,25,115 +db 96,129,79,220,34,42,144,136 +db 70,238,184,20,222,94,11,219 +db 224,50,58,10,73,6,36,92 +db 194,211,172,98,145,149,228,121 +db 231,200,55,109,141,213,78,169 +db 108,86,244,234,101,122,174,8 +db 186,120,37,46,28,166,180,198 +db 232,221,116,31,75,189,139,138 +db 112,62,181,102,72,3,246,14 +db 97,53,87,185,134,193,29,158 +db 225,248,152,17,105,217,142,148 +db 155,30,135,233,206,85,40,223 +db 140,161,137,13,191,230,66,104 +db 65,153,45,15,176,84,187,22 +db 99,124,119,123,242,107,111,197 +db 48,1,103,43,254,215,171,118 +db 202,130,201,125,250,89,71,240 +db 173,212,162,175,156,164,114,192 +db 183,253,147,38,54,63,247,204 +db 52,165,229,241,113,216,49,21 +db 4,199,35,195,24,150,5,154 +db 7,18,128,226,235,39,178,117 +db 9,131,44,26,27,110,90,160 +db 82,59,214,179,41,227,47,132 +db 83,209,0,237,32,252,177,91 +db 106,203,190,57,74,76,88,207 +db 208,239,170,251,67,77,51,133 +db 69,249,2,127,80,60,159,168 +db 81,163,64,143,146,157,56,245 +db 188,182,218,33,16,255,243,210 +db 205,12,19,236,95,151,68,23 +db 196,167,126,61,100,93,25,115 +db 96,129,79,220,34,42,144,136 +db 70,238,184,20,222,94,11,219 +db 224,50,58,10,73,6,36,92 +db 194,211,172,98,145,149,228,121 +db 231,200,55,109,141,213,78,169 +db 108,86,244,234,101,122,174,8 +db 186,120,37,46,28,166,180,198 +db 232,221,116,31,75,189,139,138 +db 112,62,181,102,72,3,246,14 +db 97,53,87,185,134,193,29,158 +db 225,248,152,17,105,217,142,148 +db 155,30,135,233,206,85,40,223 +db 140,161,137,13,191,230,66,104 +db 65,153,45,15,176,84,187,22 +db 99,124,119,123,242,107,111,197 +db 48,1,103,43,254,215,171,118 +db 202,130,201,125,250,89,71,240 +db 173,212,162,175,156,164,114,192 +db 183,253,147,38,54,63,247,204 +db 52,165,229,241,113,216,49,21 +db 4,199,35,195,24,150,5,154 +db 7,18,128,226,235,39,178,117 +db 9,131,44,26,27,110,90,160 +db 82,59,214,179,41,227,47,132 +db 83,209,0,237,32,252,177,91 +db 106,203,190,57,74,76,88,207 +db 208,239,170,251,67,77,51,133 +db 69,249,2,127,80,60,159,168 +db 81,163,64,143,146,157,56,245 +db 188,182,218,33,16,255,243,210 +db 205,12,19,236,95,151,68,23 +db 196,167,126,61,100,93,25,115 +db 96,129,79,220,34,42,144,136 +db 70,238,184,20,222,94,11,219 +db 224,50,58,10,73,6,36,92 +db 194,211,172,98,145,149,228,121 +db 231,200,55,109,141,213,78,169 +db 108,86,244,234,101,122,174,8 +db 186,120,37,46,28,166,180,198 +db 232,221,116,31,75,189,139,138 +db 112,62,181,102,72,3,246,14 +db 97,53,87,185,134,193,29,158 +db 225,248,152,17,105,217,142,148 +db 155,30,135,233,206,85,40,223 +db 140,161,137,13,191,230,66,104 +db 65,153,45,15,176,84,187,22 +dd 1,2,4,8 +dd 16,32,64,128 +dd 27,54,0,0 +dd 0,0,0,0 +global _asm_AES_encrypt +align 16 +_asm_AES_encrypt: +L$_asm_AES_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [28+esp] + mov eax,esp + sub esp,36 + and esp,-64 + lea ebx,[edi-127] + sub ebx,esp + neg ebx + and ebx,960 + sub esp,ebx + add esp,4 + mov DWORD [28+esp],eax + call L$004pic_point +L$004pic_point: + pop ebp + lea eax,[_OPENSSL_ia32cap_P] + lea ebp,[(L$AES_Te-L$004pic_point)+ebp] + lea ebx,[764+esp] + sub ebx,ebp + and ebx,768 + lea ebp,[2176+ebx*1+ebp] + bt DWORD [eax],25 + jnc NEAR L$005x86 + movq mm0,[esi] + movq mm4,[8+esi] + call __sse_AES_encrypt_compact + mov esp,DWORD [28+esp] + mov esi,DWORD [24+esp] + movq [esi],mm0 + movq [8+esi],mm4 + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +L$005x86: + mov DWORD [24+esp],ebp + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + call __x86_AES_encrypt_compact + mov esp,DWORD [28+esp] + mov esi,DWORD [24+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__x86_AES_decrypt_compact: + mov DWORD [20+esp],edi + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi + mov edi,DWORD [ebp-128] + mov esi,DWORD [ebp-96] + mov edi,DWORD [ebp-64] + mov esi,DWORD [ebp-32] + mov edi,DWORD [ebp] + mov esi,DWORD [32+ebp] + mov edi,DWORD [64+ebp] + mov esi,DWORD [96+ebp] +align 16 +L$006loop: + mov esi,eax + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,ah + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + and edx,255 + movzx edx,BYTE [edx*1+ebp-128] + movzx ecx,ch + movzx ecx,BYTE [ecx*1+ebp-128] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + shr eax,24 + movzx eax,BYTE [eax*1+ebp-128] + shl eax,24 + xor edx,eax + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea eax,[ecx*1+ecx] + sub esi,edi + and eax,4278124286 + and esi,454761243 + xor eax,esi + mov edi,2155905152 + and edi,eax + mov esi,edi + shr edi,7 + lea ebx,[eax*1+eax] + sub esi,edi + and ebx,4278124286 + and esi,454761243 + xor eax,ecx + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ebp,[ebx*1+ebx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor ebx,ecx + rol ecx,8 + xor ebp,esi + xor ecx,eax + xor eax,ebp + xor ecx,ebx + xor ebx,ebp + rol eax,24 + xor ecx,ebp + rol ebx,16 + xor ecx,eax + rol ebp,8 + xor ecx,ebx + mov eax,DWORD [4+esp] + xor ecx,ebp + mov DWORD [12+esp],ecx + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebx,[edx*1+edx] + sub esi,edi + and ebx,4278124286 + and esi,454761243 + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ecx,[ebx*1+ebx] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ebx,edx + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea ebp,[ecx*1+ecx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor ecx,edx + rol edx,8 + xor ebp,esi + xor edx,ebx + xor ebx,ebp + xor edx,ecx + xor ecx,ebp + rol ebx,24 + xor edx,ebp + rol ecx,16 + xor edx,ebx + rol ebp,8 + xor edx,ecx + mov ebx,DWORD [8+esp] + xor edx,ebp + mov DWORD [16+esp],edx + mov edi,2155905152 + and edi,eax + mov esi,edi + shr edi,7 + lea ecx,[eax*1+eax] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea edx,[ecx*1+ecx] + sub esi,edi + and edx,4278124286 + and esi,454761243 + xor ecx,eax + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebp,[edx*1+edx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor edx,eax + rol eax,8 + xor ebp,esi + xor eax,ecx + xor ecx,ebp + xor eax,edx + xor edx,ebp + rol ecx,24 + xor eax,ebp + rol edx,16 + xor eax,ecx + rol ebp,8 + xor eax,edx + xor eax,ebp + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ecx,[ebx*1+ebx] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea edx,[ecx*1+ecx] + sub esi,edi + and edx,4278124286 + and esi,454761243 + xor ecx,ebx + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebp,[edx*1+edx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor edx,ebx + rol ebx,8 + xor ebp,esi + xor ebx,ecx + xor ecx,ebp + xor ebx,edx + xor edx,ebp + rol ecx,24 + xor ebx,ebp + rol edx,16 + xor ebx,ecx + rol ebp,8 + xor ebx,edx + mov ecx,DWORD [12+esp] + xor ebx,ebp + mov edx,DWORD [16+esp] + mov edi,DWORD [20+esp] + mov ebp,DWORD [28+esp] + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + cmp edi,DWORD [24+esp] + mov DWORD [20+esp],edi + jb NEAR L$006loop + mov esi,eax + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,ah + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov edi,DWORD [20+esp] + and edx,255 + movzx edx,BYTE [edx*1+ebp-128] + movzx ecx,ch + movzx ecx,BYTE [ecx*1+ebp-128] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD [8+esp] + shr eax,24 + movzx eax,BYTE [eax*1+ebp-128] + shl eax,24 + xor edx,eax + mov eax,DWORD [4+esp] + xor eax,DWORD [16+edi] + xor ebx,DWORD [20+edi] + xor ecx,DWORD [24+edi] + xor edx,DWORD [28+edi] + ret +align 16 +__sse_AES_decrypt_compact: + pxor mm0,[edi] + pxor mm4,[8+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi + mov eax,454761243 + mov DWORD [8+esp],eax + mov DWORD [12+esp],eax + mov eax,DWORD [ebp-128] + mov ebx,DWORD [ebp-96] + mov ecx,DWORD [ebp-64] + mov edx,DWORD [ebp-32] + mov eax,DWORD [ebp] + mov ebx,DWORD [32+ebp] + mov ecx,DWORD [64+ebp] + mov edx,DWORD [96+ebp] +align 16 +L$007loop: + pshufw mm1,mm0,12 + pshufw mm5,mm4,9 + movd eax,mm1 + movd ebx,mm5 + mov DWORD [20+esp],edi + movzx esi,al + movzx edx,ah + pshufw mm2,mm0,6 + movzx ecx,BYTE [esi*1+ebp-128] + movzx edi,bl + movzx edx,BYTE [edx*1+ebp-128] + shr eax,16 + shl edx,8 + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,16 + pshufw mm6,mm4,3 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,ah + shl esi,24 + shr ebx,16 + or edx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,al + shl esi,8 + movd eax,mm2 + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bl + shl esi,16 + movd ebx,mm6 + movd mm0,ecx + movzx ecx,BYTE [edi*1+ebp-128] + movzx edi,al + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bl + or edx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,ah + shl esi,16 + shr eax,16 + or edx,esi + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,bh + shr ebx,16 + shl esi,8 + movd mm1,edx + movzx edx,BYTE [edi*1+ebp-128] + movzx edi,bh + shl edx,24 + and ebx,255 + or edx,esi + punpckldq mm0,mm1 + movzx esi,BYTE [edi*1+ebp-128] + movzx edi,al + shl esi,8 + movzx eax,ah + movzx ebx,BYTE [ebx*1+ebp-128] + or ecx,esi + movzx esi,BYTE [edi*1+ebp-128] + or edx,ebx + shl esi,16 + movzx eax,BYTE [eax*1+ebp-128] + or edx,esi + shl eax,24 + or ecx,eax + mov edi,DWORD [20+esp] + movd mm4,edx + movd mm5,ecx + punpckldq mm4,mm5 + add edi,16 + cmp edi,DWORD [24+esp] + ja NEAR L$008out + movq mm3,mm0 + movq mm7,mm4 + pshufw mm2,mm0,228 + pshufw mm6,mm4,228 + movq mm1,mm0 + movq mm5,mm4 + pshufw mm0,mm0,177 + pshufw mm4,mm4,177 + pslld mm2,8 + pslld mm6,8 + psrld mm3,8 + psrld mm7,8 + pxor mm0,mm2 + pxor mm4,mm6 + pxor mm0,mm3 + pxor mm4,mm7 + pslld mm2,16 + pslld mm6,16 + psrld mm3,16 + psrld mm7,16 + pxor mm0,mm2 + pxor mm4,mm6 + pxor mm0,mm3 + pxor mm4,mm7 + movq mm3,[8+esp] + pxor mm2,mm2 + pxor mm6,mm6 + pcmpgtb mm2,mm1 + pcmpgtb mm6,mm5 + pand mm2,mm3 + pand mm6,mm3 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm2 + pxor mm5,mm6 + movq mm3,mm1 + movq mm7,mm5 + movq mm2,mm1 + movq mm6,mm5 + pxor mm0,mm1 + pxor mm4,mm5 + pslld mm3,24 + pslld mm7,24 + psrld mm2,8 + psrld mm6,8 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm2,[8+esp] + pxor mm3,mm3 + pxor mm7,mm7 + pcmpgtb mm3,mm1 + pcmpgtb mm7,mm5 + pand mm3,mm2 + pand mm7,mm2 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm3 + pxor mm5,mm7 + pshufw mm3,mm1,177 + pshufw mm7,mm5,177 + pxor mm0,mm1 + pxor mm4,mm5 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm3,mm3 + pxor mm7,mm7 + pcmpgtb mm3,mm1 + pcmpgtb mm7,mm5 + pand mm3,mm2 + pand mm7,mm2 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm3 + pxor mm5,mm7 + pxor mm0,mm1 + pxor mm4,mm5 + movq mm3,mm1 + movq mm7,mm5 + pshufw mm2,mm1,177 + pshufw mm6,mm5,177 + pxor mm0,mm2 + pxor mm4,mm6 + pslld mm1,8 + pslld mm5,8 + psrld mm3,8 + psrld mm7,8 + movq mm2,[edi] + movq mm6,[8+edi] + pxor mm0,mm1 + pxor mm4,mm5 + pxor mm0,mm3 + pxor mm4,mm7 + mov eax,DWORD [ebp-128] + pslld mm1,16 + pslld mm5,16 + mov ebx,DWORD [ebp-64] + psrld mm3,16 + psrld mm7,16 + mov ecx,DWORD [ebp] + pxor mm0,mm1 + pxor mm4,mm5 + mov edx,DWORD [64+ebp] + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + jmp NEAR L$007loop +align 16 +L$008out: + pxor mm0,[edi] + pxor mm4,[8+edi] + ret +align 16 +__x86_AES_decrypt: + mov DWORD [20+esp],edi + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [24+esp],esi +align 16 +L$009loop: + mov esi,eax + and esi,255 + mov esi,DWORD [esi*8+ebp] + movzx edi,dh + xor esi,DWORD [3+edi*8+ebp] + mov edi,ecx + shr edi,16 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + mov edi,ebx + shr edi,24 + xor esi,DWORD [1+edi*8+ebp] + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + mov esi,DWORD [esi*8+ebp] + movzx edi,ah + xor esi,DWORD [3+edi*8+ebp] + mov edi,edx + shr edi,16 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + mov edi,ecx + shr edi,24 + xor esi,DWORD [1+edi*8+ebp] + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + mov esi,DWORD [esi*8+ebp] + movzx edi,bh + xor esi,DWORD [3+edi*8+ebp] + mov edi,eax + shr edi,16 + and edi,255 + xor esi,DWORD [2+edi*8+ebp] + mov edi,edx + shr edi,24 + xor esi,DWORD [1+edi*8+ebp] + mov edi,DWORD [20+esp] + and edx,255 + mov edx,DWORD [edx*8+ebp] + movzx ecx,ch + xor edx,DWORD [3+ecx*8+ebp] + mov ecx,esi + shr ebx,16 + and ebx,255 + xor edx,DWORD [2+ebx*8+ebp] + mov ebx,DWORD [8+esp] + shr eax,24 + xor edx,DWORD [1+eax*8+ebp] + mov eax,DWORD [4+esp] + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + cmp edi,DWORD [24+esp] + mov DWORD [20+esp],edi + jb NEAR L$009loop + lea ebp,[2176+ebp] + mov edi,DWORD [ebp-128] + mov esi,DWORD [ebp-96] + mov edi,DWORD [ebp-64] + mov esi,DWORD [ebp-32] + mov edi,DWORD [ebp] + mov esi,DWORD [32+ebp] + mov edi,DWORD [64+ebp] + mov esi,DWORD [96+ebp] + lea ebp,[ebp-128] + mov esi,eax + and esi,255 + movzx esi,BYTE [esi*1+ebp] + movzx edi,dh + movzx edi,BYTE [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE [edi*1+ebp] + shl edi,24 + xor esi,edi + mov DWORD [4+esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE [esi*1+ebp] + movzx edi,ah + movzx edi,BYTE [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE [edi*1+ebp] + shl edi,24 + xor esi,edi + mov DWORD [8+esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE [esi*1+ebp] + movzx edi,bh + movzx edi,BYTE [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE [edi*1+ebp] + shl edi,24 + xor esi,edi + mov edi,DWORD [20+esp] + and edx,255 + movzx edx,BYTE [edx*1+ebp] + movzx ecx,ch + movzx ecx,BYTE [ecx*1+ebp] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE [ebx*1+ebp] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD [8+esp] + shr eax,24 + movzx eax,BYTE [eax*1+ebp] + shl eax,24 + xor edx,eax + mov eax,DWORD [4+esp] + lea ebp,[ebp-2048] + add edi,16 + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + ret +align 64 +L$AES_Td: +dd 1353184337,1353184337 +dd 1399144830,1399144830 +dd 3282310938,3282310938 +dd 2522752826,2522752826 +dd 3412831035,3412831035 +dd 4047871263,4047871263 +dd 2874735276,2874735276 +dd 2466505547,2466505547 +dd 1442459680,1442459680 +dd 4134368941,4134368941 +dd 2440481928,2440481928 +dd 625738485,625738485 +dd 4242007375,4242007375 +dd 3620416197,3620416197 +dd 2151953702,2151953702 +dd 2409849525,2409849525 +dd 1230680542,1230680542 +dd 1729870373,1729870373 +dd 2551114309,2551114309 +dd 3787521629,3787521629 +dd 41234371,41234371 +dd 317738113,317738113 +dd 2744600205,2744600205 +dd 3338261355,3338261355 +dd 3881799427,3881799427 +dd 2510066197,2510066197 +dd 3950669247,3950669247 +dd 3663286933,3663286933 +dd 763608788,763608788 +dd 3542185048,3542185048 +dd 694804553,694804553 +dd 1154009486,1154009486 +dd 1787413109,1787413109 +dd 2021232372,2021232372 +dd 1799248025,1799248025 +dd 3715217703,3715217703 +dd 3058688446,3058688446 +dd 397248752,397248752 +dd 1722556617,1722556617 +dd 3023752829,3023752829 +dd 407560035,407560035 +dd 2184256229,2184256229 +dd 1613975959,1613975959 +dd 1165972322,1165972322 +dd 3765920945,3765920945 +dd 2226023355,2226023355 +dd 480281086,480281086 +dd 2485848313,2485848313 +dd 1483229296,1483229296 +dd 436028815,436028815 +dd 2272059028,2272059028 +dd 3086515026,3086515026 +dd 601060267,601060267 +dd 3791801202,3791801202 +dd 1468997603,1468997603 +dd 715871590,715871590 +dd 120122290,120122290 +dd 63092015,63092015 +dd 2591802758,2591802758 +dd 2768779219,2768779219 +dd 4068943920,4068943920 +dd 2997206819,2997206819 +dd 3127509762,3127509762 +dd 1552029421,1552029421 +dd 723308426,723308426 +dd 2461301159,2461301159 +dd 4042393587,4042393587 +dd 2715969870,2715969870 +dd 3455375973,3455375973 +dd 3586000134,3586000134 +dd 526529745,526529745 +dd 2331944644,2331944644 +dd 2639474228,2639474228 +dd 2689987490,2689987490 +dd 853641733,853641733 +dd 1978398372,1978398372 +dd 971801355,971801355 +dd 2867814464,2867814464 +dd 111112542,111112542 +dd 1360031421,1360031421 +dd 4186579262,4186579262 +dd 1023860118,1023860118 +dd 2919579357,2919579357 +dd 1186850381,1186850381 +dd 3045938321,3045938321 +dd 90031217,90031217 +dd 1876166148,1876166148 +dd 4279586912,4279586912 +dd 620468249,620468249 +dd 2548678102,2548678102 +dd 3426959497,3426959497 +dd 2006899047,2006899047 +dd 3175278768,3175278768 +dd 2290845959,2290845959 +dd 945494503,945494503 +dd 3689859193,3689859193 +dd 1191869601,1191869601 +dd 3910091388,3910091388 +dd 3374220536,3374220536 +dd 0,0 +dd 2206629897,2206629897 +dd 1223502642,1223502642 +dd 2893025566,2893025566 +dd 1316117100,1316117100 +dd 4227796733,4227796733 +dd 1446544655,1446544655 +dd 517320253,517320253 +dd 658058550,658058550 +dd 1691946762,1691946762 +dd 564550760,564550760 +dd 3511966619,3511966619 +dd 976107044,976107044 +dd 2976320012,2976320012 +dd 266819475,266819475 +dd 3533106868,3533106868 +dd 2660342555,2660342555 +dd 1338359936,1338359936 +dd 2720062561,2720062561 +dd 1766553434,1766553434 +dd 370807324,370807324 +dd 179999714,179999714 +dd 3844776128,3844776128 +dd 1138762300,1138762300 +dd 488053522,488053522 +dd 185403662,185403662 +dd 2915535858,2915535858 +dd 3114841645,3114841645 +dd 3366526484,3366526484 +dd 2233069911,2233069911 +dd 1275557295,1275557295 +dd 3151862254,3151862254 +dd 4250959779,4250959779 +dd 2670068215,2670068215 +dd 3170202204,3170202204 +dd 3309004356,3309004356 +dd 880737115,880737115 +dd 1982415755,1982415755 +dd 3703972811,3703972811 +dd 1761406390,1761406390 +dd 1676797112,1676797112 +dd 3403428311,3403428311 +dd 277177154,277177154 +dd 1076008723,1076008723 +dd 538035844,538035844 +dd 2099530373,2099530373 +dd 4164795346,4164795346 +dd 288553390,288553390 +dd 1839278535,1839278535 +dd 1261411869,1261411869 +dd 4080055004,4080055004 +dd 3964831245,3964831245 +dd 3504587127,3504587127 +dd 1813426987,1813426987 +dd 2579067049,2579067049 +dd 4199060497,4199060497 +dd 577038663,577038663 +dd 3297574056,3297574056 +dd 440397984,440397984 +dd 3626794326,3626794326 +dd 4019204898,4019204898 +dd 3343796615,3343796615 +dd 3251714265,3251714265 +dd 4272081548,4272081548 +dd 906744984,906744984 +dd 3481400742,3481400742 +dd 685669029,685669029 +dd 646887386,646887386 +dd 2764025151,2764025151 +dd 3835509292,3835509292 +dd 227702864,227702864 +dd 2613862250,2613862250 +dd 1648787028,1648787028 +dd 3256061430,3256061430 +dd 3904428176,3904428176 +dd 1593260334,1593260334 +dd 4121936770,4121936770 +dd 3196083615,3196083615 +dd 2090061929,2090061929 +dd 2838353263,2838353263 +dd 3004310991,3004310991 +dd 999926984,999926984 +dd 2809993232,2809993232 +dd 1852021992,1852021992 +dd 2075868123,2075868123 +dd 158869197,158869197 +dd 4095236462,4095236462 +dd 28809964,28809964 +dd 2828685187,2828685187 +dd 1701746150,1701746150 +dd 2129067946,2129067946 +dd 147831841,147831841 +dd 3873969647,3873969647 +dd 3650873274,3650873274 +dd 3459673930,3459673930 +dd 3557400554,3557400554 +dd 3598495785,3598495785 +dd 2947720241,2947720241 +dd 824393514,824393514 +dd 815048134,815048134 +dd 3227951669,3227951669 +dd 935087732,935087732 +dd 2798289660,2798289660 +dd 2966458592,2966458592 +dd 366520115,366520115 +dd 1251476721,1251476721 +dd 4158319681,4158319681 +dd 240176511,240176511 +dd 804688151,804688151 +dd 2379631990,2379631990 +dd 1303441219,1303441219 +dd 1414376140,1414376140 +dd 3741619940,3741619940 +dd 3820343710,3820343710 +dd 461924940,461924940 +dd 3089050817,3089050817 +dd 2136040774,2136040774 +dd 82468509,82468509 +dd 1563790337,1563790337 +dd 1937016826,1937016826 +dd 776014843,776014843 +dd 1511876531,1511876531 +dd 1389550482,1389550482 +dd 861278441,861278441 +dd 323475053,323475053 +dd 2355222426,2355222426 +dd 2047648055,2047648055 +dd 2383738969,2383738969 +dd 2302415851,2302415851 +dd 3995576782,3995576782 +dd 902390199,902390199 +dd 3991215329,3991215329 +dd 1018251130,1018251130 +dd 1507840668,1507840668 +dd 1064563285,1064563285 +dd 2043548696,2043548696 +dd 3208103795,3208103795 +dd 3939366739,3939366739 +dd 1537932639,1537932639 +dd 342834655,342834655 +dd 2262516856,2262516856 +dd 2180231114,2180231114 +dd 1053059257,1053059257 +dd 741614648,741614648 +dd 1598071746,1598071746 +dd 1925389590,1925389590 +dd 203809468,203809468 +dd 2336832552,2336832552 +dd 1100287487,1100287487 +dd 1895934009,1895934009 +dd 3736275976,3736275976 +dd 2632234200,2632234200 +dd 2428589668,2428589668 +dd 1636092795,1636092795 +dd 1890988757,1890988757 +dd 1952214088,1952214088 +dd 1113045200,1113045200 +db 82,9,106,213,48,54,165,56 +db 191,64,163,158,129,243,215,251 +db 124,227,57,130,155,47,255,135 +db 52,142,67,68,196,222,233,203 +db 84,123,148,50,166,194,35,61 +db 238,76,149,11,66,250,195,78 +db 8,46,161,102,40,217,36,178 +db 118,91,162,73,109,139,209,37 +db 114,248,246,100,134,104,152,22 +db 212,164,92,204,93,101,182,146 +db 108,112,72,80,253,237,185,218 +db 94,21,70,87,167,141,157,132 +db 144,216,171,0,140,188,211,10 +db 247,228,88,5,184,179,69,6 +db 208,44,30,143,202,63,15,2 +db 193,175,189,3,1,19,138,107 +db 58,145,17,65,79,103,220,234 +db 151,242,207,206,240,180,230,115 +db 150,172,116,34,231,173,53,133 +db 226,249,55,232,28,117,223,110 +db 71,241,26,113,29,41,197,137 +db 111,183,98,14,170,24,190,27 +db 252,86,62,75,198,210,121,32 +db 154,219,192,254,120,205,90,244 +db 31,221,168,51,136,7,199,49 +db 177,18,16,89,39,128,236,95 +db 96,81,127,169,25,181,74,13 +db 45,229,122,159,147,201,156,239 +db 160,224,59,77,174,42,245,176 +db 200,235,187,60,131,83,153,97 +db 23,43,4,126,186,119,214,38 +db 225,105,20,99,85,33,12,125 +db 82,9,106,213,48,54,165,56 +db 191,64,163,158,129,243,215,251 +db 124,227,57,130,155,47,255,135 +db 52,142,67,68,196,222,233,203 +db 84,123,148,50,166,194,35,61 +db 238,76,149,11,66,250,195,78 +db 8,46,161,102,40,217,36,178 +db 118,91,162,73,109,139,209,37 +db 114,248,246,100,134,104,152,22 +db 212,164,92,204,93,101,182,146 +db 108,112,72,80,253,237,185,218 +db 94,21,70,87,167,141,157,132 +db 144,216,171,0,140,188,211,10 +db 247,228,88,5,184,179,69,6 +db 208,44,30,143,202,63,15,2 +db 193,175,189,3,1,19,138,107 +db 58,145,17,65,79,103,220,234 +db 151,242,207,206,240,180,230,115 +db 150,172,116,34,231,173,53,133 +db 226,249,55,232,28,117,223,110 +db 71,241,26,113,29,41,197,137 +db 111,183,98,14,170,24,190,27 +db 252,86,62,75,198,210,121,32 +db 154,219,192,254,120,205,90,244 +db 31,221,168,51,136,7,199,49 +db 177,18,16,89,39,128,236,95 +db 96,81,127,169,25,181,74,13 +db 45,229,122,159,147,201,156,239 +db 160,224,59,77,174,42,245,176 +db 200,235,187,60,131,83,153,97 +db 23,43,4,126,186,119,214,38 +db 225,105,20,99,85,33,12,125 +db 82,9,106,213,48,54,165,56 +db 191,64,163,158,129,243,215,251 +db 124,227,57,130,155,47,255,135 +db 52,142,67,68,196,222,233,203 +db 84,123,148,50,166,194,35,61 +db 238,76,149,11,66,250,195,78 +db 8,46,161,102,40,217,36,178 +db 118,91,162,73,109,139,209,37 +db 114,248,246,100,134,104,152,22 +db 212,164,92,204,93,101,182,146 +db 108,112,72,80,253,237,185,218 +db 94,21,70,87,167,141,157,132 +db 144,216,171,0,140,188,211,10 +db 247,228,88,5,184,179,69,6 +db 208,44,30,143,202,63,15,2 +db 193,175,189,3,1,19,138,107 +db 58,145,17,65,79,103,220,234 +db 151,242,207,206,240,180,230,115 +db 150,172,116,34,231,173,53,133 +db 226,249,55,232,28,117,223,110 +db 71,241,26,113,29,41,197,137 +db 111,183,98,14,170,24,190,27 +db 252,86,62,75,198,210,121,32 +db 154,219,192,254,120,205,90,244 +db 31,221,168,51,136,7,199,49 +db 177,18,16,89,39,128,236,95 +db 96,81,127,169,25,181,74,13 +db 45,229,122,159,147,201,156,239 +db 160,224,59,77,174,42,245,176 +db 200,235,187,60,131,83,153,97 +db 23,43,4,126,186,119,214,38 +db 225,105,20,99,85,33,12,125 +db 82,9,106,213,48,54,165,56 +db 191,64,163,158,129,243,215,251 +db 124,227,57,130,155,47,255,135 +db 52,142,67,68,196,222,233,203 +db 84,123,148,50,166,194,35,61 +db 238,76,149,11,66,250,195,78 +db 8,46,161,102,40,217,36,178 +db 118,91,162,73,109,139,209,37 +db 114,248,246,100,134,104,152,22 +db 212,164,92,204,93,101,182,146 +db 108,112,72,80,253,237,185,218 +db 94,21,70,87,167,141,157,132 +db 144,216,171,0,140,188,211,10 +db 247,228,88,5,184,179,69,6 +db 208,44,30,143,202,63,15,2 +db 193,175,189,3,1,19,138,107 +db 58,145,17,65,79,103,220,234 +db 151,242,207,206,240,180,230,115 +db 150,172,116,34,231,173,53,133 +db 226,249,55,232,28,117,223,110 +db 71,241,26,113,29,41,197,137 +db 111,183,98,14,170,24,190,27 +db 252,86,62,75,198,210,121,32 +db 154,219,192,254,120,205,90,244 +db 31,221,168,51,136,7,199,49 +db 177,18,16,89,39,128,236,95 +db 96,81,127,169,25,181,74,13 +db 45,229,122,159,147,201,156,239 +db 160,224,59,77,174,42,245,176 +db 200,235,187,60,131,83,153,97 +db 23,43,4,126,186,119,214,38 +db 225,105,20,99,85,33,12,125 +global _asm_AES_decrypt +align 16 +_asm_AES_decrypt: +L$_asm_AES_decrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [28+esp] + mov eax,esp + sub esp,36 + and esp,-64 + lea ebx,[edi-127] + sub ebx,esp + neg ebx + and ebx,960 + sub esp,ebx + add esp,4 + mov DWORD [28+esp],eax + call L$010pic_point +L$010pic_point: + pop ebp + lea eax,[_OPENSSL_ia32cap_P] + lea ebp,[(L$AES_Td-L$010pic_point)+ebp] + lea ebx,[764+esp] + sub ebx,ebp + and ebx,768 + lea ebp,[2176+ebx*1+ebp] + bt DWORD [eax],25 + jnc NEAR L$011x86 + movq mm0,[esi] + movq mm4,[8+esi] + call __sse_AES_decrypt_compact + mov esp,DWORD [28+esp] + mov esi,DWORD [24+esp] + movq [esi],mm0 + movq [8+esi],mm4 + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +L$011x86: + mov DWORD [24+esp],ebp + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + call __x86_AES_decrypt_compact + mov esp,DWORD [28+esp] + mov esi,DWORD [24+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +global _asm_AES_cbc_encrypt +align 16 +_asm_AES_cbc_encrypt: +L$_asm_AES_cbc_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov ecx,DWORD [28+esp] + cmp ecx,0 + je NEAR L$012drop_out + call L$013pic_point +L$013pic_point: + pop ebp + lea eax,[_OPENSSL_ia32cap_P] + cmp DWORD [40+esp],0 + lea ebp,[(L$AES_Te-L$013pic_point)+ebp] + jne NEAR L$014picked_te + lea ebp,[(L$AES_Td-L$AES_Te)+ebp] +L$014picked_te: + pushfd + cld + cmp ecx,512 + jb NEAR L$015slow_way + test ecx,15 + jnz NEAR L$015slow_way + bt DWORD [eax],28 + jc NEAR L$015slow_way + lea esi,[esp-324] + and esi,-64 + mov eax,ebp + lea ebx,[2304+ebp] + mov edx,esi + and eax,4095 + and ebx,4095 + and edx,4095 + cmp edx,ebx + jb NEAR L$016tbl_break_out + sub edx,ebx + sub esi,edx + jmp NEAR L$017tbl_ok +align 4 +L$016tbl_break_out: + sub edx,eax + and edx,4095 + add edx,384 + sub esi,edx +align 4 +L$017tbl_ok: + lea edx,[24+esp] + xchg esp,esi + add esp,4 + mov DWORD [24+esp],ebp + mov DWORD [28+esp],esi + mov eax,DWORD [edx] + mov ebx,DWORD [4+edx] + mov edi,DWORD [12+edx] + mov esi,DWORD [16+edx] + mov edx,DWORD [20+edx] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov DWORD [40+esp],ecx + mov DWORD [44+esp],edi + mov DWORD [48+esp],esi + mov DWORD [316+esp],0 + mov ebx,edi + mov ecx,61 + sub ebx,ebp + mov esi,edi + and ebx,4095 + lea edi,[76+esp] + cmp ebx,2304 + jb NEAR L$018do_copy + cmp ebx,3852 + jb NEAR L$019skip_copy +align 4 +L$018do_copy: + mov DWORD [44+esp],edi +dd 2784229001 +L$019skip_copy: + mov edi,16 +align 4 +L$020prefetch_tbl: + mov eax,DWORD [ebp] + mov ebx,DWORD [32+ebp] + mov ecx,DWORD [64+ebp] + mov esi,DWORD [96+ebp] + lea ebp,[128+ebp] + sub edi,1 + jnz NEAR L$020prefetch_tbl + sub ebp,2048 + mov esi,DWORD [32+esp] + mov edi,DWORD [48+esp] + cmp edx,0 + je NEAR L$021fast_decrypt + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] +align 16 +L$022fast_enc_loop: + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + xor eax,DWORD [esi] + xor ebx,DWORD [4+esi] + xor ecx,DWORD [8+esi] + xor edx,DWORD [12+esi] + mov edi,DWORD [44+esp] + call __x86_AES_encrypt + mov esi,DWORD [32+esp] + mov edi,DWORD [36+esp] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + lea esi,[16+esi] + mov ecx,DWORD [40+esp] + mov DWORD [32+esp],esi + lea edx,[16+edi] + mov DWORD [36+esp],edx + sub ecx,16 + mov DWORD [40+esp],ecx + jnz NEAR L$022fast_enc_loop + mov esi,DWORD [48+esp] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + cmp DWORD [316+esp],0 + mov edi,DWORD [44+esp] + je NEAR L$023skip_ezero + mov ecx,60 + xor eax,eax +align 4 +dd 2884892297 +L$023skip_ezero: + mov esp,DWORD [28+esp] + popfd +L$012drop_out: + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$021fast_decrypt: + cmp esi,DWORD [36+esp] + je NEAR L$024fast_dec_in_place + mov DWORD [52+esp],edi +align 4 +align 16 +L$025fast_dec_loop: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov edi,DWORD [44+esp] + call __x86_AES_decrypt + mov edi,DWORD [52+esp] + mov esi,DWORD [40+esp] + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov edi,DWORD [36+esp] + mov esi,DWORD [32+esp] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov ecx,DWORD [40+esp] + mov DWORD [52+esp],esi + lea esi,[16+esi] + mov DWORD [32+esp],esi + lea edi,[16+edi] + mov DWORD [36+esp],edi + sub ecx,16 + mov DWORD [40+esp],ecx + jnz NEAR L$025fast_dec_loop + mov edi,DWORD [52+esp] + mov esi,DWORD [48+esp] + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + jmp NEAR L$026fast_dec_out +align 16 +L$024fast_dec_in_place: +L$027fast_dec_in_place_loop: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + lea edi,[60+esp] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov edi,DWORD [44+esp] + call __x86_AES_decrypt + mov edi,DWORD [48+esp] + mov esi,DWORD [36+esp] + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + lea esi,[16+esi] + mov DWORD [36+esp],esi + lea esi,[60+esp] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov esi,DWORD [32+esp] + mov ecx,DWORD [40+esp] + lea esi,[16+esi] + mov DWORD [32+esp],esi + sub ecx,16 + mov DWORD [40+esp],ecx + jnz NEAR L$027fast_dec_in_place_loop +align 4 +L$026fast_dec_out: + cmp DWORD [316+esp],0 + mov edi,DWORD [44+esp] + je NEAR L$028skip_dzero + mov ecx,60 + xor eax,eax +align 4 +dd 2884892297 +L$028skip_dzero: + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$015slow_way: + mov eax,DWORD [eax] + mov edi,DWORD [36+esp] + lea esi,[esp-80] + and esi,-64 + lea ebx,[edi-143] + sub ebx,esi + neg ebx + and ebx,960 + sub esi,ebx + lea ebx,[768+esi] + sub ebx,ebp + and ebx,768 + lea ebp,[2176+ebx*1+ebp] + lea edx,[24+esp] + xchg esp,esi + add esp,4 + mov DWORD [24+esp],ebp + mov DWORD [28+esp],esi + mov DWORD [52+esp],eax + mov eax,DWORD [edx] + mov ebx,DWORD [4+edx] + mov esi,DWORD [16+edx] + mov edx,DWORD [20+edx] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov DWORD [40+esp],ecx + mov DWORD [44+esp],edi + mov DWORD [48+esp],esi + mov edi,esi + mov esi,eax + cmp edx,0 + je NEAR L$029slow_decrypt + cmp ecx,16 + mov edx,ebx + jb NEAR L$030slow_enc_tail + bt DWORD [52+esp],25 + jnc NEAR L$031slow_enc_x86 + movq mm0,[edi] + movq mm4,[8+edi] +align 16 +L$032slow_enc_loop_sse: + pxor mm0,[esi] + pxor mm4,[8+esi] + mov edi,DWORD [44+esp] + call __sse_AES_encrypt_compact + mov esi,DWORD [32+esp] + mov edi,DWORD [36+esp] + mov ecx,DWORD [40+esp] + movq [edi],mm0 + movq [8+edi],mm4 + lea esi,[16+esi] + mov DWORD [32+esp],esi + lea edx,[16+edi] + mov DWORD [36+esp],edx + sub ecx,16 + cmp ecx,16 + mov DWORD [40+esp],ecx + jae NEAR L$032slow_enc_loop_sse + test ecx,15 + jnz NEAR L$030slow_enc_tail + mov esi,DWORD [48+esp] + movq [esi],mm0 + movq [8+esi],mm4 + emms + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$031slow_enc_x86: + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] +align 4 +L$033slow_enc_loop_x86: + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + xor eax,DWORD [esi] + xor ebx,DWORD [4+esi] + xor ecx,DWORD [8+esi] + xor edx,DWORD [12+esi] + mov edi,DWORD [44+esp] + call __x86_AES_encrypt_compact + mov esi,DWORD [32+esp] + mov edi,DWORD [36+esp] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov ecx,DWORD [40+esp] + lea esi,[16+esi] + mov DWORD [32+esp],esi + lea edx,[16+edi] + mov DWORD [36+esp],edx + sub ecx,16 + cmp ecx,16 + mov DWORD [40+esp],ecx + jae NEAR L$033slow_enc_loop_x86 + test ecx,15 + jnz NEAR L$030slow_enc_tail + mov esi,DWORD [48+esp] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$030slow_enc_tail: + emms + mov edi,edx + mov ebx,16 + sub ebx,ecx + cmp edi,esi + je NEAR L$034enc_in_place +align 4 +dd 2767451785 + jmp NEAR L$035enc_skip_in_place +L$034enc_in_place: + lea edi,[ecx*1+edi] +L$035enc_skip_in_place: + mov ecx,ebx + xor eax,eax +align 4 +dd 2868115081 + mov edi,DWORD [48+esp] + mov esi,edx + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov DWORD [40+esp],16 + jmp NEAR L$033slow_enc_loop_x86 +align 16 +L$029slow_decrypt: + bt DWORD [52+esp],25 + jnc NEAR L$036slow_dec_loop_x86 +align 4 +L$037slow_dec_loop_sse: + movq mm0,[esi] + movq mm4,[8+esi] + mov edi,DWORD [44+esp] + call __sse_AES_decrypt_compact + mov esi,DWORD [32+esp] + lea eax,[60+esp] + mov ebx,DWORD [36+esp] + mov ecx,DWORD [40+esp] + mov edi,DWORD [48+esp] + movq mm1,[esi] + movq mm5,[8+esi] + pxor mm0,[edi] + pxor mm4,[8+edi] + movq [edi],mm1 + movq [8+edi],mm5 + sub ecx,16 + jc NEAR L$038slow_dec_partial_sse + movq [ebx],mm0 + movq [8+ebx],mm4 + lea ebx,[16+ebx] + mov DWORD [36+esp],ebx + lea esi,[16+esi] + mov DWORD [32+esp],esi + mov DWORD [40+esp],ecx + jnz NEAR L$037slow_dec_loop_sse + emms + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$038slow_dec_partial_sse: + movq [eax],mm0 + movq [8+eax],mm4 + emms + add ecx,16 + mov edi,ebx + mov esi,eax +align 4 +dd 2767451785 + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$036slow_dec_loop_x86: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + lea edi,[60+esp] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov edi,DWORD [44+esp] + call __x86_AES_decrypt_compact + mov edi,DWORD [48+esp] + mov esi,DWORD [40+esp] + xor eax,DWORD [edi] + xor ebx,DWORD [4+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [12+edi] + sub esi,16 + jc NEAR L$039slow_dec_partial_x86 + mov DWORD [40+esp],esi + mov esi,DWORD [36+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + lea esi,[16+esi] + mov DWORD [36+esp],esi + lea esi,[60+esp] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov esi,DWORD [32+esp] + lea esi,[16+esi] + mov DWORD [32+esp],esi + jnz NEAR L$036slow_dec_loop_x86 + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +align 16 +L$039slow_dec_partial_x86: + lea esi,[60+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + mov esi,DWORD [32+esp] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov ecx,DWORD [40+esp] + mov edi,DWORD [36+esp] + lea esi,[60+esp] +align 4 +dd 2767451785 + mov esp,DWORD [28+esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__x86_AES_set_encrypt_key: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [24+esp] + mov edi,DWORD [32+esp] + test esi,-1 + jz NEAR L$040badpointer + test edi,-1 + jz NEAR L$040badpointer + call L$041pic_point +L$041pic_point: + pop ebp + lea ebp,[(L$AES_Te-L$041pic_point)+ebp] + lea ebp,[2176+ebp] + mov eax,DWORD [ebp-128] + mov ebx,DWORD [ebp-96] + mov ecx,DWORD [ebp-64] + mov edx,DWORD [ebp-32] + mov eax,DWORD [ebp] + mov ebx,DWORD [32+ebp] + mov ecx,DWORD [64+ebp] + mov edx,DWORD [96+ebp] + mov ecx,DWORD [28+esp] + cmp ecx,128 + je NEAR L$04210rounds + cmp ecx,192 + je NEAR L$04312rounds + cmp ecx,256 + je NEAR L$04414rounds + mov eax,-2 + jmp NEAR L$045exit +L$04210rounds: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + xor ecx,ecx + jmp NEAR L$04610shortcut +align 4 +L$04710loop: + mov eax,DWORD [edi] + mov edx,DWORD [12+edi] +L$04610shortcut: + movzx esi,dl + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD [896+ecx*4+ebp] + mov DWORD [16+edi],eax + xor eax,DWORD [4+edi] + mov DWORD [20+edi],eax + xor eax,DWORD [8+edi] + mov DWORD [24+edi],eax + xor eax,DWORD [12+edi] + mov DWORD [28+edi],eax + inc ecx + add edi,16 + cmp ecx,10 + jl NEAR L$04710loop + mov DWORD [80+edi],10 + xor eax,eax + jmp NEAR L$045exit +L$04312rounds: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov ecx,DWORD [16+esi] + mov edx,DWORD [20+esi] + mov DWORD [16+edi],ecx + mov DWORD [20+edi],edx + xor ecx,ecx + jmp NEAR L$04812shortcut +align 4 +L$04912loop: + mov eax,DWORD [edi] + mov edx,DWORD [20+edi] +L$04812shortcut: + movzx esi,dl + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD [896+ecx*4+ebp] + mov DWORD [24+edi],eax + xor eax,DWORD [4+edi] + mov DWORD [28+edi],eax + xor eax,DWORD [8+edi] + mov DWORD [32+edi],eax + xor eax,DWORD [12+edi] + mov DWORD [36+edi],eax + cmp ecx,7 + je NEAR L$05012break + inc ecx + xor eax,DWORD [16+edi] + mov DWORD [40+edi],eax + xor eax,DWORD [20+edi] + mov DWORD [44+edi],eax + add edi,24 + jmp NEAR L$04912loop +L$05012break: + mov DWORD [72+edi],12 + xor eax,eax + jmp NEAR L$045exit +L$04414rounds: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [12+edi],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + mov DWORD [16+edi],eax + mov DWORD [20+edi],ebx + mov DWORD [24+edi],ecx + mov DWORD [28+edi],edx + xor ecx,ecx + jmp NEAR L$05114shortcut +align 4 +L$05214loop: + mov edx,DWORD [28+edi] +L$05114shortcut: + mov eax,DWORD [edi] + movzx esi,dl + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD [896+ecx*4+ebp] + mov DWORD [32+edi],eax + xor eax,DWORD [4+edi] + mov DWORD [36+edi],eax + xor eax,DWORD [8+edi] + mov DWORD [40+edi],eax + xor eax,DWORD [12+edi] + mov DWORD [44+edi],eax + cmp ecx,6 + je NEAR L$05314break + inc ecx + mov edx,eax + mov eax,DWORD [16+edi] + movzx esi,dl + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + movzx esi,dh + shl ebx,16 + xor eax,ebx + movzx ebx,BYTE [esi*1+ebp-128] + shl ebx,24 + xor eax,ebx + mov DWORD [48+edi],eax + xor eax,DWORD [20+edi] + mov DWORD [52+edi],eax + xor eax,DWORD [24+edi] + mov DWORD [56+edi],eax + xor eax,DWORD [28+edi] + mov DWORD [60+edi],eax + add edi,32 + jmp NEAR L$05214loop +L$05314break: + mov DWORD [48+edi],14 + xor eax,eax + jmp NEAR L$045exit +L$040badpointer: + mov eax,-1 +L$045exit: + pop edi + pop esi + pop ebx + pop ebp + ret +global _asm_AES_set_encrypt_key +align 16 +_asm_AES_set_encrypt_key: +L$_asm_AES_set_encrypt_key_begin: + call __x86_AES_set_encrypt_key + ret +global _asm_AES_set_decrypt_key +align 16 +_asm_AES_set_decrypt_key: +L$_asm_AES_set_decrypt_key_begin: + call __x86_AES_set_encrypt_key + cmp eax,0 + je NEAR L$054proceed + ret +L$054proceed: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [28+esp] + mov ecx,DWORD [240+esi] + lea ecx,[ecx*4] + lea edi,[ecx*4+esi] +align 4 +L$055invert: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [edi] + mov edx,DWORD [4+edi] + mov DWORD [edi],eax + mov DWORD [4+edi],ebx + mov DWORD [esi],ecx + mov DWORD [4+esi],edx + mov eax,DWORD [8+esi] + mov ebx,DWORD [12+esi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov DWORD [8+edi],eax + mov DWORD [12+edi],ebx + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + add esi,16 + sub edi,16 + cmp esi,edi + jne NEAR L$055invert + mov edi,DWORD [28+esp] + mov esi,DWORD [240+edi] + lea esi,[esi*1+esi-2] + lea esi,[esi*8+edi] + mov DWORD [28+esp],esi + mov eax,DWORD [16+edi] +align 4 +L$056permute: + add edi,16 + mov ebp,2155905152 + and ebp,eax + lea ebx,[eax*1+eax] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx + lea ecx,[ebx*1+ebx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + xor ebx,eax + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edx,[ecx*1+ecx] + mov esi,ebp + shr ebp,7 + xor ecx,eax + sub esi,ebp + and edx,4278124286 + and esi,454761243 + rol eax,8 + xor edx,esi + mov ebp,DWORD [4+edi] + xor eax,ebx + xor ebx,edx + xor eax,ecx + rol ebx,24 + xor ecx,edx + xor eax,edx + rol ecx,16 + xor eax,ebx + rol edx,8 + xor eax,ecx + mov ebx,ebp + xor eax,edx + mov DWORD [edi],eax + mov ebp,2155905152 + and ebp,ebx + lea ecx,[ebx*1+ebx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edx,[ecx*1+ecx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and edx,4278124286 + and esi,454761243 + xor ecx,ebx + xor edx,esi + mov ebp,2155905152 + and ebp,edx + lea eax,[edx*1+edx] + mov esi,ebp + shr ebp,7 + xor edx,ebx + sub esi,ebp + and eax,4278124286 + and esi,454761243 + rol ebx,8 + xor eax,esi + mov ebp,DWORD [8+edi] + xor ebx,ecx + xor ecx,eax + xor ebx,edx + rol ecx,24 + xor edx,eax + xor ebx,eax + rol edx,16 + xor ebx,ecx + rol eax,8 + xor ebx,edx + mov ecx,ebp + xor ebx,eax + mov DWORD [4+edi],ebx + mov ebp,2155905152 + and ebp,ecx + lea edx,[ecx*1+ecx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and edx,4278124286 + and esi,454761243 + xor edx,esi + mov ebp,2155905152 + and ebp,edx + lea eax,[edx*1+edx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and eax,4278124286 + and esi,454761243 + xor edx,ecx + xor eax,esi + mov ebp,2155905152 + and ebp,eax + lea ebx,[eax*1+eax] + mov esi,ebp + shr ebp,7 + xor eax,ecx + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + rol ecx,8 + xor ebx,esi + mov ebp,DWORD [12+edi] + xor ecx,edx + xor edx,ebx + xor ecx,eax + rol edx,24 + xor eax,ebx + xor ecx,ebx + rol eax,16 + xor ecx,edx + rol ebx,8 + xor ecx,eax + mov edx,ebp + xor ecx,ebx + mov DWORD [8+edi],ecx + mov ebp,2155905152 + and ebp,edx + lea eax,[edx*1+edx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and eax,4278124286 + and esi,454761243 + xor eax,esi + mov ebp,2155905152 + and ebp,eax + lea ebx,[eax*1+eax] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + xor eax,edx + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx + lea ecx,[ebx*1+ebx] + mov esi,ebp + shr ebp,7 + xor ebx,edx + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + rol edx,8 + xor ecx,esi + mov ebp,DWORD [16+edi] + xor edx,eax + xor eax,ecx + xor edx,ebx + rol eax,24 + xor ebx,ecx + xor edx,ecx + rol ebx,16 + xor edx,eax + rol ecx,8 + xor edx,ebx + mov eax,ebp + xor edx,ecx + mov DWORD [12+edi],edx + cmp edi,DWORD [28+esp] + jb NEAR L$056permute + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +db 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/aes/aesni-x86.asm b/third_party/boringssl/win-x86/crypto/aes/aesni-x86.asm new file mode 100644 index 00000000000..a9a595653f0 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/aes/aesni-x86.asm @@ -0,0 +1,2424 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _aesni_encrypt +align 16 +_aesni_encrypt: +L$_aesni_encrypt_begin: + mov eax,DWORD [4+esp] + mov edx,DWORD [12+esp] + movups xmm2,[eax] + mov ecx,DWORD [240+edx] + mov eax,DWORD [8+esp] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$000enc1_loop_1: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$000enc1_loop_1 +db 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups [eax],xmm2 + pxor xmm2,xmm2 + ret +global _aesni_decrypt +align 16 +_aesni_decrypt: +L$_aesni_decrypt_begin: + mov eax,DWORD [4+esp] + mov edx,DWORD [12+esp] + movups xmm2,[eax] + mov ecx,DWORD [240+edx] + mov eax,DWORD [8+esp] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$001dec1_loop_2: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$001dec1_loop_2 +db 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups [eax],xmm2 + pxor xmm2,xmm2 + ret +align 16 +__aesni_encrypt2: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$002enc2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$002enc2_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,221,208 +db 102,15,56,221,216 + ret +align 16 +__aesni_decrypt2: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$003dec2_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$003dec2_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,223,208 +db 102,15,56,223,216 + ret +align 16 +__aesni_encrypt3: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$004enc3_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$004enc3_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 + ret +align 16 +__aesni_decrypt3: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx + add ecx,16 +L$005dec3_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$005dec3_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 + ret +align 16 +__aesni_encrypt4: + movups xmm0,[edx] + movups xmm1,[16+edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx +db 15,31,64,0 + add ecx,16 +L$006enc4_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 +db 102,15,56,220,232 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$006enc4_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 +db 102,15,56,221,232 + ret +align 16 +__aesni_decrypt4: + movups xmm0,[edx] + movups xmm1,[16+edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,[32+edx] + lea edx,[32+ecx*1+edx] + neg ecx +db 15,31,64,0 + add ecx,16 +L$007dec4_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 +db 102,15,56,222,232 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$007dec4_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 +db 102,15,56,223,232 + ret +align 16 +__aesni_encrypt6: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +db 102,15,56,220,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +db 102,15,56,220,217 + lea edx,[32+ecx*1+edx] + neg ecx +db 102,15,56,220,225 + pxor xmm7,xmm0 + movups xmm0,[ecx*1+edx] + add ecx,16 + jmp NEAR L$008_aesni_encrypt6_inner +align 16 +L$009enc6_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +L$008_aesni_encrypt6_inner: +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 +L$_aesni_encrypt6_enter: + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 +db 102,15,56,220,224 +db 102,15,56,220,232 +db 102,15,56,220,240 +db 102,15,56,220,248 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$009enc6_loop +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 +db 102,15,56,221,208 +db 102,15,56,221,216 +db 102,15,56,221,224 +db 102,15,56,221,232 +db 102,15,56,221,240 +db 102,15,56,221,248 + ret +align 16 +__aesni_decrypt6: + movups xmm0,[edx] + shl ecx,4 + movups xmm1,[16+edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +db 102,15,56,222,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +db 102,15,56,222,217 + lea edx,[32+ecx*1+edx] + neg ecx +db 102,15,56,222,225 + pxor xmm7,xmm0 + movups xmm0,[ecx*1+edx] + add ecx,16 + jmp NEAR L$010_aesni_decrypt6_inner +align 16 +L$011dec6_loop: +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +L$010_aesni_decrypt6_inner: +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 +L$_aesni_decrypt6_enter: + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,222,208 +db 102,15,56,222,216 +db 102,15,56,222,224 +db 102,15,56,222,232 +db 102,15,56,222,240 +db 102,15,56,222,248 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$011dec6_loop +db 102,15,56,222,209 +db 102,15,56,222,217 +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 +db 102,15,56,223,208 +db 102,15,56,223,216 +db 102,15,56,223,224 +db 102,15,56,223,232 +db 102,15,56,223,240 +db 102,15,56,223,248 + ret +global _aesni_ecb_encrypt +align 16 +_aesni_ecb_encrypt: +L$_aesni_ecb_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + and eax,-16 + jz NEAR L$012ecb_ret + mov ecx,DWORD [240+edx] + test ebx,ebx + jz NEAR L$013ecb_decrypt + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb NEAR L$014ecb_enc_tail + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] + sub eax,96 + jmp NEAR L$015ecb_enc_loop6_enter +align 16 +L$016ecb_enc_loop6: + movups [edi],xmm2 + movdqu xmm2,[esi] + movups [16+edi],xmm3 + movdqu xmm3,[16+esi] + movups [32+edi],xmm4 + movdqu xmm4,[32+esi] + movups [48+edi],xmm5 + movdqu xmm5,[48+esi] + movups [64+edi],xmm6 + movdqu xmm6,[64+esi] + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] +L$015ecb_enc_loop6_enter: + call __aesni_encrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc NEAR L$016ecb_enc_loop6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + movups [80+edi],xmm7 + lea edi,[96+edi] + add eax,96 + jz NEAR L$012ecb_ret +L$014ecb_enc_tail: + movups xmm2,[esi] + cmp eax,32 + jb NEAR L$017ecb_enc_one + movups xmm3,[16+esi] + je NEAR L$018ecb_enc_two + movups xmm4,[32+esi] + cmp eax,64 + jb NEAR L$019ecb_enc_three + movups xmm5,[48+esi] + je NEAR L$020ecb_enc_four + movups xmm6,[64+esi] + xorps xmm7,xmm7 + call __aesni_encrypt6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$012ecb_ret +align 16 +L$017ecb_enc_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$021enc1_loop_3: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$021enc1_loop_3 +db 102,15,56,221,209 + movups [edi],xmm2 + jmp NEAR L$012ecb_ret +align 16 +L$018ecb_enc_two: + call __aesni_encrypt2 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$012ecb_ret +align 16 +L$019ecb_enc_three: + call __aesni_encrypt3 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$012ecb_ret +align 16 +L$020ecb_enc_four: + call __aesni_encrypt4 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + jmp NEAR L$012ecb_ret +align 16 +L$013ecb_decrypt: + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb NEAR L$022ecb_dec_tail + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] + sub eax,96 + jmp NEAR L$023ecb_dec_loop6_enter +align 16 +L$024ecb_dec_loop6: + movups [edi],xmm2 + movdqu xmm2,[esi] + movups [16+edi],xmm3 + movdqu xmm3,[16+esi] + movups [32+edi],xmm4 + movdqu xmm4,[32+esi] + movups [48+edi],xmm5 + movdqu xmm5,[48+esi] + movups [64+edi],xmm6 + movdqu xmm6,[64+esi] + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqu xmm7,[80+esi] + lea esi,[96+esi] +L$023ecb_dec_loop6_enter: + call __aesni_decrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc NEAR L$024ecb_dec_loop6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + movups [80+edi],xmm7 + lea edi,[96+edi] + add eax,96 + jz NEAR L$012ecb_ret +L$022ecb_dec_tail: + movups xmm2,[esi] + cmp eax,32 + jb NEAR L$025ecb_dec_one + movups xmm3,[16+esi] + je NEAR L$026ecb_dec_two + movups xmm4,[32+esi] + cmp eax,64 + jb NEAR L$027ecb_dec_three + movups xmm5,[48+esi] + je NEAR L$028ecb_dec_four + movups xmm6,[64+esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$012ecb_ret +align 16 +L$025ecb_dec_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$029dec1_loop_4: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$029dec1_loop_4 +db 102,15,56,223,209 + movups [edi],xmm2 + jmp NEAR L$012ecb_ret +align 16 +L$026ecb_dec_two: + call __aesni_decrypt2 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$012ecb_ret +align 16 +L$027ecb_dec_three: + call __aesni_decrypt3 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$012ecb_ret +align 16 +L$028ecb_dec_four: + call __aesni_decrypt4 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 +L$012ecb_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_ccm64_encrypt_blocks +align 16 +_aesni_ccm64_encrypt_blocks: +L$_aesni_ccm64_encrypt_blocks_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ecx,DWORD [40+esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD [48+esp],ebp + movdqu xmm7,[ebx] + movdqu xmm3,[ecx] + mov ecx,DWORD [240+edx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD [16+esp],ebx + mov DWORD [20+esp],ebp + mov DWORD [24+esp],ebp + mov DWORD [28+esp],ebp + shl ecx,4 + mov ebx,16 + lea ebp,[edx] + movdqa xmm5,[esp] + movdqa xmm2,xmm7 + lea edx,[32+ecx*1+edx] + sub ebx,ecx +db 102,15,56,0,253 +L$030ccm64_enc_outer: + movups xmm0,[ebp] + mov ecx,ebx + movups xmm6,[esi] + xorps xmm2,xmm0 + movups xmm1,[16+ebp] + xorps xmm0,xmm6 + xorps xmm3,xmm0 + movups xmm0,[32+ebp] +L$031ccm64_enc2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$031ccm64_enc2_loop +db 102,15,56,220,209 +db 102,15,56,220,217 + paddq xmm7,[16+esp] + dec eax +db 102,15,56,221,208 +db 102,15,56,221,216 + lea esi,[16+esi] + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups [edi],xmm6 +db 102,15,56,0,213 + lea edi,[16+edi] + jnz NEAR L$030ccm64_enc_outer + mov esp,DWORD [48+esp] + mov edi,DWORD [40+esp] + movups [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_ccm64_decrypt_blocks +align 16 +_aesni_ccm64_decrypt_blocks: +L$_aesni_ccm64_decrypt_blocks_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ecx,DWORD [40+esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD [48+esp],ebp + movdqu xmm7,[ebx] + movdqu xmm3,[ecx] + mov ecx,DWORD [240+edx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD [16+esp],ebx + mov DWORD [20+esp],ebp + mov DWORD [24+esp],ebp + mov DWORD [28+esp],ebp + movdqa xmm5,[esp] + movdqa xmm2,xmm7 + mov ebp,edx + mov ebx,ecx +db 102,15,56,0,253 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$032enc1_loop_5: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$032enc1_loop_5 +db 102,15,56,221,209 + shl ebx,4 + mov ecx,16 + movups xmm6,[esi] + paddq xmm7,[16+esp] + lea esi,[16+esi] + sub ecx,ebx + lea edx,[32+ebx*1+ebp] + mov ebx,ecx + jmp NEAR L$033ccm64_dec_outer +align 16 +L$033ccm64_dec_outer: + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups [edi],xmm6 + lea edi,[16+edi] +db 102,15,56,0,213 + sub eax,1 + jz NEAR L$034ccm64_dec_break + movups xmm0,[ebp] + mov ecx,ebx + movups xmm1,[16+ebp] + xorps xmm6,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm6 + movups xmm0,[32+ebp] +L$035ccm64_dec2_loop: +db 102,15,56,220,209 +db 102,15,56,220,217 + movups xmm1,[ecx*1+edx] + add ecx,32 +db 102,15,56,220,208 +db 102,15,56,220,216 + movups xmm0,[ecx*1+edx-16] + jnz NEAR L$035ccm64_dec2_loop + movups xmm6,[esi] + paddq xmm7,[16+esp] +db 102,15,56,220,209 +db 102,15,56,220,217 +db 102,15,56,221,208 +db 102,15,56,221,216 + lea esi,[16+esi] + jmp NEAR L$033ccm64_dec_outer +align 16 +L$034ccm64_dec_break: + mov ecx,DWORD [240+ebp] + mov edx,ebp + movups xmm0,[edx] + movups xmm1,[16+edx] + xorps xmm6,xmm0 + lea edx,[32+edx] + xorps xmm3,xmm6 +L$036enc1_loop_6: +db 102,15,56,220,217 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$036enc1_loop_6 +db 102,15,56,221,217 + mov esp,DWORD [48+esp] + mov edi,DWORD [40+esp] + movups [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_ctr32_encrypt_blocks +align 16 +_aesni_ctr32_encrypt_blocks: +L$_aesni_ctr32_encrypt_blocks_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebx,DWORD [36+esp] + mov ebp,esp + sub esp,88 + and esp,-16 + mov DWORD [80+esp],ebp + cmp eax,1 + je NEAR L$037ctr32_one_shortcut + movdqu xmm7,[ebx] + mov DWORD [esp],202182159 + mov DWORD [4+esp],134810123 + mov DWORD [8+esp],67438087 + mov DWORD [12+esp],66051 + mov ecx,6 + xor ebp,ebp + mov DWORD [16+esp],ecx + mov DWORD [20+esp],ecx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],ebp +db 102,15,58,22,251,3 +db 102,15,58,34,253,3 + mov ecx,DWORD [240+edx] + bswap ebx + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqa xmm2,[esp] +db 102,15,58,34,195,0 + lea ebp,[3+ebx] +db 102,15,58,34,205,0 + inc ebx +db 102,15,58,34,195,1 + inc ebp +db 102,15,58,34,205,1 + inc ebx +db 102,15,58,34,195,2 + inc ebp +db 102,15,58,34,205,2 + movdqa [48+esp],xmm0 +db 102,15,56,0,194 + movdqu xmm6,[edx] + movdqa [64+esp],xmm1 +db 102,15,56,0,202 + pshufd xmm2,xmm0,192 + pshufd xmm3,xmm0,128 + cmp eax,6 + jb NEAR L$038ctr32_tail + pxor xmm7,xmm6 + shl ecx,4 + mov ebx,16 + movdqa [32+esp],xmm7 + mov ebp,edx + sub ebx,ecx + lea edx,[32+ecx*1+edx] + sub eax,6 + jmp NEAR L$039ctr32_loop6 +align 16 +L$039ctr32_loop6: + pshufd xmm4,xmm0,64 + movdqa xmm0,[32+esp] + pshufd xmm5,xmm1,192 + pxor xmm2,xmm0 + pshufd xmm6,xmm1,128 + pxor xmm3,xmm0 + pshufd xmm7,xmm1,64 + movups xmm1,[16+ebp] + pxor xmm4,xmm0 + pxor xmm5,xmm0 +db 102,15,56,220,209 + pxor xmm6,xmm0 + pxor xmm7,xmm0 +db 102,15,56,220,217 + movups xmm0,[32+ebp] + mov ecx,ebx +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 + call L$_aesni_encrypt6_enter + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,xmm1 + movups xmm1,[32+esi] + xorps xmm3,xmm0 + movups [edi],xmm2 + movdqa xmm0,[16+esp] + xorps xmm4,xmm1 + movdqa xmm1,[64+esp] + movups [16+edi],xmm3 + movups [32+edi],xmm4 + paddd xmm1,xmm0 + paddd xmm0,[48+esp] + movdqa xmm2,[esp] + movups xmm3,[48+esi] + movups xmm4,[64+esi] + xorps xmm5,xmm3 + movups xmm3,[80+esi] + lea esi,[96+esi] + movdqa [48+esp],xmm0 +db 102,15,56,0,194 + xorps xmm6,xmm4 + movups [48+edi],xmm5 + xorps xmm7,xmm3 + movdqa [64+esp],xmm1 +db 102,15,56,0,202 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,192 + movups [80+edi],xmm7 + lea edi,[96+edi] + pshufd xmm3,xmm0,128 + sub eax,6 + jnc NEAR L$039ctr32_loop6 + add eax,6 + jz NEAR L$040ctr32_ret + movdqu xmm7,[ebp] + mov edx,ebp + pxor xmm7,[32+esp] + mov ecx,DWORD [240+ebp] +L$038ctr32_tail: + por xmm2,xmm7 + cmp eax,2 + jb NEAR L$041ctr32_one + pshufd xmm4,xmm0,64 + por xmm3,xmm7 + je NEAR L$042ctr32_two + pshufd xmm5,xmm1,192 + por xmm4,xmm7 + cmp eax,4 + jb NEAR L$043ctr32_three + pshufd xmm6,xmm1,128 + por xmm5,xmm7 + je NEAR L$044ctr32_four + por xmm6,xmm7 + call __aesni_encrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,xmm1 + movups xmm1,[32+esi] + xorps xmm3,xmm0 + movups xmm0,[48+esi] + xorps xmm4,xmm1 + movups xmm1,[64+esi] + xorps xmm5,xmm0 + movups [edi],xmm2 + xorps xmm6,xmm1 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + jmp NEAR L$040ctr32_ret +align 16 +L$037ctr32_one_shortcut: + movups xmm2,[ebx] + mov ecx,DWORD [240+edx] +L$041ctr32_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$045enc1_loop_7: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$045enc1_loop_7 +db 102,15,56,221,209 + movups xmm6,[esi] + xorps xmm6,xmm2 + movups [edi],xmm6 + jmp NEAR L$040ctr32_ret +align 16 +L$042ctr32_two: + call __aesni_encrypt2 + movups xmm5,[esi] + movups xmm6,[16+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + jmp NEAR L$040ctr32_ret +align 16 +L$043ctr32_three: + call __aesni_encrypt3 + movups xmm5,[esi] + movups xmm6,[16+esi] + xorps xmm2,xmm5 + movups xmm7,[32+esi] + xorps xmm3,xmm6 + movups [edi],xmm2 + xorps xmm4,xmm7 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + jmp NEAR L$040ctr32_ret +align 16 +L$044ctr32_four: + call __aesni_encrypt4 + movups xmm6,[esi] + movups xmm7,[16+esi] + movups xmm1,[32+esi] + xorps xmm2,xmm6 + movups xmm0,[48+esi] + xorps xmm3,xmm7 + movups [edi],xmm2 + xorps xmm4,xmm1 + movups [16+edi],xmm3 + xorps xmm5,xmm0 + movups [32+edi],xmm4 + movups [48+edi],xmm5 +L$040ctr32_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + mov esp,DWORD [80+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_xts_encrypt +align 16 +_aesni_xts_encrypt: +L$_aesni_xts_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov edx,DWORD [36+esp] + mov esi,DWORD [40+esp] + mov ecx,DWORD [240+edx] + movups xmm2,[esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$046enc1_loop_8: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$046enc1_loop_8 +db 102,15,56,221,209 + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebp,esp + sub esp,120 + mov ecx,DWORD [240+edx] + and esp,-16 + mov DWORD [96+esp],135 + mov DWORD [100+esp],0 + mov DWORD [104+esp],1 + mov DWORD [108+esp],0 + mov DWORD [112+esp],eax + mov DWORD [116+esp],ebp + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + mov ebp,edx + mov ebx,ecx + sub eax,96 + jc NEAR L$047xts_enc_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,[32+ecx*1+edx] + jmp NEAR L$048xts_enc_loop6 +align 16 +L$048xts_enc_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [16+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [32+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa [64+esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,[ebp] + pand xmm7,xmm3 + movups xmm2,[esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,[16+esi] + xorps xmm2,xmm0 + movdqu xmm4,[32+esi] + pxor xmm3,xmm0 + movdqu xmm5,[48+esi] + pxor xmm4,xmm0 + movdqu xmm6,[64+esi] + pxor xmm5,xmm0 + movdqu xmm1,[80+esi] + pxor xmm6,xmm0 + lea esi,[96+esi] + pxor xmm2,[esp] + movdqa [80+esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,[16+ebp] + pxor xmm3,[16+esp] + pxor xmm4,[32+esp] +db 102,15,56,220,209 + pxor xmm5,[48+esp] + pxor xmm6,[64+esp] +db 102,15,56,220,217 + pxor xmm7,xmm0 + movups xmm0,[32+ebp] +db 102,15,56,220,225 +db 102,15,56,220,233 +db 102,15,56,220,241 +db 102,15,56,220,249 + call L$_aesni_encrypt6_enter + movdqa xmm1,[80+esp] + pxor xmm0,xmm0 + xorps xmm2,[esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,[16+esp] + movups [edi],xmm2 + xorps xmm4,[32+esp] + movups [16+edi],xmm3 + xorps xmm5,[48+esp] + movups [32+edi],xmm4 + xorps xmm6,[64+esp] + movups [48+edi],xmm5 + xorps xmm7,xmm1 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,19 + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqa xmm3,[96+esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc NEAR L$048xts_enc_loop6 + mov ecx,DWORD [240+ebp] + mov edx,ebp + mov ebx,ecx +L$047xts_enc_short: + add eax,96 + jz NEAR L$049xts_enc_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb NEAR L$050xts_enc_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je NEAR L$051xts_enc_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb NEAR L$052xts_enc_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa [esp],xmm5 + movdqa [16+esp],xmm6 + je NEAR L$053xts_enc_four + movdqa [32+esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + pxor xmm2,[esp] + movdqu xmm5,[48+esi] + pxor xmm3,[16+esp] + movdqu xmm6,[64+esi] + pxor xmm4,[32+esp] + lea esi,[80+esi] + pxor xmm5,[48+esp] + movdqa [64+esp],xmm7 + pxor xmm6,xmm7 + call __aesni_encrypt6 + movaps xmm1,[64+esp] + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,[32+esp] + movups [edi],xmm2 + xorps xmm5,[48+esp] + movups [16+edi],xmm3 + xorps xmm6,xmm1 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + lea edi,[80+edi] + jmp NEAR L$054xts_enc_done +align 16 +L$050xts_enc_one: + movups xmm2,[esi] + lea esi,[16+esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$055enc1_loop_9: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$055enc1_loop_9 +db 102,15,56,221,209 + xorps xmm2,xmm5 + movups [edi],xmm2 + lea edi,[16+edi] + movdqa xmm1,xmm5 + jmp NEAR L$054xts_enc_done +align 16 +L$051xts_enc_two: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + lea esi,[32+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_encrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea edi,[32+edi] + movdqa xmm1,xmm6 + jmp NEAR L$054xts_enc_done +align 16 +L$052xts_enc_three: + movaps xmm7,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + lea esi,[48+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_encrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + lea edi,[48+edi] + movdqa xmm1,xmm7 + jmp NEAR L$054xts_enc_done +align 16 +L$053xts_enc_four: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + xorps xmm2,[esp] + movups xmm5,[48+esi] + lea esi,[64+esi] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_encrypt4 + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + movups [edi],xmm2 + xorps xmm5,xmm6 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + lea edi,[64+edi] + movdqa xmm1,xmm6 + jmp NEAR L$054xts_enc_done +align 16 +L$049xts_enc_done6x: + mov eax,DWORD [112+esp] + and eax,15 + jz NEAR L$056xts_enc_ret + movdqa xmm5,xmm1 + mov DWORD [112+esp],eax + jmp NEAR L$057xts_enc_steal +align 16 +L$054xts_enc_done: + mov eax,DWORD [112+esp] + pxor xmm0,xmm0 + and eax,15 + jz NEAR L$056xts_enc_ret + pcmpgtd xmm0,xmm1 + mov DWORD [112+esp],eax + pshufd xmm5,xmm0,19 + paddq xmm1,xmm1 + pand xmm5,[96+esp] + pxor xmm5,xmm1 +L$057xts_enc_steal: + movzx ecx,BYTE [esi] + movzx edx,BYTE [edi-16] + lea esi,[1+esi] + mov BYTE [edi-16],cl + mov BYTE [edi],dl + lea edi,[1+edi] + sub eax,1 + jnz NEAR L$057xts_enc_steal + sub edi,DWORD [112+esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,[edi-16] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$058enc1_loop_10: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$058enc1_loop_10 +db 102,15,56,221,209 + xorps xmm2,xmm5 + movups [edi-16],xmm2 +L$056xts_enc_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa [esp],xmm0 + pxor xmm3,xmm3 + movdqa [16+esp],xmm0 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + movdqa [80+esp],xmm0 + mov esp,DWORD [116+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_xts_decrypt +align 16 +_aesni_xts_decrypt: +L$_aesni_xts_decrypt_begin: + push ebp + push ebx + push esi + push edi + mov edx,DWORD [36+esp] + mov esi,DWORD [40+esp] + mov ecx,DWORD [240+edx] + movups xmm2,[esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$059enc1_loop_11: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$059enc1_loop_11 +db 102,15,56,221,209 + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebp,esp + sub esp,120 + and esp,-16 + xor ebx,ebx + test eax,15 + setnz bl + shl ebx,4 + sub eax,ebx + mov DWORD [96+esp],135 + mov DWORD [100+esp],0 + mov DWORD [104+esp],1 + mov DWORD [108+esp],0 + mov DWORD [112+esp],eax + mov DWORD [116+esp],ebp + mov ecx,DWORD [240+edx] + mov ebp,edx + mov ebx,ecx + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + sub eax,96 + jc NEAR L$060xts_dec_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,[32+ecx*1+edx] + jmp NEAR L$061xts_dec_loop6 +align 16 +L$061xts_dec_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [16+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [32+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa [64+esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,[ebp] + pand xmm7,xmm3 + movups xmm2,[esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,[16+esi] + xorps xmm2,xmm0 + movdqu xmm4,[32+esi] + pxor xmm3,xmm0 + movdqu xmm5,[48+esi] + pxor xmm4,xmm0 + movdqu xmm6,[64+esi] + pxor xmm5,xmm0 + movdqu xmm1,[80+esi] + pxor xmm6,xmm0 + lea esi,[96+esi] + pxor xmm2,[esp] + movdqa [80+esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,[16+ebp] + pxor xmm3,[16+esp] + pxor xmm4,[32+esp] +db 102,15,56,222,209 + pxor xmm5,[48+esp] + pxor xmm6,[64+esp] +db 102,15,56,222,217 + pxor xmm7,xmm0 + movups xmm0,[32+ebp] +db 102,15,56,222,225 +db 102,15,56,222,233 +db 102,15,56,222,241 +db 102,15,56,222,249 + call L$_aesni_decrypt6_enter + movdqa xmm1,[80+esp] + pxor xmm0,xmm0 + xorps xmm2,[esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,[16+esp] + movups [edi],xmm2 + xorps xmm4,[32+esp] + movups [16+edi],xmm3 + xorps xmm5,[48+esp] + movups [32+edi],xmm4 + xorps xmm6,[64+esp] + movups [48+edi],xmm5 + xorps xmm7,xmm1 + movups [64+edi],xmm6 + pshufd xmm2,xmm0,19 + movups [80+edi],xmm7 + lea edi,[96+edi] + movdqa xmm3,[96+esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc NEAR L$061xts_dec_loop6 + mov ecx,DWORD [240+ebp] + mov edx,ebp + mov ebx,ecx +L$060xts_dec_short: + add eax,96 + jz NEAR L$062xts_dec_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb NEAR L$063xts_dec_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je NEAR L$064xts_dec_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb NEAR L$065xts_dec_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa [esp],xmm5 + movdqa [16+esp],xmm6 + je NEAR L$066xts_dec_four + movdqa [32+esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa [48+esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + pxor xmm2,[esp] + movdqu xmm5,[48+esi] + pxor xmm3,[16+esp] + movdqu xmm6,[64+esi] + pxor xmm4,[32+esp] + lea esi,[80+esi] + pxor xmm5,[48+esp] + movdqa [64+esp],xmm7 + pxor xmm6,xmm7 + call __aesni_decrypt6 + movaps xmm1,[64+esp] + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,[32+esp] + movups [edi],xmm2 + xorps xmm5,[48+esp] + movups [16+edi],xmm3 + xorps xmm6,xmm1 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + movups [64+edi],xmm6 + lea edi,[80+edi] + jmp NEAR L$067xts_dec_done +align 16 +L$063xts_dec_one: + movups xmm2,[esi] + lea esi,[16+esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$068dec1_loop_12: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$068dec1_loop_12 +db 102,15,56,223,209 + xorps xmm2,xmm5 + movups [edi],xmm2 + lea edi,[16+edi] + movdqa xmm1,xmm5 + jmp NEAR L$067xts_dec_done +align 16 +L$064xts_dec_two: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + lea esi,[32+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_decrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea edi,[32+edi] + movdqa xmm1,xmm6 + jmp NEAR L$067xts_dec_done +align 16 +L$065xts_dec_three: + movaps xmm7,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + lea esi,[48+esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_decrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups [edi],xmm2 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + lea edi,[48+edi] + movdqa xmm1,xmm7 + jmp NEAR L$067xts_dec_done +align 16 +L$066xts_dec_four: + movaps xmm6,xmm1 + movups xmm2,[esi] + movups xmm3,[16+esi] + movups xmm4,[32+esi] + xorps xmm2,[esp] + movups xmm5,[48+esi] + lea esi,[64+esi] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_decrypt4 + xorps xmm2,[esp] + xorps xmm3,[16+esp] + xorps xmm4,xmm7 + movups [edi],xmm2 + xorps xmm5,xmm6 + movups [16+edi],xmm3 + movups [32+edi],xmm4 + movups [48+edi],xmm5 + lea edi,[64+edi] + movdqa xmm1,xmm6 + jmp NEAR L$067xts_dec_done +align 16 +L$062xts_dec_done6x: + mov eax,DWORD [112+esp] + and eax,15 + jz NEAR L$069xts_dec_ret + mov DWORD [112+esp],eax + jmp NEAR L$070xts_dec_only_one_more +align 16 +L$067xts_dec_done: + mov eax,DWORD [112+esp] + pxor xmm0,xmm0 + and eax,15 + jz NEAR L$069xts_dec_ret + pcmpgtd xmm0,xmm1 + mov DWORD [112+esp],eax + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm3,[96+esp] + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 +L$070xts_dec_only_one_more: + pshufd xmm5,xmm0,19 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm5,xmm3 + pxor xmm5,xmm1 + mov edx,ebp + mov ecx,ebx + movups xmm2,[esi] + xorps xmm2,xmm5 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$071dec1_loop_13: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$071dec1_loop_13 +db 102,15,56,223,209 + xorps xmm2,xmm5 + movups [edi],xmm2 +L$072xts_dec_steal: + movzx ecx,BYTE [16+esi] + movzx edx,BYTE [edi] + lea esi,[1+esi] + mov BYTE [edi],cl + mov BYTE [16+edi],dl + lea edi,[1+edi] + sub eax,1 + jnz NEAR L$072xts_dec_steal + sub edi,DWORD [112+esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,[edi] + xorps xmm2,xmm6 + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$073dec1_loop_14: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$073dec1_loop_14 +db 102,15,56,223,209 + xorps xmm2,xmm6 + movups [edi],xmm2 +L$069xts_dec_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa [esp],xmm0 + pxor xmm3,xmm3 + movdqa [16+esp],xmm0 + pxor xmm4,xmm4 + movdqa [32+esp],xmm0 + pxor xmm5,xmm5 + movdqa [48+esp],xmm0 + pxor xmm6,xmm6 + movdqa [64+esp],xmm0 + pxor xmm7,xmm7 + movdqa [80+esp],xmm0 + mov esp,DWORD [116+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _aesni_cbc_encrypt +align 16 +_aesni_cbc_encrypt: +L$_aesni_cbc_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov ebx,esp + mov edi,DWORD [24+esp] + sub ebx,24 + mov eax,DWORD [28+esp] + and ebx,-16 + mov edx,DWORD [32+esp] + mov ebp,DWORD [36+esp] + test eax,eax + jz NEAR L$074cbc_abort + cmp DWORD [40+esp],0 + xchg ebx,esp + movups xmm7,[ebp] + mov ecx,DWORD [240+edx] + mov ebp,edx + mov DWORD [16+esp],ebx + mov ebx,ecx + je NEAR L$075cbc_decrypt + movaps xmm2,xmm7 + cmp eax,16 + jb NEAR L$076cbc_enc_tail + sub eax,16 + jmp NEAR L$077cbc_enc_loop +align 16 +L$077cbc_enc_loop: + movups xmm7,[esi] + lea esi,[16+esi] + movups xmm0,[edx] + movups xmm1,[16+edx] + xorps xmm7,xmm0 + lea edx,[32+edx] + xorps xmm2,xmm7 +L$078enc1_loop_15: +db 102,15,56,220,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$078enc1_loop_15 +db 102,15,56,221,209 + mov ecx,ebx + mov edx,ebp + movups [edi],xmm2 + lea edi,[16+edi] + sub eax,16 + jnc NEAR L$077cbc_enc_loop + add eax,16 + jnz NEAR L$076cbc_enc_tail + movaps xmm7,xmm2 + pxor xmm2,xmm2 + jmp NEAR L$079cbc_ret +L$076cbc_enc_tail: + mov ecx,eax +dd 2767451785 + mov ecx,16 + sub ecx,eax + xor eax,eax +dd 2868115081 + lea edi,[edi-16] + mov ecx,ebx + mov esi,edi + mov edx,ebp + jmp NEAR L$077cbc_enc_loop +align 16 +L$075cbc_decrypt: + cmp eax,80 + jbe NEAR L$080cbc_dec_tail + movaps [esp],xmm7 + sub eax,80 + jmp NEAR L$081cbc_dec_loop6_enter +align 16 +L$082cbc_dec_loop6: + movaps [esp],xmm0 + movups [edi],xmm7 + lea edi,[16+edi] +L$081cbc_dec_loop6_enter: + movdqu xmm2,[esi] + movdqu xmm3,[16+esi] + movdqu xmm4,[32+esi] + movdqu xmm5,[48+esi] + movdqu xmm6,[64+esi] + movdqu xmm7,[80+esi] + call __aesni_decrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,[esp] + xorps xmm3,xmm1 + movups xmm1,[32+esi] + xorps xmm4,xmm0 + movups xmm0,[48+esi] + xorps xmm5,xmm1 + movups xmm1,[64+esi] + xorps xmm6,xmm0 + movups xmm0,[80+esi] + xorps xmm7,xmm1 + movups [edi],xmm2 + movups [16+edi],xmm3 + lea esi,[96+esi] + movups [32+edi],xmm4 + mov ecx,ebx + movups [48+edi],xmm5 + mov edx,ebp + movups [64+edi],xmm6 + lea edi,[80+edi] + sub eax,96 + ja NEAR L$082cbc_dec_loop6 + movaps xmm2,xmm7 + movaps xmm7,xmm0 + add eax,80 + jle NEAR L$083cbc_dec_clear_tail_collected + movups [edi],xmm2 + lea edi,[16+edi] +L$080cbc_dec_tail: + movups xmm2,[esi] + movaps xmm6,xmm2 + cmp eax,16 + jbe NEAR L$084cbc_dec_one + movups xmm3,[16+esi] + movaps xmm5,xmm3 + cmp eax,32 + jbe NEAR L$085cbc_dec_two + movups xmm4,[32+esi] + cmp eax,48 + jbe NEAR L$086cbc_dec_three + movups xmm5,[48+esi] + cmp eax,64 + jbe NEAR L$087cbc_dec_four + movups xmm6,[64+esi] + movaps [esp],xmm7 + movups xmm2,[esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups xmm1,[esi] + movups xmm0,[16+esi] + xorps xmm2,[esp] + xorps xmm3,xmm1 + movups xmm1,[32+esi] + xorps xmm4,xmm0 + movups xmm0,[48+esi] + xorps xmm5,xmm1 + movups xmm7,[64+esi] + xorps xmm6,xmm0 + movups [edi],xmm2 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + movups [32+edi],xmm4 + pxor xmm4,xmm4 + movups [48+edi],xmm5 + pxor xmm5,xmm5 + lea edi,[64+edi] + movaps xmm2,xmm6 + pxor xmm6,xmm6 + sub eax,80 + jmp NEAR L$088cbc_dec_tail_collected +align 16 +L$084cbc_dec_one: + movups xmm0,[edx] + movups xmm1,[16+edx] + lea edx,[32+edx] + xorps xmm2,xmm0 +L$089dec1_loop_16: +db 102,15,56,222,209 + dec ecx + movups xmm1,[edx] + lea edx,[16+edx] + jnz NEAR L$089dec1_loop_16 +db 102,15,56,223,209 + xorps xmm2,xmm7 + movaps xmm7,xmm6 + sub eax,16 + jmp NEAR L$088cbc_dec_tail_collected +align 16 +L$085cbc_dec_two: + call __aesni_decrypt2 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + movups [edi],xmm2 + movaps xmm2,xmm3 + pxor xmm3,xmm3 + lea edi,[16+edi] + movaps xmm7,xmm5 + sub eax,32 + jmp NEAR L$088cbc_dec_tail_collected +align 16 +L$086cbc_dec_three: + call __aesni_decrypt3 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + xorps xmm4,xmm5 + movups [edi],xmm2 + movaps xmm2,xmm4 + pxor xmm4,xmm4 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + lea edi,[32+edi] + movups xmm7,[32+esi] + sub eax,48 + jmp NEAR L$088cbc_dec_tail_collected +align 16 +L$087cbc_dec_four: + call __aesni_decrypt4 + movups xmm1,[16+esi] + movups xmm0,[32+esi] + xorps xmm2,xmm7 + movups xmm7,[48+esi] + xorps xmm3,xmm6 + movups [edi],xmm2 + xorps xmm4,xmm1 + movups [16+edi],xmm3 + pxor xmm3,xmm3 + xorps xmm5,xmm0 + movups [32+edi],xmm4 + pxor xmm4,xmm4 + lea edi,[48+edi] + movaps xmm2,xmm5 + pxor xmm5,xmm5 + sub eax,64 + jmp NEAR L$088cbc_dec_tail_collected +align 16 +L$083cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 +L$088cbc_dec_tail_collected: + and eax,15 + jnz NEAR L$090cbc_dec_tail_partial + movups [edi],xmm2 + pxor xmm0,xmm0 + jmp NEAR L$079cbc_ret +align 16 +L$090cbc_dec_tail_partial: + movaps [esp],xmm2 + pxor xmm0,xmm0 + mov ecx,16 + mov esi,esp + sub ecx,eax +dd 2767451785 + movdqa [esp],xmm2 +L$079cbc_ret: + mov esp,DWORD [16+esp] + mov ebp,DWORD [36+esp] + pxor xmm2,xmm2 + pxor xmm1,xmm1 + movups [ebp],xmm7 + pxor xmm7,xmm7 +L$074cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__aesni_set_encrypt_key: + push ebp + push ebx + test eax,eax + jz NEAR L$091bad_pointer + test edx,edx + jz NEAR L$091bad_pointer + call L$092pic +L$092pic: + pop ebx + lea ebx,[(L$key_const-L$092pic)+ebx] + lea ebp,[_OPENSSL_ia32cap_P] + movups xmm0,[eax] + xorps xmm4,xmm4 + mov ebp,DWORD [4+ebp] + lea edx,[16+edx] + and ebp,268437504 + cmp ecx,256 + je NEAR L$09314rounds + cmp ecx,192 + je NEAR L$09412rounds + cmp ecx,128 + jne NEAR L$095bad_keybits +align 16 +L$09610rounds: + cmp ebp,268435456 + je NEAR L$09710rounds_alt + mov ecx,9 + movups [edx-16],xmm0 +db 102,15,58,223,200,1 + call L$098key_128_cold +db 102,15,58,223,200,2 + call L$099key_128 +db 102,15,58,223,200,4 + call L$099key_128 +db 102,15,58,223,200,8 + call L$099key_128 +db 102,15,58,223,200,16 + call L$099key_128 +db 102,15,58,223,200,32 + call L$099key_128 +db 102,15,58,223,200,64 + call L$099key_128 +db 102,15,58,223,200,128 + call L$099key_128 +db 102,15,58,223,200,27 + call L$099key_128 +db 102,15,58,223,200,54 + call L$099key_128 + movups [edx],xmm0 + mov DWORD [80+edx],ecx + jmp NEAR L$100good_key +align 16 +L$099key_128: + movups [edx],xmm0 + lea edx,[16+edx] +L$098key_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +align 16 +L$09710rounds_alt: + movdqa xmm5,[ebx] + mov ecx,8 + movdqa xmm4,[32+ebx] + movdqa xmm2,xmm0 + movdqu [edx-16],xmm0 +L$101loop_key128: +db 102,15,56,0,197 +db 102,15,56,221,196 + pslld xmm4,1 + lea edx,[16+edx] + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [edx-16],xmm0 + movdqa xmm2,xmm0 + dec ecx + jnz NEAR L$101loop_key128 + movdqa xmm4,[48+ebx] +db 102,15,56,0,197 +db 102,15,56,221,196 + pslld xmm4,1 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [edx],xmm0 + movdqa xmm2,xmm0 +db 102,15,56,0,197 +db 102,15,56,221,196 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu [16+edx],xmm0 + mov ecx,9 + mov DWORD [96+edx],ecx + jmp NEAR L$100good_key +align 16 +L$09412rounds: + movq xmm2,[16+eax] + cmp ebp,268435456 + je NEAR L$10212rounds_alt + mov ecx,11 + movups [edx-16],xmm0 +db 102,15,58,223,202,1 + call L$103key_192a_cold +db 102,15,58,223,202,2 + call L$104key_192b +db 102,15,58,223,202,4 + call L$105key_192a +db 102,15,58,223,202,8 + call L$104key_192b +db 102,15,58,223,202,16 + call L$105key_192a +db 102,15,58,223,202,32 + call L$104key_192b +db 102,15,58,223,202,64 + call L$105key_192a +db 102,15,58,223,202,128 + call L$104key_192b + movups [edx],xmm0 + mov DWORD [48+edx],ecx + jmp NEAR L$100good_key +align 16 +L$105key_192a: + movups [edx],xmm0 + lea edx,[16+edx] +align 16 +L$103key_192a_cold: + movaps xmm5,xmm2 +L$106key_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + ret +align 16 +L$104key_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups [edx],xmm5 + shufps xmm3,xmm2,78 + movups [16+edx],xmm3 + lea edx,[32+edx] + jmp NEAR L$106key_192b_warm +align 16 +L$10212rounds_alt: + movdqa xmm5,[16+ebx] + movdqa xmm4,[32+ebx] + mov ecx,8 + movdqu [edx-16],xmm0 +L$107loop_key192: + movq [edx],xmm2 + movdqa xmm1,xmm2 +db 102,15,56,0,213 +db 102,15,56,221,212 + pslld xmm4,1 + lea edx,[24+edx] + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pshufd xmm3,xmm0,255 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu [edx-16],xmm0 + dec ecx + jnz NEAR L$107loop_key192 + mov ecx,11 + mov DWORD [32+edx],ecx + jmp NEAR L$100good_key +align 16 +L$09314rounds: + movups xmm2,[16+eax] + lea edx,[16+edx] + cmp ebp,268435456 + je NEAR L$10814rounds_alt + mov ecx,13 + movups [edx-32],xmm0 + movups [edx-16],xmm2 +db 102,15,58,223,202,1 + call L$109key_256a_cold +db 102,15,58,223,200,1 + call L$110key_256b +db 102,15,58,223,202,2 + call L$111key_256a +db 102,15,58,223,200,2 + call L$110key_256b +db 102,15,58,223,202,4 + call L$111key_256a +db 102,15,58,223,200,4 + call L$110key_256b +db 102,15,58,223,202,8 + call L$111key_256a +db 102,15,58,223,200,8 + call L$110key_256b +db 102,15,58,223,202,16 + call L$111key_256a +db 102,15,58,223,200,16 + call L$110key_256b +db 102,15,58,223,202,32 + call L$111key_256a +db 102,15,58,223,200,32 + call L$110key_256b +db 102,15,58,223,202,64 + call L$111key_256a + movups [edx],xmm0 + mov DWORD [16+edx],ecx + xor eax,eax + jmp NEAR L$100good_key +align 16 +L$111key_256a: + movups [edx],xmm2 + lea edx,[16+edx] +L$109key_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +align 16 +L$110key_256b: + movups [edx],xmm0 + lea edx,[16+edx] + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + ret +align 16 +L$10814rounds_alt: + movdqa xmm5,[ebx] + movdqa xmm4,[32+ebx] + mov ecx,7 + movdqu [edx-32],xmm0 + movdqa xmm1,xmm2 + movdqu [edx-16],xmm2 +L$112loop_key256: +db 102,15,56,0,213 +db 102,15,56,221,212 + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + pxor xmm0,xmm2 + movdqu [edx],xmm0 + dec ecx + jz NEAR L$113done_key256 + pshufd xmm2,xmm0,255 + pxor xmm3,xmm3 +db 102,15,56,221,211 + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + pxor xmm2,xmm1 + movdqu [16+edx],xmm2 + lea edx,[32+edx] + movdqa xmm1,xmm2 + jmp NEAR L$112loop_key256 +L$113done_key256: + mov ecx,13 + mov DWORD [16+edx],ecx +L$100good_key: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor eax,eax + pop ebx + pop ebp + ret +align 4 +L$091bad_pointer: + mov eax,-1 + pop ebx + pop ebp + ret +align 4 +L$095bad_keybits: + pxor xmm0,xmm0 + mov eax,-2 + pop ebx + pop ebp + ret +global _aesni_set_encrypt_key +align 16 +_aesni_set_encrypt_key: +L$_aesni_set_encrypt_key_begin: + mov eax,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + call __aesni_set_encrypt_key + ret +global _aesni_set_decrypt_key +align 16 +_aesni_set_decrypt_key: +L$_aesni_set_decrypt_key_begin: + mov eax,DWORD [4+esp] + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + call __aesni_set_encrypt_key + mov edx,DWORD [12+esp] + shl ecx,4 + test eax,eax + jnz NEAR L$114dec_key_ret + lea eax,[16+ecx*1+edx] + movups xmm0,[edx] + movups xmm1,[eax] + movups [eax],xmm0 + movups [edx],xmm1 + lea edx,[16+edx] + lea eax,[eax-16] +L$115dec_key_inverse: + movups xmm0,[edx] + movups xmm1,[eax] +db 102,15,56,219,192 +db 102,15,56,219,201 + lea edx,[16+edx] + lea eax,[eax-16] + movups [16+eax],xmm0 + movups [edx-16],xmm1 + cmp eax,edx + ja NEAR L$115dec_key_inverse + movups xmm0,[edx] +db 102,15,56,219,192 + movups [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xor eax,eax +L$114dec_key_ret: + ret +align 64 +L$key_const: +dd 202313229,202313229,202313229,202313229 +dd 67569157,67569157,67569157,67569157 +dd 1,1,1,1 +dd 27,27,27,27 +db 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +db 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +db 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +db 115,108,46,111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/aes/vpaes-x86.asm b/third_party/boringssl/win-x86/crypto/aes/vpaes-x86.asm new file mode 100644 index 00000000000..b08b05637db --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/aes/vpaes-x86.asm @@ -0,0 +1,649 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +align 64 +L$_vpaes_consts: +dd 218628480,235210255,168496130,67568393 +dd 252381056,17041926,33884169,51187212 +dd 252645135,252645135,252645135,252645135 +dd 1512730624,3266504856,1377990664,3401244816 +dd 830229760,1275146365,2969422977,3447763452 +dd 3411033600,2979783055,338359620,2782886510 +dd 4209124096,907596821,221174255,1006095553 +dd 191964160,3799684038,3164090317,1589111125 +dd 182528256,1777043520,2877432650,3265356744 +dd 1874708224,3503451415,3305285752,363511674 +dd 1606117888,3487855781,1093350906,2384367825 +dd 197121,67569157,134941193,202313229 +dd 67569157,134941193,202313229,197121 +dd 134941193,202313229,197121,67569157 +dd 202313229,197121,67569157,134941193 +dd 33619971,100992007,168364043,235736079 +dd 235736079,33619971,100992007,168364043 +dd 168364043,235736079,33619971,100992007 +dd 100992007,168364043,235736079,33619971 +dd 50462976,117835012,185207048,252579084 +dd 252314880,51251460,117574920,184942860 +dd 184682752,252054788,50987272,118359308 +dd 118099200,185467140,251790600,50727180 +dd 2946363062,528716217,1300004225,1881839624 +dd 1532713819,1532713819,1532713819,1532713819 +dd 3602276352,4288629033,3737020424,4153884961 +dd 1354558464,32357713,2958822624,3775749553 +dd 1201988352,132424512,1572796698,503232858 +dd 2213177600,1597421020,4103937655,675398315 +dd 2749646592,4273543773,1511898873,121693092 +dd 3040248576,1103263732,2871565598,1608280554 +dd 2236667136,2588920351,482954393,64377734 +dd 3069987328,291237287,2117370568,3650299247 +dd 533321216,3573750986,2572112006,1401264716 +dd 1339849704,2721158661,548607111,3445553514 +dd 2128193280,3054596040,2183486460,1257083700 +dd 655635200,1165381986,3923443150,2344132524 +dd 190078720,256924420,290342170,357187870 +dd 1610966272,2263057382,4103205268,309794674 +dd 2592527872,2233205587,1335446729,3402964816 +dd 3973531904,3225098121,3002836325,1918774430 +dd 3870401024,2102906079,2284471353,4117666579 +dd 617007872,1021508343,366931923,691083277 +dd 2528395776,3491914898,2968704004,1613121270 +dd 3445188352,3247741094,844474987,4093578302 +dd 651481088,1190302358,1689581232,574775300 +dd 4289380608,206939853,2555985458,2489840491 +dd 2130264064,327674451,3566485037,3349835193 +dd 2470714624,316102159,3636825756,3393945945 +db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +db 118,101,114,115,105,116,121,41,0 +align 64 +align 16 +__vpaes_preheat: + add ebp,DWORD [esp] + movdqa xmm7,[ebp-48] + movdqa xmm6,[ebp-16] + ret +align 16 +__vpaes_encrypt_core: + mov ecx,16 + mov eax,DWORD [240+edx] + movdqa xmm1,xmm6 + movdqa xmm2,[ebp] + pandn xmm1,xmm0 + pand xmm0,xmm6 + movdqu xmm5,[edx] +db 102,15,56,0,208 + movdqa xmm0,[16+ebp] + pxor xmm2,xmm5 + psrld xmm1,4 + add edx,16 +db 102,15,56,0,193 + lea ebx,[192+ebp] + pxor xmm0,xmm2 + jmp NEAR L$000enc_entry +align 16 +L$001enc_loop: + movdqa xmm4,[32+ebp] + movdqa xmm0,[48+ebp] +db 102,15,56,0,226 +db 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,[64+ebp] + pxor xmm0,xmm4 + movdqa xmm1,[ecx*1+ebx-64] +db 102,15,56,0,234 + movdqa xmm2,[80+ebp] + movdqa xmm4,[ecx*1+ebx] +db 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +db 102,15,56,0,193 + add edx,16 + pxor xmm0,xmm2 +db 102,15,56,0,220 + add ecx,16 + pxor xmm3,xmm0 +db 102,15,56,0,193 + and ecx,48 + sub eax,1 + pxor xmm0,xmm3 +L$000enc_entry: + movdqa xmm1,xmm6 + movdqa xmm5,[ebp-32] + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm6 +db 102,15,56,0,232 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +db 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm5 +db 102,15,56,0,224 + movdqa xmm2,xmm7 + pxor xmm4,xmm5 +db 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +db 102,15,56,0,220 + movdqu xmm5,[edx] + pxor xmm3,xmm1 + jnz NEAR L$001enc_loop + movdqa xmm4,[96+ebp] + movdqa xmm0,[112+ebp] +db 102,15,56,0,226 + pxor xmm4,xmm5 +db 102,15,56,0,195 + movdqa xmm1,[64+ecx*1+ebx] + pxor xmm0,xmm4 +db 102,15,56,0,193 + ret +align 16 +__vpaes_decrypt_core: + lea ebx,[608+ebp] + mov eax,DWORD [240+edx] + movdqa xmm1,xmm6 + movdqa xmm2,[ebx-64] + pandn xmm1,xmm0 + mov ecx,eax + psrld xmm1,4 + movdqu xmm5,[edx] + shl ecx,4 + pand xmm0,xmm6 +db 102,15,56,0,208 + movdqa xmm0,[ebx-48] + xor ecx,48 +db 102,15,56,0,193 + and ecx,48 + pxor xmm2,xmm5 + movdqa xmm5,[176+ebp] + pxor xmm0,xmm2 + add edx,16 + lea ecx,[ecx*1+ebx-352] + jmp NEAR L$002dec_entry +align 16 +L$003dec_loop: + movdqa xmm4,[ebx-32] + movdqa xmm1,[ebx-16] +db 102,15,56,0,226 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[ebx] + pxor xmm0,xmm1 + movdqa xmm1,[16+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[32+ebx] + pxor xmm0,xmm1 + movdqa xmm1,[48+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,[64+ebx] + pxor xmm0,xmm1 + movdqa xmm1,[80+ebx] +db 102,15,56,0,226 +db 102,15,56,0,197 +db 102,15,56,0,203 + pxor xmm0,xmm4 + add edx,16 +db 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub eax,1 +L$002dec_entry: + movdqa xmm1,xmm6 + movdqa xmm2,[ebp-32] + pandn xmm1,xmm0 + pand xmm0,xmm6 + psrld xmm1,4 +db 102,15,56,0,208 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +db 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm2 +db 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm7 +db 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +db 102,15,56,0,220 + movdqu xmm0,[edx] + pxor xmm3,xmm1 + jnz NEAR L$003dec_loop + movdqa xmm4,[96+ebx] +db 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,[112+ebx] + movdqa xmm2,[ecx] +db 102,15,56,0,195 + pxor xmm0,xmm4 +db 102,15,56,0,194 + ret +align 16 +__vpaes_schedule_core: + add ebp,DWORD [esp] + movdqu xmm0,[esi] + movdqa xmm2,[320+ebp] + movdqa xmm3,xmm0 + lea ebx,[ebp] + movdqa [4+esp],xmm2 + call __vpaes_schedule_transform + movdqa xmm7,xmm0 + test edi,edi + jnz NEAR L$004schedule_am_decrypting + movdqu [edx],xmm0 + jmp NEAR L$005schedule_go +L$004schedule_am_decrypting: + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,217 + movdqu [edx],xmm3 + xor ecx,48 +L$005schedule_go: + cmp eax,192 + ja NEAR L$006schedule_256 + je NEAR L$007schedule_192 +L$008schedule_128: + mov eax,10 +L$009loop_schedule_128: + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + jmp NEAR L$009loop_schedule_128 +align 16 +L$007schedule_192: + movdqu xmm0,[8+esi] + call __vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov eax,4 +L$011loop_schedule_192: + call __vpaes_schedule_round +db 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp NEAR L$011loop_schedule_192 +align 16 +L$006schedule_256: + movdqu xmm0,[16+esi] + call __vpaes_schedule_transform + mov eax,7 +L$012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa xmm6,xmm0 + call __vpaes_schedule_round + dec eax + jz NEAR L$010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd xmm0,xmm0,255 + movdqa [20+esp],xmm7 + movdqa xmm7,xmm6 + call L$_vpaes_schedule_low_round + movdqa xmm7,[20+esp] + jmp NEAR L$012loop_schedule_256 +align 16 +L$010schedule_mangle_last: + lea ebx,[384+ebp] + test edi,edi + jnz NEAR L$013schedule_mangle_last_dec + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,193 + lea ebx,[352+ebp] + add edx,32 +L$013schedule_mangle_last_dec: + add edx,-16 + pxor xmm0,[336+ebp] + call __vpaes_schedule_transform + movdqu [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + ret +align 16 +__vpaes_schedule_192_smear: + pshufd xmm1,xmm6,128 + pshufd xmm0,xmm7,254 + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + ret +align 16 +__vpaes_schedule_round: + movdqa xmm2,[8+esp] + pxor xmm1,xmm1 +db 102,15,58,15,202,15 +db 102,15,58,15,210,15 + pxor xmm7,xmm1 + pshufd xmm0,xmm0,255 +db 102,15,58,15,192,1 + movdqa [8+esp],xmm2 +L$_vpaes_schedule_low_round: + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,[336+ebp] + movdqa xmm4,[ebp-16] + movdqa xmm5,[ebp-48] + movdqa xmm1,xmm4 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm4 + movdqa xmm2,[ebp-32] +db 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm5 +db 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm5 +db 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm5 +db 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm5 +db 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,[32+ebp] +db 102,15,56,0,226 + movdqa xmm0,[48+ebp] +db 102,15,56,0,195 + pxor xmm0,xmm4 + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + ret +align 16 +__vpaes_schedule_transform: + movdqa xmm2,[ebp-16] + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + movdqa xmm2,[ebx] +db 102,15,56,0,208 + movdqa xmm0,[16+ebx] +db 102,15,56,0,193 + pxor xmm0,xmm2 + ret +align 16 +__vpaes_schedule_mangle: + movdqa xmm4,xmm0 + movdqa xmm5,[128+ebp] + test edi,edi + jnz NEAR L$014schedule_mangle_dec + add edx,16 + pxor xmm4,[336+ebp] +db 102,15,56,0,229 + movdqa xmm3,xmm4 +db 102,15,56,0,229 + pxor xmm3,xmm4 +db 102,15,56,0,229 + pxor xmm3,xmm4 + jmp NEAR L$015schedule_mangle_both +align 16 +L$014schedule_mangle_dec: + movdqa xmm2,[ebp-16] + lea esi,[416+ebp] + movdqa xmm1,xmm2 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm2 + movdqa xmm2,[esi] +db 102,15,56,0,212 + movdqa xmm3,[16+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[32+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[48+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[64+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[80+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 +db 102,15,56,0,221 + movdqa xmm2,[96+esi] +db 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,[112+esi] +db 102,15,56,0,217 + pxor xmm3,xmm2 + add edx,-16 +L$015schedule_mangle_both: + movdqa xmm1,[256+ecx*1+ebp] +db 102,15,56,0,217 + add ecx,-16 + and ecx,48 + movdqu [edx],xmm3 + ret +global _vpaes_set_encrypt_key +align 16 +_vpaes_set_encrypt_key: +L$_vpaes_set_encrypt_key_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov eax,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD [240+edx],ebx + mov ecx,48 + mov edi,0 + lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)] + call __vpaes_schedule_core +L$016pic_point: + mov esp,DWORD [48+esp] + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_set_decrypt_key +align 16 +_vpaes_set_decrypt_key: +L$_vpaes_set_decrypt_key_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov eax,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD [240+edx],ebx + shl ebx,4 + lea edx,[16+ebx*1+edx] + mov edi,1 + mov ecx,eax + shr ecx,1 + and ecx,32 + xor ecx,32 + lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)] + call __vpaes_schedule_core +L$017pic_point: + mov esp,DWORD [48+esp] + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_encrypt +align 16 +_vpaes_encrypt: +L$_vpaes_encrypt_begin: + push ebp + push ebx + push esi + push edi + lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)] + call __vpaes_preheat +L$018pic_point: + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov edi,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + movdqu xmm0,[esi] + call __vpaes_encrypt_core + movdqu [edi],xmm0 + mov esp,DWORD [48+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_decrypt +align 16 +_vpaes_decrypt: +L$_vpaes_decrypt_begin: + push ebp + push ebx + push esi + push edi + lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)] + call __vpaes_preheat +L$019pic_point: + mov esi,DWORD [20+esp] + lea ebx,[esp-56] + mov edi,DWORD [24+esp] + and ebx,-16 + mov edx,DWORD [28+esp] + xchg ebx,esp + mov DWORD [48+esp],ebx + movdqu xmm0,[esi] + call __vpaes_decrypt_core + movdqu [edi],xmm0 + mov esp,DWORD [48+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +global _vpaes_cbc_encrypt +align 16 +_vpaes_cbc_encrypt: +L$_vpaes_cbc_encrypt_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov edx,DWORD [32+esp] + sub eax,16 + jc NEAR L$020cbc_abort + lea ebx,[esp-56] + mov ebp,DWORD [36+esp] + and ebx,-16 + mov ecx,DWORD [40+esp] + xchg ebx,esp + movdqu xmm1,[ebp] + sub edi,esi + mov DWORD [48+esp],ebx + mov DWORD [esp],edi + mov DWORD [4+esp],edx + mov DWORD [8+esp],ebp + mov edi,eax + lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)] + call __vpaes_preheat +L$021pic_point: + cmp ecx,0 + je NEAR L$022cbc_dec_loop + jmp NEAR L$023cbc_enc_loop +align 16 +L$023cbc_enc_loop: + movdqu xmm0,[esi] + pxor xmm0,xmm1 + call __vpaes_encrypt_core + mov ebx,DWORD [esp] + mov edx,DWORD [4+esp] + movdqa xmm1,xmm0 + movdqu [esi*1+ebx],xmm0 + lea esi,[16+esi] + sub edi,16 + jnc NEAR L$023cbc_enc_loop + jmp NEAR L$024cbc_done +align 16 +L$022cbc_dec_loop: + movdqu xmm0,[esi] + movdqa [16+esp],xmm1 + movdqa [32+esp],xmm0 + call __vpaes_decrypt_core + mov ebx,DWORD [esp] + mov edx,DWORD [4+esp] + pxor xmm0,[16+esp] + movdqa xmm1,[32+esp] + movdqu [esi*1+ebx],xmm0 + lea esi,[16+esi] + sub edi,16 + jnc NEAR L$022cbc_dec_loop +L$024cbc_done: + mov ebx,DWORD [8+esp] + mov esp,DWORD [48+esp] + movdqu [ebx],xmm1 +L$020cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret diff --git a/third_party/boringssl/win-x86/crypto/bn/bn-586.asm b/third_party/boringssl/win-x86/crypto/bn/bn-586.asm new file mode 100644 index 00000000000..b222040acae --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/bn/bn-586.asm @@ -0,0 +1,1523 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_add_words +align 16 +_bn_mul_add_words: +L$_bn_mul_add_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$000maw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 + jmp NEAR L$001maw_sse2_entry +align 16 +L$002maw_sse2_unrolled: + movd mm3,DWORD [eax] + paddq mm1,mm3 + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + movd mm4,DWORD [4+edx] + pmuludq mm4,mm0 + movd mm6,DWORD [8+edx] + pmuludq mm6,mm0 + movd mm7,DWORD [12+edx] + pmuludq mm7,mm0 + paddq mm1,mm2 + movd mm3,DWORD [4+eax] + paddq mm3,mm4 + movd mm5,DWORD [8+eax] + paddq mm5,mm6 + movd mm4,DWORD [12+eax] + paddq mm7,mm4 + movd DWORD [eax],mm1 + movd mm2,DWORD [16+edx] + pmuludq mm2,mm0 + psrlq mm1,32 + movd mm4,DWORD [20+edx] + pmuludq mm4,mm0 + paddq mm1,mm3 + movd mm6,DWORD [24+edx] + pmuludq mm6,mm0 + movd DWORD [4+eax],mm1 + psrlq mm1,32 + movd mm3,DWORD [28+edx] + add edx,32 + pmuludq mm3,mm0 + paddq mm1,mm5 + movd mm5,DWORD [16+eax] + paddq mm2,mm5 + movd DWORD [8+eax],mm1 + psrlq mm1,32 + paddq mm1,mm7 + movd mm5,DWORD [20+eax] + paddq mm4,mm5 + movd DWORD [12+eax],mm1 + psrlq mm1,32 + paddq mm1,mm2 + movd mm5,DWORD [24+eax] + paddq mm6,mm5 + movd DWORD [16+eax],mm1 + psrlq mm1,32 + paddq mm1,mm4 + movd mm5,DWORD [28+eax] + paddq mm3,mm5 + movd DWORD [20+eax],mm1 + psrlq mm1,32 + paddq mm1,mm6 + movd DWORD [24+eax],mm1 + psrlq mm1,32 + paddq mm1,mm3 + movd DWORD [28+eax],mm1 + lea eax,[32+eax] + psrlq mm1,32 + sub ecx,8 + jz NEAR L$003maw_sse2_exit +L$001maw_sse2_entry: + test ecx,4294967288 + jnz NEAR L$002maw_sse2_unrolled +align 4 +L$004maw_sse2_loop: + movd mm2,DWORD [edx] + movd mm3,DWORD [eax] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm3 + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$004maw_sse2_loop +L$003maw_sse2_exit: + movd eax,mm1 + emms + ret +align 16 +L$000maw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ecx,DWORD [28+esp] + mov ebx,DWORD [24+esp] + and ecx,4294967288 + mov ebp,DWORD [32+esp] + push ecx + jz NEAR L$005maw_finish +align 16 +L$006maw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [28+edi] + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + sub ecx,8 + lea ebx,[32+ebx] + lea edi,[32+edi] + jnz NEAR L$006maw_loop +L$005maw_finish: + mov ecx,DWORD [32+esp] + and ecx,7 + jnz NEAR L$007maw_finish2 + jmp NEAR L$008maw_end +L$007maw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [edi] + adc edx,0 + dec ecx + mov DWORD [edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [4+edi] + adc edx,0 + dec ecx + mov DWORD [4+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [8+edi] + adc edx,0 + dec ecx + mov DWORD [8+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [12+edi] + adc edx,0 + dec ecx + mov DWORD [12+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [16+edi] + adc edx,0 + dec ecx + mov DWORD [16+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [20+edi] + adc edx,0 + dec ecx + mov DWORD [20+edi],eax + mov esi,edx + jz NEAR L$008maw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD [24+edi] + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$008maw_end: + mov eax,esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_mul_words +align 16 +_bn_mul_words: +L$_bn_mul_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$009mw_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] + movd mm0,DWORD [16+esp] + pxor mm1,mm1 +align 16 +L$010mw_sse2_loop: + movd mm2,DWORD [edx] + pmuludq mm2,mm0 + lea edx,[4+edx] + paddq mm1,mm2 + movd DWORD [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,[4+eax] + jnz NEAR L$010mw_sse2_loop + movd eax,mm1 + emms + ret +align 16 +L$009mw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ebp,DWORD [28+esp] + mov ecx,DWORD [32+esp] + and ebp,4294967288 + jz NEAR L$011mw_finish +L$012mw_loop: + ; Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD [28+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [28+edi],eax + mov esi,edx + ; + add ebx,32 + add edi,32 + sub ebp,8 + jz NEAR L$011mw_finish + jmp NEAR L$012mw_loop +L$011mw_finish: + mov ebp,DWORD [28+esp] + and ebp,7 + jnz NEAR L$013mw_finish2 + jmp NEAR L$014mw_end +L$013mw_finish2: + ; Tail Round 0 + mov eax,DWORD [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 1 + mov eax,DWORD [4+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [4+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 2 + mov eax,DWORD [8+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [8+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 3 + mov eax,DWORD [12+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [12+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 4 + mov eax,DWORD [16+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [16+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 5 + mov eax,DWORD [20+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [20+edi],eax + mov esi,edx + dec ebp + jz NEAR L$014mw_end + ; Tail Round 6 + mov eax,DWORD [24+ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD [24+edi],eax + mov esi,edx +L$014mw_end: + mov eax,esi + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sqr_words +align 16 +_bn_sqr_words: +L$_bn_sqr_words_begin: + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$015sqr_non_sse2 + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + mov ecx,DWORD [12+esp] +align 16 +L$016sqr_sse2_loop: + movd mm0,DWORD [edx] + pmuludq mm0,mm0 + lea edx,[4+edx] + movq [eax],mm0 + sub ecx,1 + lea eax,[8+eax] + jnz NEAR L$016sqr_sse2_loop + emms + ret +align 16 +L$015sqr_non_sse2: + push ebp + push ebx + push esi + push edi + ; + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov ebx,DWORD [28+esp] + and ebx,4294967288 + jz NEAR L$017sw_finish +L$018sw_loop: + ; Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + mov DWORD [4+esi],edx + ; Round 4 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + mov DWORD [12+esi],edx + ; Round 8 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + mov DWORD [20+esi],edx + ; Round 12 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + mov DWORD [28+esi],edx + ; Round 16 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + mov DWORD [36+esi],edx + ; Round 20 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + mov DWORD [44+esi],edx + ; Round 24 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx + ; Round 28 + mov eax,DWORD [28+edi] + mul eax + mov DWORD [56+esi],eax + mov DWORD [60+esi],edx + ; + add edi,32 + add esi,64 + sub ebx,8 + jnz NEAR L$018sw_loop +L$017sw_finish: + mov ebx,DWORD [28+esp] + and ebx,7 + jz NEAR L$019sw_end + ; Tail Round 0 + mov eax,DWORD [edi] + mul eax + mov DWORD [esi],eax + dec ebx + mov DWORD [4+esi],edx + jz NEAR L$019sw_end + ; Tail Round 1 + mov eax,DWORD [4+edi] + mul eax + mov DWORD [8+esi],eax + dec ebx + mov DWORD [12+esi],edx + jz NEAR L$019sw_end + ; Tail Round 2 + mov eax,DWORD [8+edi] + mul eax + mov DWORD [16+esi],eax + dec ebx + mov DWORD [20+esi],edx + jz NEAR L$019sw_end + ; Tail Round 3 + mov eax,DWORD [12+edi] + mul eax + mov DWORD [24+esi],eax + dec ebx + mov DWORD [28+esi],edx + jz NEAR L$019sw_end + ; Tail Round 4 + mov eax,DWORD [16+edi] + mul eax + mov DWORD [32+esi],eax + dec ebx + mov DWORD [36+esi],edx + jz NEAR L$019sw_end + ; Tail Round 5 + mov eax,DWORD [20+edi] + mul eax + mov DWORD [40+esi],eax + dec ebx + mov DWORD [44+esi],edx + jz NEAR L$019sw_end + ; Tail Round 6 + mov eax,DWORD [24+edi] + mul eax + mov DWORD [48+esi],eax + mov DWORD [52+esi],edx +L$019sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_div_words +align 16 +_bn_div_words: +L$_bn_div_words_begin: + mov edx,DWORD [4+esp] + mov eax,DWORD [8+esp] + mov ecx,DWORD [12+esp] + div ecx + ret +global _bn_add_words +align 16 +_bn_add_words: +L$_bn_add_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$020aw_finish +L$021aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$021aw_loop +L$020aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$022aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$022aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$022aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sub_words +align 16 +_bn_sub_words: +L$_bn_sub_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$023aw_finish +L$024aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$024aw_loop +L$023aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$025aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$025aw_end + ; Tail Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx +L$025aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +global _bn_sub_part_words +align 16 +_bn_sub_part_words: +L$_bn_sub_part_words_begin: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov edi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + and ebp,4294967288 + jz NEAR L$026aw_finish +L$027aw_loop: + ; Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; Round 1 + mov ecx,DWORD [4+esi] + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; Round 2 + mov ecx,DWORD [8+esi] + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; Round 3 + mov ecx,DWORD [12+esi] + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; Round 4 + mov ecx,DWORD [16+esi] + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; Round 5 + mov ecx,DWORD [20+esi] + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; Round 6 + mov ecx,DWORD [24+esi] + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; Round 7 + mov ecx,DWORD [28+esi] + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$027aw_loop +L$026aw_finish: + mov ebp,DWORD [32+esp] + and ebp,7 + jz NEAR L$028aw_end + ; Tail Round 0 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 1 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 2 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 3 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 4 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 5 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz NEAR L$028aw_end + ; Tail Round 6 + mov ecx,DWORD [esi] + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 +L$028aw_end: + cmp DWORD [36+esp],0 + je NEAR L$029pw_end + mov ebp,DWORD [36+esp] + cmp ebp,0 + je NEAR L$029pw_end + jge NEAR L$030pw_pos + ; pw_neg + mov edx,0 + sub edx,ebp + mov ebp,edx + and ebp,4294967288 + jz NEAR L$031pw_neg_finish +L$032pw_neg_loop: + ; dl<0 Round 0 + mov ecx,0 + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [ebx],ecx + ; dl<0 Round 1 + mov ecx,0 + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [4+ebx],ecx + ; dl<0 Round 2 + mov ecx,0 + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [8+ebx],ecx + ; dl<0 Round 3 + mov ecx,0 + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [12+ebx],ecx + ; dl<0 Round 4 + mov ecx,0 + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [16+ebx],ecx + ; dl<0 Round 5 + mov ecx,0 + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [20+ebx],ecx + ; dl<0 Round 6 + mov ecx,0 + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + ; dl<0 Round 7 + mov ecx,0 + mov edx,DWORD [28+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [28+ebx],ecx + ; + add edi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$032pw_neg_loop +L$031pw_neg_finish: + mov edx,DWORD [36+esp] + mov ebp,0 + sub ebp,edx + and ebp,7 + jz NEAR L$029pw_end + ; dl<0 Tail Round 0 + mov ecx,0 + mov edx,DWORD [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 1 + mov ecx,0 + mov edx,DWORD [4+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [4+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 2 + mov ecx,0 + mov edx,DWORD [8+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [8+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 3 + mov ecx,0 + mov edx,DWORD [12+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [12+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 4 + mov ecx,0 + mov edx,DWORD [16+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [16+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 5 + mov ecx,0 + mov edx,DWORD [20+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD [20+ebx],ecx + jz NEAR L$029pw_end + ; dl<0 Tail Round 6 + mov ecx,0 + mov edx,DWORD [24+edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD [24+ebx],ecx + jmp NEAR L$029pw_end +L$030pw_pos: + and ebp,4294967288 + jz NEAR L$033pw_pos_finish +L$034pw_pos_loop: + ; dl>0 Round 0 + mov ecx,DWORD [esi] + sub ecx,eax + mov DWORD [ebx],ecx + jnc NEAR L$035pw_nc0 + ; dl>0 Round 1 + mov ecx,DWORD [4+esi] + sub ecx,eax + mov DWORD [4+ebx],ecx + jnc NEAR L$036pw_nc1 + ; dl>0 Round 2 + mov ecx,DWORD [8+esi] + sub ecx,eax + mov DWORD [8+ebx],ecx + jnc NEAR L$037pw_nc2 + ; dl>0 Round 3 + mov ecx,DWORD [12+esi] + sub ecx,eax + mov DWORD [12+ebx],ecx + jnc NEAR L$038pw_nc3 + ; dl>0 Round 4 + mov ecx,DWORD [16+esi] + sub ecx,eax + mov DWORD [16+ebx],ecx + jnc NEAR L$039pw_nc4 + ; dl>0 Round 5 + mov ecx,DWORD [20+esi] + sub ecx,eax + mov DWORD [20+ebx],ecx + jnc NEAR L$040pw_nc5 + ; dl>0 Round 6 + mov ecx,DWORD [24+esi] + sub ecx,eax + mov DWORD [24+ebx],ecx + jnc NEAR L$041pw_nc6 + ; dl>0 Round 7 + mov ecx,DWORD [28+esi] + sub ecx,eax + mov DWORD [28+ebx],ecx + jnc NEAR L$042pw_nc7 + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$034pw_pos_loop +L$033pw_pos_finish: + mov ebp,DWORD [36+esp] + and ebp,7 + jz NEAR L$029pw_end + ; dl>0 Tail Round 0 + mov ecx,DWORD [esi] + sub ecx,eax + mov DWORD [ebx],ecx + jnc NEAR L$043pw_tail_nc0 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 1 + mov ecx,DWORD [4+esi] + sub ecx,eax + mov DWORD [4+ebx],ecx + jnc NEAR L$044pw_tail_nc1 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 2 + mov ecx,DWORD [8+esi] + sub ecx,eax + mov DWORD [8+ebx],ecx + jnc NEAR L$045pw_tail_nc2 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 3 + mov ecx,DWORD [12+esi] + sub ecx,eax + mov DWORD [12+ebx],ecx + jnc NEAR L$046pw_tail_nc3 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 4 + mov ecx,DWORD [16+esi] + sub ecx,eax + mov DWORD [16+ebx],ecx + jnc NEAR L$047pw_tail_nc4 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 5 + mov ecx,DWORD [20+esi] + sub ecx,eax + mov DWORD [20+ebx],ecx + jnc NEAR L$048pw_tail_nc5 + dec ebp + jz NEAR L$029pw_end + ; dl>0 Tail Round 6 + mov ecx,DWORD [24+esi] + sub ecx,eax + mov DWORD [24+ebx],ecx + jnc NEAR L$049pw_tail_nc6 + mov eax,1 + jmp NEAR L$029pw_end +L$050pw_nc_loop: + mov ecx,DWORD [esi] + mov DWORD [ebx],ecx +L$035pw_nc0: + mov ecx,DWORD [4+esi] + mov DWORD [4+ebx],ecx +L$036pw_nc1: + mov ecx,DWORD [8+esi] + mov DWORD [8+ebx],ecx +L$037pw_nc2: + mov ecx,DWORD [12+esi] + mov DWORD [12+ebx],ecx +L$038pw_nc3: + mov ecx,DWORD [16+esi] + mov DWORD [16+ebx],ecx +L$039pw_nc4: + mov ecx,DWORD [20+esi] + mov DWORD [20+ebx],ecx +L$040pw_nc5: + mov ecx,DWORD [24+esi] + mov DWORD [24+ebx],ecx +L$041pw_nc6: + mov ecx,DWORD [28+esi] + mov DWORD [28+ebx],ecx +L$042pw_nc7: + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz NEAR L$050pw_nc_loop + mov ebp,DWORD [36+esp] + and ebp,7 + jz NEAR L$051pw_nc_end + mov ecx,DWORD [esi] + mov DWORD [ebx],ecx +L$043pw_tail_nc0: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [4+esi] + mov DWORD [4+ebx],ecx +L$044pw_tail_nc1: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [8+esi] + mov DWORD [8+ebx],ecx +L$045pw_tail_nc2: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [12+esi] + mov DWORD [12+ebx],ecx +L$046pw_tail_nc3: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [16+esi] + mov DWORD [16+ebx],ecx +L$047pw_tail_nc4: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [20+esi] + mov DWORD [20+ebx],ecx +L$048pw_tail_nc5: + dec ebp + jz NEAR L$051pw_nc_end + mov ecx,DWORD [24+esi] + mov DWORD [24+ebx],ecx +L$049pw_tail_nc6: +L$051pw_nc_end: + mov eax,0 +L$029pw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/bn/co-586.asm b/third_party/boringssl/win-x86/crypto/bn/co-586.asm new file mode 100644 index 00000000000..5780dc841bf --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/bn/co-586.asm @@ -0,0 +1,1260 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +global _bn_mul_comba8 +align 16 +_bn_mul_comba8: +L$_bn_mul_comba8_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [16+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[4]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[0]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[5]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[4]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[1]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[0]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [24+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[6]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[5]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[4]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[2]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[1]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[0]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [24+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ################## Calculate word 7 + xor ebx,ebx + ; mul a[7]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[6]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[5]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[4]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[3]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [20+edi] + adc ebx,0 + ; mul a[2]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[1]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[0]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + mov DWORD [28+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[7] + ; ################## Calculate word 8 + xor ecx,ecx + ; mul a[7]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[6]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[5]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [16+edi] + adc ecx,0 + ; mul a[4]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [12+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[3]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[2]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[1]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + mov DWORD [32+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ################## Calculate word 9 + xor ebp,ebp + ; mul a[7]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[6]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [16+edi] + adc ebp,0 + ; mul a[5]*b[4] + mul edx + add ebx,eax + mov eax,DWORD [16+esi] + adc ecx,edx + mov edx,DWORD [20+edi] + adc ebp,0 + ; mul a[4]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [12+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[3]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [28+edi] + adc ebp,0 + ; mul a[2]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + mov DWORD [36+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[9] + ; ################## Calculate word 10 + xor ebx,ebx + ; mul a[7]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + ; mul a[6]*b[4] + mul edx + add ecx,eax + mov eax,DWORD [20+esi] + adc ebp,edx + mov edx,DWORD [20+edi] + adc ebx,0 + ; mul a[5]*b[5] + mul edx + add ecx,eax + mov eax,DWORD [16+esi] + adc ebp,edx + mov edx,DWORD [24+edi] + adc ebx,0 + ; mul a[4]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [12+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[3]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [16+edi] + adc ebx,0 + mov DWORD [40+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ################## Calculate word 11 + xor ecx,ecx + ; mul a[7]*b[4] + mul edx + add ebp,eax + mov eax,DWORD [24+esi] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + ; mul a[6]*b[5] + mul edx + add ebp,eax + mov eax,DWORD [20+esi] + adc ebx,edx + mov edx,DWORD [24+edi] + adc ecx,0 + ; mul a[5]*b[6] + mul edx + add ebp,eax + mov eax,DWORD [16+esi] + adc ebx,edx + mov edx,DWORD [28+edi] + adc ecx,0 + ; mul a[4]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [20+edi] + adc ecx,0 + mov DWORD [44+eax],ebp + mov eax,DWORD [28+esi] + ; saved r[11] + ; ################## Calculate word 12 + xor ebp,ebp + ; mul a[7]*b[5] + mul edx + add ebx,eax + mov eax,DWORD [24+esi] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + ; mul a[6]*b[6] + mul edx + add ebx,eax + mov eax,DWORD [20+esi] + adc ecx,edx + mov edx,DWORD [28+edi] + adc ebp,0 + ; mul a[5]*b[7] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [24+edi] + adc ebp,0 + mov DWORD [48+eax],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; ################## Calculate word 13 + xor ebx,ebx + ; mul a[7]*b[6] + mul edx + add ecx,eax + mov eax,DWORD [24+esi] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + ; mul a[6]*b[7] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [28+edi] + adc ebx,0 + mov DWORD [52+eax],ecx + mov eax,DWORD [28+esi] + ; saved r[13] + ; ################## Calculate word 14 + xor ecx,ecx + ; mul a[7]*b[7] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + adc ecx,0 + mov DWORD [56+eax],ebp + ; saved r[14] + ; save r[15] + mov DWORD [60+eax],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_mul_comba4 +align 16 +_bn_mul_comba4: +L$_bn_mul_comba4_begin: + push esi + mov esi,DWORD [12+esp] + push edi + mov edi,DWORD [20+esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD [esi] + xor ecx,ecx + mov edx,DWORD [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [edi] + adc ebp,0 + mov DWORD [eax],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD [esi] + adc ebp,edx + mov edx,DWORD [4+edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [edi] + adc ebx,0 + mov DWORD [4+eax],ecx + mov eax,DWORD [8+esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD [4+esi] + adc ebx,edx + mov edx,DWORD [4+edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD [esi] + adc ebx,edx + mov edx,DWORD [8+edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [edi] + adc ecx,0 + mov DWORD [8+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD [8+esi] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD [4+esi] + adc ecx,edx + mov edx,DWORD [8+edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD [esi] + adc ecx,edx + mov edx,DWORD [12+edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + mov edx,DWORD [4+edi] + adc ebp,0 + mov DWORD [12+eax],ebx + mov eax,DWORD [12+esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD [8+esi] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD [4+esi] + adc ebp,edx + mov edx,DWORD [12+edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD [20+esp] + adc ebp,edx + mov edx,DWORD [8+edi] + adc ebx,0 + mov DWORD [16+eax],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD [8+esi] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD [20+esp] + adc ebx,edx + mov edx,DWORD [12+edi] + adc ecx,0 + mov DWORD [20+eax],ebp + mov eax,DWORD [12+esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD [20+esp] + adc ecx,edx + adc ebp,0 + mov DWORD [24+eax],ebx + ; saved r[6] + ; save r[7] + mov DWORD [28+eax],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba8 +align 16 +_bn_sqr_comba8: +L$_bn_sqr_comba8_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[4]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [12+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [20+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[5]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + mov edx,DWORD [4+esi] + ; sqr a[4]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov DWORD [20+edi],ebp + mov edx,DWORD [esi] + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[6]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[5]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [16+esi] + adc ebp,0 + mov edx,DWORD [8+esi] + ; sqr a[4]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [24+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[6] + ; ############### Calculate word 7 + xor ebx,ebx + ; sqr a[7]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [4+esi] + ; sqr a[6]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + mov edx,DWORD [8+esi] + ; sqr a[5]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [16+esi] + adc ebx,0 + mov edx,DWORD [12+esi] + ; sqr a[4]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [28+edi],ecx + mov edx,DWORD [4+esi] + ; saved r[7] + ; ############### Calculate word 8 + xor ecx,ecx + ; sqr a[7]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [8+esi] + ; sqr a[6]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [20+esi] + adc ecx,0 + mov edx,DWORD [12+esi] + ; sqr a[5]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [16+esi] + adc ecx,0 + ; sqr a[4]*a[4] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [8+esi] + adc ecx,0 + mov DWORD [32+edi],ebp + mov eax,DWORD [28+esi] + ; saved r[8] + ; ############### Calculate word 9 + xor ebp,ebp + ; sqr a[7]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + mov edx,DWORD [12+esi] + ; sqr a[6]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [20+esi] + adc ebp,0 + mov edx,DWORD [16+esi] + ; sqr a[5]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [28+esi] + adc ebp,0 + mov DWORD [36+edi],ebx + mov edx,DWORD [12+esi] + ; saved r[9] + ; ############### Calculate word 10 + xor ebx,ebx + ; sqr a[7]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [24+esi] + adc ebx,0 + mov edx,DWORD [16+esi] + ; sqr a[6]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [20+esi] + adc ebx,0 + ; sqr a[5]*a[5] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [16+esi] + adc ebx,0 + mov DWORD [40+edi],ecx + mov eax,DWORD [28+esi] + ; saved r[10] + ; ############### Calculate word 11 + xor ecx,ecx + ; sqr a[7]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [24+esi] + adc ecx,0 + mov edx,DWORD [20+esi] + ; sqr a[6]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [28+esi] + adc ecx,0 + mov DWORD [44+edi],ebp + mov edx,DWORD [20+esi] + ; saved r[11] + ; ############### Calculate word 12 + xor ebp,ebp + ; sqr a[7]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [24+esi] + adc ebp,0 + ; sqr a[6]*a[6] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [24+esi] + adc ebp,0 + mov DWORD [48+edi],ebx + mov eax,DWORD [28+esi] + ; saved r[12] + ; ############### Calculate word 13 + xor ebx,ebx + ; sqr a[7]*a[6] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [28+esi] + adc ebx,0 + mov DWORD [52+edi],ecx + ; saved r[13] + ; ############### Calculate word 14 + xor ecx,ecx + ; sqr a[7]*a[7] + mul eax + add ebp,eax + adc ebx,edx + adc ecx,0 + mov DWORD [56+edi],ebp + ; saved r[14] + mov DWORD [60+edi],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +global _bn_sqr_comba4 +align 16 +_bn_sqr_comba4: +L$_bn_sqr_comba4_begin: + push esi + push edi + push ebp + push ebx + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD [esi] + adc ebp,0 + mov DWORD [edi],ebx + mov eax,DWORD [4+esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + mov DWORD [4+edi],ecx + mov edx,DWORD [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [4+esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD [esi] + adc ecx,0 + mov DWORD [8+edi],ebp + mov eax,DWORD [12+esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [8+esi] + adc ebp,0 + mov edx,DWORD [4+esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD [12+esi] + adc ebp,0 + mov DWORD [12+edi],ebx + mov edx,DWORD [4+esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD [8+esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD [8+esi] + adc ebx,0 + mov DWORD [16+edi],ecx + mov eax,DWORD [12+esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD [12+esi] + adc ecx,0 + mov DWORD [20+edi],ebp + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + adc ebp,0 + mov DWORD [24+edi],ebx + ; saved r[6] + mov DWORD [28+edi],ecx + pop ebx + pop ebp + pop edi + pop esi + ret diff --git a/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm b/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm new file mode 100644 index 00000000000..de7b9499272 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/bn/x86-mont.asm @@ -0,0 +1,469 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _bn_mul_mont +align 16 +_bn_mul_mont: +L$_bn_mul_mont_begin: + push ebp + push ebx + push esi + push edi + xor eax,eax + mov edi,DWORD [40+esp] + cmp edi,4 + jl NEAR L$000just_leave + lea esi,[20+esp] + lea edx,[24+esp] + mov ebp,esp + add edi,2 + neg edi + lea esp,[edi*4+esp-32] + neg edi + mov eax,esp + sub eax,edx + and eax,2047 + sub esp,eax + xor edx,esp + and edx,2048 + xor edx,2048 + sub esp,edx + and esp,-64 + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + mov esi,DWORD [16+esi] + mov esi,DWORD [esi] + mov DWORD [4+esp],eax + mov DWORD [8+esp],ebx + mov DWORD [12+esp],ecx + mov DWORD [16+esp],edx + mov DWORD [20+esp],esi + lea ebx,[edi-3] + mov DWORD [24+esp],ebp + lea eax,[_OPENSSL_ia32cap_P] + bt DWORD [eax],26 + jnc NEAR L$001non_sse2 + mov eax,-1 + movd mm7,eax + mov esi,DWORD [8+esp] + mov edi,DWORD [12+esp] + mov ebp,DWORD [16+esp] + xor edx,edx + xor ecx,ecx + movd mm4,DWORD [edi] + movd mm5,DWORD [esi] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + movq mm2,mm5 + movq mm0,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + inc ecx +align 16 +L$0021st: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + lea ecx,[1+ecx] + cmp ecx,ebx + jl NEAR L$0021st + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + paddq mm3,mm2 + movq [32+ebx*4+esp],mm3 + inc edx +L$003outer: + xor ecx,ecx + movd mm4,DWORD [edx*4+edi] + movd mm5,DWORD [esi] + movd mm6,DWORD [32+esp] + movd mm3,DWORD [ebp] + pmuludq mm5,mm4 + paddq mm5,mm6 + movq mm0,mm5 + movq mm2,mm5 + pand mm0,mm7 + pmuludq mm5,[20+esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm6,DWORD [36+esp] + movd mm1,DWORD [4+ebp] + movd mm0,DWORD [4+esi] + psrlq mm2,32 + psrlq mm3,32 + paddq mm2,mm6 + inc ecx + dec ebx +L$004inner: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + movd mm6,DWORD [36+ecx*4+esp] + pand mm0,mm7 + movd mm1,DWORD [4+ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD [4+ecx*4+esi] + psrlq mm2,32 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm3,32 + paddq mm2,mm6 + dec ebx + lea ecx,[1+ecx] + jnz NEAR L$004inner + mov ebx,ecx + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD [28+ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + movd mm6,DWORD [36+ebx*4+esp] + paddq mm3,mm2 + paddq mm3,mm6 + movq [32+ebx*4+esp],mm3 + lea edx,[1+edx] + cmp edx,ebx + jle NEAR L$003outer + emms + jmp NEAR L$005common_tail +align 16 +L$001non_sse2: + mov esi,DWORD [8+esp] + lea ebp,[1+ebx] + mov edi,DWORD [12+esp] + xor ecx,ecx + mov edx,esi + and ebp,1 + sub edx,edi + lea eax,[4+ebx*4+edi] + or ebp,edx + mov edi,DWORD [edi] + jz NEAR L$006bn_sqr_mont + mov DWORD [28+esp],eax + mov eax,DWORD [esi] + xor edx,edx +align 16 +L$007mull: + mov ebp,edx + mul edi + add ebp,eax + lea ecx,[1+ecx] + adc edx,0 + mov eax,DWORD [ecx*4+esi] + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$007mull + mov ebp,edx + mul edi + mov edi,DWORD [20+esp] + add eax,ebp + mov esi,DWORD [16+esp] + adc edx,0 + imul edi,DWORD [32+esp] + mov DWORD [32+ebx*4+esp],eax + xor ecx,ecx + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mov eax,DWORD [esi] + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD [4+esi] + adc edx,0 + inc ecx + jmp NEAR L$0082ndmadd +align 16 +L$0091stmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$0091stmadd + mov ebp,edx + mul edi + add eax,DWORD [32+ebx*4+esp] + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + add ebp,eax + adc edx,0 + imul edi,DWORD [32+esp] + xor ecx,ecx + add edx,DWORD [36+ebx*4+esp] + mov DWORD [32+ebx*4+esp],ebp + adc ecx,0 + mov eax,DWORD [esi] + mov DWORD [36+ebx*4+esp],edx + mov DWORD [40+ebx*4+esp],ecx + mul edi + add eax,DWORD [32+esp] + mov eax,DWORD [4+esi] + adc edx,0 + mov ecx,1 +align 16 +L$0082ndmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0082ndmadd + mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + xor eax,eax + mov ecx,DWORD [12+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + lea ecx,[4+ecx] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,DWORD [28+esp] + mov DWORD [36+ebx*4+esp],eax + je NEAR L$005common_tail + mov edi,DWORD [ecx] + mov esi,DWORD [8+esp] + mov DWORD [12+esp],ecx + xor ecx,ecx + xor edx,edx + mov eax,DWORD [esi] + jmp NEAR L$0091stmadd +align 16 +L$006bn_sqr_mont: + mov DWORD [esp],ebx + mov DWORD [12+esp],ecx + mov eax,edi + mul edi + mov DWORD [32+esp],eax + mov ebx,edx + shr edx,1 + and ebx,1 + inc ecx +align 16 +L$010sqr: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ecx,[1+ecx] + adc edx,0 + lea ebp,[eax*2+ebx] + shr eax,31 + cmp ecx,DWORD [esp] + mov ebx,eax + mov DWORD [28+ecx*4+esp],ebp + jl NEAR L$010sqr + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + mov edi,DWORD [20+esp] + adc edx,0 + mov esi,DWORD [16+esp] + lea ebp,[eax*2+ebx] + imul edi,DWORD [32+esp] + shr eax,31 + mov DWORD [32+ecx*4+esp],ebp + lea ebp,[edx*2+eax] + mov eax,DWORD [esi] + shr edx,31 + mov DWORD [36+ecx*4+esp],ebp + mov DWORD [40+ecx*4+esp],edx + mul edi + add eax,DWORD [32+esp] + mov ebx,ecx + adc edx,0 + mov eax,DWORD [4+esi] + mov ecx,1 +align 16 +L$0113rdmadd: + mov ebp,edx + mul edi + add ebp,DWORD [32+ecx*4+esp] + adc edx,0 + add ebp,eax + mov eax,DWORD [4+ecx*4+esi] + adc edx,0 + mov DWORD [28+ecx*4+esp],ebp + mov ebp,edx + mul edi + add ebp,DWORD [36+ecx*4+esp] + lea ecx,[2+ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD [24+ecx*4+esp],ebp + jl NEAR L$0113rdmadd + mov ebp,edx + mul edi + add ebp,DWORD [32+ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD [28+ebx*4+esp],ebp + mov ecx,DWORD [12+esp] + xor eax,eax + mov esi,DWORD [8+esp] + add edx,DWORD [36+ebx*4+esp] + adc eax,DWORD [40+ebx*4+esp] + mov DWORD [32+ebx*4+esp],edx + cmp ecx,ebx + mov DWORD [36+ebx*4+esp],eax + je NEAR L$005common_tail + mov edi,DWORD [4+ecx*4+esi] + lea ecx,[1+ecx] + mov eax,edi + mov DWORD [12+esp],ecx + mul edi + add eax,DWORD [32+ecx*4+esp] + adc edx,0 + mov DWORD [32+ecx*4+esp],eax + xor ebp,ebp + cmp ecx,ebx + lea ecx,[1+ecx] + je NEAR L$012sqrlast + mov ebx,edx + shr edx,1 + and ebx,1 +align 16 +L$013sqradd: + mov eax,DWORD [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ebp,[eax*1+eax] + adc edx,0 + shr eax,31 + add ebp,DWORD [32+ecx*4+esp] + lea ecx,[1+ecx] + adc eax,0 + add ebp,ebx + adc eax,0 + cmp ecx,DWORD [esp] + mov DWORD [28+ecx*4+esp],ebp + mov ebx,eax + jle NEAR L$013sqradd + mov ebp,edx + add edx,edx + shr ebp,31 + add edx,ebx + adc ebp,0 +L$012sqrlast: + mov edi,DWORD [20+esp] + mov esi,DWORD [16+esp] + imul edi,DWORD [32+esp] + add edx,DWORD [32+ecx*4+esp] + mov eax,DWORD [esi] + adc ebp,0 + mov DWORD [32+ecx*4+esp],edx + mov DWORD [36+ecx*4+esp],ebp + mul edi + add eax,DWORD [32+esp] + lea ebx,[ecx-1] + adc edx,0 + mov ecx,1 + mov eax,DWORD [4+esi] + jmp NEAR L$0113rdmadd +align 16 +L$005common_tail: + mov ebp,DWORD [16+esp] + mov edi,DWORD [4+esp] + lea esi,[32+esp] + mov eax,DWORD [esi] + mov ecx,ebx + xor edx,edx +align 16 +L$014sub: + sbb eax,DWORD [edx*4+ebp] + mov DWORD [edx*4+edi],eax + dec ecx + mov eax,DWORD [4+edx*4+esi] + lea edx,[1+edx] + jge NEAR L$014sub + sbb eax,0 +align 16 +L$015copy: + mov edx,DWORD [ebx*4+esi] + mov ebp,DWORD [ebx*4+edi] + xor edx,ebp + and edx,eax + xor edx,ebp + mov DWORD [ebx*4+esi],ecx + mov DWORD [ebx*4+edi],edx + dec ebx + jge NEAR L$015copy + mov esp,DWORD [24+esp] + mov eax,1 +L$000just_leave: + pop edi + pop esi + pop ebx + pop ebp + ret +db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +db 111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/cpu-x86-asm.asm b/third_party/boringssl/win-x86/crypto/cpu-x86-asm.asm new file mode 100644 index 00000000000..4317a73bc88 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/cpu-x86-asm.asm @@ -0,0 +1,303 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +global _OPENSSL_ia32_cpuid +align 16 +_OPENSSL_ia32_cpuid: +L$_OPENSSL_ia32_cpuid_begin: + push ebp + push ebx + push esi + push edi + xor edx,edx + pushfd + pop eax + mov ecx,eax + xor eax,2097152 + push eax + popfd + pushfd + pop eax + xor ecx,eax + xor eax,eax + bt ecx,21 + jnc NEAR L$000nocpuid + mov esi,DWORD [20+esp] + mov DWORD [8+esi],eax + cpuid + mov edi,eax + xor eax,eax + cmp ebx,1970169159 + setne al + mov ebp,eax + cmp edx,1231384169 + setne al + or ebp,eax + cmp ecx,1818588270 + setne al + or ebp,eax + jz NEAR L$001intel + cmp ebx,1752462657 + setne al + mov esi,eax + cmp edx,1769238117 + setne al + or esi,eax + cmp ecx,1145913699 + setne al + or esi,eax + jnz NEAR L$001intel + mov eax,2147483648 + cpuid + cmp eax,2147483649 + jb NEAR L$001intel + mov esi,eax + mov eax,2147483649 + cpuid + or ebp,ecx + and ebp,2049 + cmp esi,2147483656 + jb NEAR L$001intel + mov eax,2147483656 + cpuid + movzx esi,cl + inc esi + mov eax,1 + xor ecx,ecx + cpuid + bt edx,28 + jnc NEAR L$002generic + shr ebx,16 + and ebx,255 + cmp ebx,esi + ja NEAR L$002generic + and edx,4026531839 + jmp NEAR L$002generic +L$001intel: + cmp edi,7 + jb NEAR L$003cacheinfo + mov esi,DWORD [20+esp] + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD [8+esi],ebx +L$003cacheinfo: + cmp edi,4 + mov edi,-1 + jb NEAR L$004nocacheinfo + mov eax,4 + mov ecx,0 + cpuid + mov edi,eax + shr edi,14 + and edi,4095 +L$004nocacheinfo: + mov eax,1 + xor ecx,ecx + cpuid + and edx,3220176895 + cmp ebp,0 + jne NEAR L$005notintel + or edx,1073741824 +L$005notintel: + bt edx,28 + jnc NEAR L$002generic + and edx,4026531839 + cmp edi,0 + je NEAR L$002generic + or edx,268435456 + shr ebx,16 + cmp bl,1 + ja NEAR L$002generic + and edx,4026531839 +L$002generic: + and ebp,2048 + and ecx,4294965247 + mov esi,edx + or ebp,ecx + bt ecx,27 + jnc NEAR L$006clear_avx + xor ecx,ecx +db 15,1,208 + and eax,6 + cmp eax,6 + je NEAR L$007done + cmp eax,2 + je NEAR L$006clear_avx +L$008clear_xmm: + and ebp,4261412861 + and esi,4278190079 +L$006clear_avx: + and ebp,4026525695 + mov edi,DWORD [20+esp] + and DWORD [8+edi],4294967263 +L$007done: + mov eax,esi + mov edx,ebp +L$000nocpuid: + pop edi + pop esi + pop ebx + pop ebp + ret +;extern _OPENSSL_ia32cap_P +global _OPENSSL_rdtsc +align 16 +_OPENSSL_rdtsc: +L$_OPENSSL_rdtsc_begin: + xor eax,eax + xor edx,edx + lea ecx,[_OPENSSL_ia32cap_P] + bt DWORD [ecx],4 + jnc NEAR L$009notsc + rdtsc +L$009notsc: + ret +global _OPENSSL_instrument_halt +align 16 +_OPENSSL_instrument_halt: +L$_OPENSSL_instrument_halt_begin: + lea ecx,[_OPENSSL_ia32cap_P] + bt DWORD [ecx],4 + jnc NEAR L$010nohalt +dd 2421723150 + and eax,3 + jnz NEAR L$010nohalt + pushfd + pop eax + bt eax,9 + jnc NEAR L$010nohalt + rdtsc + push edx + push eax + hlt + rdtsc + sub eax,DWORD [esp] + sbb edx,DWORD [4+esp] + add esp,8 + ret +L$010nohalt: + xor eax,eax + xor edx,edx + ret +global _OPENSSL_far_spin +align 16 +_OPENSSL_far_spin: +L$_OPENSSL_far_spin_begin: + pushfd + pop eax + bt eax,9 + jnc NEAR L$011nospin + mov eax,DWORD [4+esp] + mov ecx,DWORD [8+esp] +dd 2430111262 + xor eax,eax + mov edx,DWORD [ecx] + jmp NEAR L$012spin +align 16 +L$012spin: + inc eax + cmp edx,DWORD [ecx] + je NEAR L$012spin +dd 529567888 + ret +L$011nospin: + xor eax,eax + xor edx,edx + ret +global _OPENSSL_wipe_cpu +align 16 +_OPENSSL_wipe_cpu: +L$_OPENSSL_wipe_cpu_begin: + xor eax,eax + xor edx,edx + lea ecx,[_OPENSSL_ia32cap_P] + mov ecx,DWORD [ecx] + bt DWORD [ecx],1 + jnc NEAR L$013no_x87 + and ecx,83886080 + cmp ecx,83886080 + jne NEAR L$014no_sse2 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 +L$014no_sse2: +dd 4007259865,4007259865,4007259865,4007259865,2430851995 +L$013no_x87: + lea eax,[4+esp] + ret +global _OPENSSL_atomic_add +align 16 +_OPENSSL_atomic_add: +L$_OPENSSL_atomic_add_begin: + mov edx,DWORD [4+esp] + mov ecx,DWORD [8+esp] + push ebx + nop + mov eax,DWORD [edx] +L$015spin: + lea ebx,[ecx*1+eax] + nop +dd 447811568 + jne NEAR L$015spin + mov eax,ebx + pop ebx + ret +global _OPENSSL_indirect_call +align 16 +_OPENSSL_indirect_call: +L$_OPENSSL_indirect_call_begin: + push ebp + mov ebp,esp + sub esp,28 + mov ecx,DWORD [12+ebp] + mov DWORD [esp],ecx + mov edx,DWORD [16+ebp] + mov DWORD [4+esp],edx + mov eax,DWORD [20+ebp] + mov DWORD [8+esp],eax + mov eax,DWORD [24+ebp] + mov DWORD [12+esp],eax + mov eax,DWORD [28+ebp] + mov DWORD [16+esp],eax + mov eax,DWORD [32+ebp] + mov DWORD [20+esp],eax + mov eax,DWORD [36+ebp] + mov DWORD [24+esp],eax + call DWORD [8+ebp] + mov esp,ebp + pop ebp + ret +global _OPENSSL_ia32_rdrand +align 16 +_OPENSSL_ia32_rdrand: +L$_OPENSSL_ia32_rdrand_begin: + mov ecx,8 +L$016loop: +db 15,199,240 + jc NEAR L$017break + loop L$016loop +L$017break: + cmp eax,0 + cmove eax,ecx + ret +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/md5/md5-586.asm b/third_party/boringssl/win-x86/crypto/md5/md5-586.asm new file mode 100644 index 00000000000..67ee21651fc --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/md5/md5-586.asm @@ -0,0 +1,691 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +global _md5_block_asm_data_order +align 16 +_md5_block_asm_data_order: +L$_md5_block_asm_data_order_begin: + push esi + push edi + mov edi,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov ecx,DWORD [20+esp] + push ebp + shl ecx,6 + push ebx + add ecx,esi + sub ecx,64 + mov eax,DWORD [edi] + push ecx + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] +L$000start: + ; + ; R0 section + mov edi,ecx + mov ebp,DWORD [esi] + ; R0 0 + xor edi,edx + and edi,ebx + lea eax,[3614090360+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [4+esi] + add eax,ebx + ; R0 1 + xor edi,ecx + and edi,eax + lea edx,[3905402710+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [8+esi] + add edx,eax + ; R0 2 + xor edi,ebx + and edi,edx + lea ecx,[606105819+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [12+esi] + add ecx,edx + ; R0 3 + xor edi,eax + and edi,ecx + lea ebx,[3250441966+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [16+esi] + add ebx,ecx + ; R0 4 + xor edi,edx + and edi,ebx + lea eax,[4118548399+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [20+esi] + add eax,ebx + ; R0 5 + xor edi,ecx + and edi,eax + lea edx,[1200080426+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [24+esi] + add edx,eax + ; R0 6 + xor edi,ebx + and edi,edx + lea ecx,[2821735955+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [28+esi] + add ecx,edx + ; R0 7 + xor edi,eax + and edi,ecx + lea ebx,[4249261313+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [32+esi] + add ebx,ecx + ; R0 8 + xor edi,edx + and edi,ebx + lea eax,[1770035416+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [36+esi] + add eax,ebx + ; R0 9 + xor edi,ecx + and edi,eax + lea edx,[2336552879+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [40+esi] + add edx,eax + ; R0 10 + xor edi,ebx + and edi,edx + lea ecx,[4294925233+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [44+esi] + add ecx,edx + ; R0 11 + xor edi,eax + and edi,ecx + lea ebx,[2304563134+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [48+esi] + add ebx,ecx + ; R0 12 + xor edi,edx + and edi,ebx + lea eax,[1804603682+ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD [52+esi] + add eax,ebx + ; R0 13 + xor edi,ecx + and edi,eax + lea edx,[4254626195+ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD [56+esi] + add edx,eax + ; R0 14 + xor edi,ebx + and edi,edx + lea ecx,[2792965006+ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD [60+esi] + add ecx,edx + ; R0 15 + xor edi,eax + and edi,ecx + lea ebx,[1236535329+ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD [4+esi] + add ebx,ecx + ; + ; R1 section + ; R1 16 + lea eax,[4129170786+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [24+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 17 + lea edx,[3225465664+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [44+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 18 + lea ecx,[643717713+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 19 + lea ebx,[3921069994+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [20+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 20 + lea eax,[3593408605+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [40+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 21 + lea edx,[38016083+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [60+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 22 + lea ecx,[3634488961+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [16+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 23 + lea ebx,[3889429448+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [36+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 24 + lea eax,[568446438+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [56+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 25 + lea edx,[3275163606+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [12+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 26 + lea ecx,[4107603335+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [32+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 27 + lea ebx,[1163531501+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [52+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 28 + lea eax,[2850285829+ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD [8+esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 29 + lea edx,[4243563512+ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD [28+esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 30 + lea ecx,[1735328473+ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD [48+esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 31 + lea ebx,[2368359562+ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD [20+esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; + ; R2 section + ; R2 32 + xor edi,edx + xor edi,ebx + lea eax,[4294588738+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [32+esi] + mov edi,ebx + ; R2 33 + lea edx,[2272392833+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [44+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 34 + xor edi,ebx + xor edi,edx + lea ecx,[1839030562+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [56+esi] + mov edi,edx + ; R2 35 + lea ebx,[4259657740+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [4+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 36 + xor edi,edx + xor edi,ebx + lea eax,[2763975236+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [16+esi] + mov edi,ebx + ; R2 37 + lea edx,[1272893353+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [28+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 38 + xor edi,ebx + xor edi,edx + lea ecx,[4139469664+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [40+esi] + mov edi,edx + ; R2 39 + lea ebx,[3200236656+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [52+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 40 + xor edi,edx + xor edi,ebx + lea eax,[681279174+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [esi] + mov edi,ebx + ; R2 41 + lea edx,[3936430074+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [12+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 42 + xor edi,ebx + xor edi,edx + lea ecx,[3572445317+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [24+esi] + mov edi,edx + ; R2 43 + lea ebx,[76029189+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [36+esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 44 + xor edi,edx + xor edi,ebx + lea eax,[3654602809+ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD [48+esi] + mov edi,ebx + ; R2 45 + lea edx,[3873151461+ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD [60+esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 46 + xor edi,ebx + xor edi,edx + lea ecx,[530742520+ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD [8+esi] + mov edi,edx + ; R2 47 + lea ebx,[3299628645+ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD [esi] + add ebx,edi + mov edi,-1 + rol ebx,23 + add ebx,ecx + ; + ; R3 section + ; R3 48 + xor edi,edx + or edi,ebx + lea eax,[4096336452+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [28+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 49 + or edi,eax + lea edx,[1126891415+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [56+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 50 + or edi,edx + lea ecx,[2878612391+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [20+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 51 + or edi,ecx + lea ebx,[4237533241+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [48+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 52 + or edi,ebx + lea eax,[1700485571+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [12+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 53 + or edi,eax + lea edx,[2399980690+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [40+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 54 + or edi,edx + lea ecx,[4293915773+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [4+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 55 + or edi,ecx + lea ebx,[2240044497+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [32+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 56 + or edi,ebx + lea eax,[1873313359+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [60+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 57 + or edi,eax + lea edx,[4264355552+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [24+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 58 + or edi,edx + lea ecx,[2734768916+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [52+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 59 + or edi,ecx + lea ebx,[1309151649+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [16+esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 60 + or edi,ebx + lea eax,[4149444226+ebp*1+eax] + xor edi,ecx + mov ebp,DWORD [44+esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 61 + or edi,eax + lea edx,[3174756917+ebp*1+edx] + xor edi,ebx + mov ebp,DWORD [8+esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 62 + or edi,edx + lea ecx,[718787259+ebp*1+ecx] + xor edi,eax + mov ebp,DWORD [36+esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 63 + or edi,ecx + lea ebx,[3951481745+ebp*1+ebx] + xor edi,edx + mov ebp,DWORD [24+esp] + add ebx,edi + add esi,64 + rol ebx,21 + mov edi,DWORD [ebp] + add ebx,ecx + add eax,edi + mov edi,DWORD [4+ebp] + add ebx,edi + mov edi,DWORD [8+ebp] + add ecx,edi + mov edi,DWORD [12+ebp] + add edx,edi + mov DWORD [ebp],eax + mov DWORD [4+ebp],ebx + mov edi,DWORD [esp] + mov DWORD [8+ebp],ecx + mov DWORD [12+ebp],edx + cmp edi,esi + jae NEAR L$000start + pop eax + pop ebx + pop ebp + pop edi + pop esi + ret diff --git a/third_party/boringssl/win-x86/crypto/modes/ghash-x86.asm b/third_party/boringssl/win-x86/crypto/modes/ghash-x86.asm new file mode 100644 index 00000000000..eb493aca636 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/modes/ghash-x86.asm @@ -0,0 +1,1265 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +global _gcm_gmult_4bit_x86 +align 16 +_gcm_gmult_4bit_x86: +L$_gcm_gmult_4bit_x86_begin: + push ebp + push ebx + push esi + push edi + sub esp,84 + mov edi,DWORD [104+esp] + mov esi,DWORD [108+esp] + mov ebp,DWORD [edi] + mov edx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov ebx,DWORD [12+edi] + mov DWORD [16+esp],0 + mov DWORD [20+esp],471859200 + mov DWORD [24+esp],943718400 + mov DWORD [28+esp],610271232 + mov DWORD [32+esp],1887436800 + mov DWORD [36+esp],1822425088 + mov DWORD [40+esp],1220542464 + mov DWORD [44+esp],1423966208 + mov DWORD [48+esp],3774873600 + mov DWORD [52+esp],4246732800 + mov DWORD [56+esp],3644850176 + mov DWORD [60+esp],3311403008 + mov DWORD [64+esp],2441084928 + mov DWORD [68+esp],2376073216 + mov DWORD [72+esp],2847932416 + mov DWORD [76+esp],3051356160 + mov DWORD [esp],ebp + mov DWORD [4+esp],edx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],ebx + shr ebx,20 + and ebx,240 + mov ebp,DWORD [4+ebx*1+esi] + mov edx,DWORD [ebx*1+esi] + mov ecx,DWORD [12+ebx*1+esi] + mov ebx,DWORD [8+ebx*1+esi] + xor eax,eax + mov edi,15 + jmp NEAR L$000x86_loop +align 16 +L$000x86_loop: + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD [16+eax*4+esp] + mov al,BYTE [edi*1+esp] + and al,240 + xor ebx,DWORD [8+eax*1+esi] + xor ecx,DWORD [12+eax*1+esi] + xor edx,DWORD [eax*1+esi] + xor ebp,DWORD [4+eax*1+esi] + dec edi + js NEAR L$001x86_break + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD [16+eax*4+esp] + mov al,BYTE [edi*1+esp] + shl al,4 + xor ebx,DWORD [8+eax*1+esi] + xor ecx,DWORD [12+eax*1+esi] + xor edx,DWORD [eax*1+esi] + xor ebp,DWORD [4+eax*1+esi] + jmp NEAR L$000x86_loop +align 16 +L$001x86_break: + bswap ebx + bswap ecx + bswap edx + bswap ebp + mov edi,DWORD [104+esp] + mov DWORD [12+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [4+edi],edx + mov DWORD [edi],ebp + add esp,84 + pop edi + pop esi + pop ebx + pop ebp + ret +global _gcm_ghash_4bit_x86 +align 16 +_gcm_ghash_4bit_x86: +L$_gcm_ghash_4bit_x86_begin: + push ebp + push ebx + push esi + push edi + sub esp,84 + mov ebx,DWORD [104+esp] + mov esi,DWORD [108+esp] + mov edi,DWORD [112+esp] + mov ecx,DWORD [116+esp] + add ecx,edi + mov DWORD [116+esp],ecx + mov ebp,DWORD [ebx] + mov edx,DWORD [4+ebx] + mov ecx,DWORD [8+ebx] + mov ebx,DWORD [12+ebx] + mov DWORD [16+esp],0 + mov DWORD [20+esp],471859200 + mov DWORD [24+esp],943718400 + mov DWORD [28+esp],610271232 + mov DWORD [32+esp],1887436800 + mov DWORD [36+esp],1822425088 + mov DWORD [40+esp],1220542464 + mov DWORD [44+esp],1423966208 + mov DWORD [48+esp],3774873600 + mov DWORD [52+esp],4246732800 + mov DWORD [56+esp],3644850176 + mov DWORD [60+esp],3311403008 + mov DWORD [64+esp],2441084928 + mov DWORD [68+esp],2376073216 + mov DWORD [72+esp],2847932416 + mov DWORD [76+esp],3051356160 +align 16 +L$002x86_outer_loop: + xor ebx,DWORD [12+edi] + xor ecx,DWORD [8+edi] + xor edx,DWORD [4+edi] + xor ebp,DWORD [edi] + mov DWORD [12+esp],ebx + mov DWORD [8+esp],ecx + mov DWORD [4+esp],edx + mov DWORD [esp],ebp + shr ebx,20 + and ebx,240 + mov ebp,DWORD [4+ebx*1+esi] + mov edx,DWORD [ebx*1+esi] + mov ecx,DWORD [12+ebx*1+esi] + mov ebx,DWORD [8+ebx*1+esi] + xor eax,eax + mov edi,15 + jmp NEAR L$003x86_loop +align 16 +L$003x86_loop: + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD [16+eax*4+esp] + mov al,BYTE [edi*1+esp] + and al,240 + xor ebx,DWORD [8+eax*1+esi] + xor ecx,DWORD [12+eax*1+esi] + xor edx,DWORD [eax*1+esi] + xor ebp,DWORD [4+eax*1+esi] + dec edi + js NEAR L$004x86_break + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD [16+eax*4+esp] + mov al,BYTE [edi*1+esp] + shl al,4 + xor ebx,DWORD [8+eax*1+esi] + xor ecx,DWORD [12+eax*1+esi] + xor edx,DWORD [eax*1+esi] + xor ebp,DWORD [4+eax*1+esi] + jmp NEAR L$003x86_loop +align 16 +L$004x86_break: + bswap ebx + bswap ecx + bswap edx + bswap ebp + mov edi,DWORD [112+esp] + lea edi,[16+edi] + cmp edi,DWORD [116+esp] + mov DWORD [112+esp],edi + jb NEAR L$002x86_outer_loop + mov edi,DWORD [104+esp] + mov DWORD [12+edi],ebx + mov DWORD [8+edi],ecx + mov DWORD [4+edi],edx + mov DWORD [edi],ebp + add esp,84 + pop edi + pop esi + pop ebx + pop ebp + ret +global _gcm_gmult_4bit_mmx +align 16 +_gcm_gmult_4bit_mmx: +L$_gcm_gmult_4bit_mmx_begin: + push ebp + push ebx + push esi + push edi + mov edi,DWORD [20+esp] + mov esi,DWORD [24+esp] + call L$005pic_point +L$005pic_point: + pop eax + lea eax,[(L$rem_4bit-L$005pic_point)+eax] + movzx ebx,BYTE [15+edi] + xor ecx,ecx + mov edx,ebx + mov cl,dl + mov ebp,14 + shl cl,4 + and edx,240 + movq mm0,[8+ecx*1+esi] + movq mm1,[ecx*1+esi] + movd ebx,mm0 + jmp NEAR L$006mmx_loop +align 16 +L$006mmx_loop: + psrlq mm0,4 + and ebx,15 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,[8+edx*1+esi] + mov cl,BYTE [ebp*1+edi] + psllq mm2,60 + pxor mm1,[ebx*8+eax] + dec ebp + movd ebx,mm0 + pxor mm1,[edx*1+esi] + mov edx,ecx + pxor mm0,mm2 + js NEAR L$007mmx_break + shl cl,4 + and ebx,15 + psrlq mm0,4 + and edx,240 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,[8+ecx*1+esi] + psllq mm2,60 + pxor mm1,[ebx*8+eax] + movd ebx,mm0 + pxor mm1,[ecx*1+esi] + pxor mm0,mm2 + jmp NEAR L$006mmx_loop +align 16 +L$007mmx_break: + shl cl,4 + and ebx,15 + psrlq mm0,4 + and edx,240 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,[8+ecx*1+esi] + psllq mm2,60 + pxor mm1,[ebx*8+eax] + movd ebx,mm0 + pxor mm1,[ecx*1+esi] + pxor mm0,mm2 + psrlq mm0,4 + and ebx,15 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,[8+edx*1+esi] + psllq mm2,60 + pxor mm1,[ebx*8+eax] + movd ebx,mm0 + pxor mm1,[edx*1+esi] + pxor mm0,mm2 + psrlq mm0,32 + movd edx,mm1 + psrlq mm1,32 + movd ecx,mm0 + movd ebp,mm1 + bswap ebx + bswap edx + bswap ecx + bswap ebp + emms + mov DWORD [12+edi],ebx + mov DWORD [4+edi],edx + mov DWORD [8+edi],ecx + mov DWORD [edi],ebp + pop edi + pop esi + pop ebx + pop ebp + ret +global _gcm_ghash_4bit_mmx +align 16 +_gcm_ghash_4bit_mmx: +L$_gcm_ghash_4bit_mmx_begin: + push ebp + push ebx + push esi + push edi + mov eax,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ecx,DWORD [28+esp] + mov edx,DWORD [32+esp] + mov ebp,esp + call L$008pic_point +L$008pic_point: + pop esi + lea esi,[(L$rem_8bit-L$008pic_point)+esi] + sub esp,544 + and esp,-64 + sub esp,16 + add edx,ecx + mov DWORD [544+esp],eax + mov DWORD [552+esp],edx + mov DWORD [556+esp],ebp + add ebx,128 + lea edi,[144+esp] + lea ebp,[400+esp] + mov edx,DWORD [ebx-120] + movq mm0,[ebx-120] + movq mm3,[ebx-128] + shl edx,4 + mov BYTE [esp],dl + mov edx,DWORD [ebx-104] + movq mm2,[ebx-104] + movq mm5,[ebx-112] + movq [edi-128],mm0 + psrlq mm0,4 + movq [edi],mm3 + movq mm7,mm3 + psrlq mm3,4 + shl edx,4 + mov BYTE [1+esp],dl + mov edx,DWORD [ebx-88] + movq mm1,[ebx-88] + psllq mm7,60 + movq mm4,[ebx-96] + por mm0,mm7 + movq [edi-120],mm2 + psrlq mm2,4 + movq [8+edi],mm5 + movq mm6,mm5 + movq [ebp-128],mm0 + psrlq mm5,4 + movq [ebp],mm3 + shl edx,4 + mov BYTE [2+esp],dl + mov edx,DWORD [ebx-72] + movq mm0,[ebx-72] + psllq mm6,60 + movq mm3,[ebx-80] + por mm2,mm6 + movq [edi-112],mm1 + psrlq mm1,4 + movq [16+edi],mm4 + movq mm7,mm4 + movq [ebp-120],mm2 + psrlq mm4,4 + movq [8+ebp],mm5 + shl edx,4 + mov BYTE [3+esp],dl + mov edx,DWORD [ebx-56] + movq mm2,[ebx-56] + psllq mm7,60 + movq mm5,[ebx-64] + por mm1,mm7 + movq [edi-104],mm0 + psrlq mm0,4 + movq [24+edi],mm3 + movq mm6,mm3 + movq [ebp-112],mm1 + psrlq mm3,4 + movq [16+ebp],mm4 + shl edx,4 + mov BYTE [4+esp],dl + mov edx,DWORD [ebx-40] + movq mm1,[ebx-40] + psllq mm6,60 + movq mm4,[ebx-48] + por mm0,mm6 + movq [edi-96],mm2 + psrlq mm2,4 + movq [32+edi],mm5 + movq mm7,mm5 + movq [ebp-104],mm0 + psrlq mm5,4 + movq [24+ebp],mm3 + shl edx,4 + mov BYTE [5+esp],dl + mov edx,DWORD [ebx-24] + movq mm0,[ebx-24] + psllq mm7,60 + movq mm3,[ebx-32] + por mm2,mm7 + movq [edi-88],mm1 + psrlq mm1,4 + movq [40+edi],mm4 + movq mm6,mm4 + movq [ebp-96],mm2 + psrlq mm4,4 + movq [32+ebp],mm5 + shl edx,4 + mov BYTE [6+esp],dl + mov edx,DWORD [ebx-8] + movq mm2,[ebx-8] + psllq mm6,60 + movq mm5,[ebx-16] + por mm1,mm6 + movq [edi-80],mm0 + psrlq mm0,4 + movq [48+edi],mm3 + movq mm7,mm3 + movq [ebp-88],mm1 + psrlq mm3,4 + movq [40+ebp],mm4 + shl edx,4 + mov BYTE [7+esp],dl + mov edx,DWORD [8+ebx] + movq mm1,[8+ebx] + psllq mm7,60 + movq mm4,[ebx] + por mm0,mm7 + movq [edi-72],mm2 + psrlq mm2,4 + movq [56+edi],mm5 + movq mm6,mm5 + movq [ebp-80],mm0 + psrlq mm5,4 + movq [48+ebp],mm3 + shl edx,4 + mov BYTE [8+esp],dl + mov edx,DWORD [24+ebx] + movq mm0,[24+ebx] + psllq mm6,60 + movq mm3,[16+ebx] + por mm2,mm6 + movq [edi-64],mm1 + psrlq mm1,4 + movq [64+edi],mm4 + movq mm7,mm4 + movq [ebp-72],mm2 + psrlq mm4,4 + movq [56+ebp],mm5 + shl edx,4 + mov BYTE [9+esp],dl + mov edx,DWORD [40+ebx] + movq mm2,[40+ebx] + psllq mm7,60 + movq mm5,[32+ebx] + por mm1,mm7 + movq [edi-56],mm0 + psrlq mm0,4 + movq [72+edi],mm3 + movq mm6,mm3 + movq [ebp-64],mm1 + psrlq mm3,4 + movq [64+ebp],mm4 + shl edx,4 + mov BYTE [10+esp],dl + mov edx,DWORD [56+ebx] + movq mm1,[56+ebx] + psllq mm6,60 + movq mm4,[48+ebx] + por mm0,mm6 + movq [edi-48],mm2 + psrlq mm2,4 + movq [80+edi],mm5 + movq mm7,mm5 + movq [ebp-56],mm0 + psrlq mm5,4 + movq [72+ebp],mm3 + shl edx,4 + mov BYTE [11+esp],dl + mov edx,DWORD [72+ebx] + movq mm0,[72+ebx] + psllq mm7,60 + movq mm3,[64+ebx] + por mm2,mm7 + movq [edi-40],mm1 + psrlq mm1,4 + movq [88+edi],mm4 + movq mm6,mm4 + movq [ebp-48],mm2 + psrlq mm4,4 + movq [80+ebp],mm5 + shl edx,4 + mov BYTE [12+esp],dl + mov edx,DWORD [88+ebx] + movq mm2,[88+ebx] + psllq mm6,60 + movq mm5,[80+ebx] + por mm1,mm6 + movq [edi-32],mm0 + psrlq mm0,4 + movq [96+edi],mm3 + movq mm7,mm3 + movq [ebp-40],mm1 + psrlq mm3,4 + movq [88+ebp],mm4 + shl edx,4 + mov BYTE [13+esp],dl + mov edx,DWORD [104+ebx] + movq mm1,[104+ebx] + psllq mm7,60 + movq mm4,[96+ebx] + por mm0,mm7 + movq [edi-24],mm2 + psrlq mm2,4 + movq [104+edi],mm5 + movq mm6,mm5 + movq [ebp-32],mm0 + psrlq mm5,4 + movq [96+ebp],mm3 + shl edx,4 + mov BYTE [14+esp],dl + mov edx,DWORD [120+ebx] + movq mm0,[120+ebx] + psllq mm6,60 + movq mm3,[112+ebx] + por mm2,mm6 + movq [edi-16],mm1 + psrlq mm1,4 + movq [112+edi],mm4 + movq mm7,mm4 + movq [ebp-24],mm2 + psrlq mm4,4 + movq [104+ebp],mm5 + shl edx,4 + mov BYTE [15+esp],dl + psllq mm7,60 + por mm1,mm7 + movq [edi-8],mm0 + psrlq mm0,4 + movq [120+edi],mm3 + movq mm6,mm3 + movq [ebp-16],mm1 + psrlq mm3,4 + movq [112+ebp],mm4 + psllq mm6,60 + por mm0,mm6 + movq [ebp-8],mm0 + movq [120+ebp],mm3 + movq mm6,[eax] + mov ebx,DWORD [8+eax] + mov edx,DWORD [12+eax] +align 16 +L$009outer: + xor edx,DWORD [12+ecx] + xor ebx,DWORD [8+ecx] + pxor mm6,[ecx] + lea ecx,[16+ecx] + mov DWORD [536+esp],ebx + movq [528+esp],mm6 + mov DWORD [548+esp],ecx + xor eax,eax + rol edx,8 + mov al,dl + mov ebp,eax + and al,15 + shr ebp,4 + pxor mm0,mm0 + rol edx,8 + pxor mm1,mm1 + pxor mm2,mm2 + movq mm7,[16+eax*8+esp] + movq mm6,[144+eax*8+esp] + mov al,dl + movd ebx,mm7 + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + shr edi,4 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + shr ebp,4 + pinsrw mm2,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + mov edx,DWORD [536+esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + mov edx,DWORD [532+esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + mov edx,DWORD [528+esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + xor bl,BYTE [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,[272+edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD [ebx*2+esi],2 + pxor mm7,[16+eax*8+esp] + rol edx,8 + pxor mm6,[144+eax*8+esp] + pxor mm7,mm3 + pxor mm6,[400+edi*8+esp] + xor cl,BYTE [edi*1+esp] + mov al,dl + mov edx,DWORD [524+esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,[272+ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD [ecx*2+esi],2 + pxor mm7,[16+eax*8+esp] + pxor mm6,[144+eax*8+esp] + xor bl,BYTE [ebp*1+esp] + pxor mm7,mm3 + pxor mm6,[400+ebp*8+esp] + movzx ebx,bl + pxor mm2,mm2 + psllq mm1,4 + movd ecx,mm7 + psrlq mm7,4 + movq mm3,mm6 + psrlq mm6,4 + shl ecx,4 + pxor mm7,[16+edi*8+esp] + psllq mm3,60 + movzx ecx,cl + pxor mm7,mm3 + pxor mm6,[144+edi*8+esp] + pinsrw mm0,WORD [ebx*2+esi],2 + pxor mm6,mm1 + movd edx,mm7 + pinsrw mm2,WORD [ecx*2+esi],3 + psllq mm0,12 + pxor mm6,mm0 + psrlq mm7,32 + pxor mm6,mm2 + mov ecx,DWORD [548+esp] + movd ebx,mm7 + movq mm3,mm6 + psllw mm6,8 + psrlw mm3,8 + por mm6,mm3 + bswap edx + pshufw mm6,mm6,27 + bswap ebx + cmp ecx,DWORD [552+esp] + jne NEAR L$009outer + mov eax,DWORD [544+esp] + mov DWORD [12+eax],edx + mov DWORD [8+eax],ebx + movq [eax],mm6 + mov esp,DWORD [556+esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +global _gcm_init_clmul +align 16 +_gcm_init_clmul: +L$_gcm_init_clmul_begin: + mov edx,DWORD [4+esp] + mov eax,DWORD [8+esp] + call L$010pic +L$010pic: + pop ecx + lea ecx,[(L$bswap-L$010pic)+ecx] + movdqu xmm2,[eax] + pshufd xmm2,xmm2,78 + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + pand xmm5,[16+ecx] + pxor xmm2,xmm5 + movdqa xmm0,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu [edx],xmm2 + pxor xmm4,xmm0 + movdqu [16+edx],xmm0 +db 102,15,58,15,227,8 + movdqu [32+edx],xmm4 + ret +global _gcm_gmult_clmul +align 16 +_gcm_gmult_clmul: +L$_gcm_gmult_clmul_begin: + mov eax,DWORD [4+esp] + mov edx,DWORD [8+esp] + call L$011pic +L$011pic: + pop ecx + lea ecx,[(L$bswap-L$011pic)+ecx] + movdqu xmm0,[eax] + movdqa xmm5,[ecx] + movups xmm2,[edx] +db 102,15,56,0,197 + movups xmm4,[32+edx] + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +db 102,15,56,0,197 + movdqu [eax],xmm0 + ret +global _gcm_ghash_clmul +align 16 +_gcm_ghash_clmul: +L$_gcm_ghash_clmul_begin: + push ebp + push ebx + push esi + push edi + mov eax,DWORD [20+esp] + mov edx,DWORD [24+esp] + mov esi,DWORD [28+esp] + mov ebx,DWORD [32+esp] + call L$012pic +L$012pic: + pop ecx + lea ecx,[(L$bswap-L$012pic)+ecx] + movdqu xmm0,[eax] + movdqa xmm5,[ecx] + movdqu xmm2,[edx] +db 102,15,56,0,197 + sub ebx,16 + jz NEAR L$013odd_tail + movdqu xmm3,[esi] + movdqu xmm6,[16+esi] +db 102,15,56,0,221 +db 102,15,56,0,245 + movdqu xmm5,[32+edx] + pxor xmm0,xmm3 + pshufd xmm3,xmm6,78 + movdqa xmm7,xmm6 + pxor xmm3,xmm6 + lea esi,[32+esi] +db 102,15,58,68,242,0 +db 102,15,58,68,250,17 +db 102,15,58,68,221,0 + movups xmm2,[16+edx] + nop + sub ebx,32 + jbe NEAR L$014even_tail + jmp NEAR L$015mod_loop +align 32 +L$015mod_loop: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 + nop +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,229,16 + movups xmm2,[edx] + xorps xmm0,xmm6 + movdqa xmm5,[ecx] + xorps xmm1,xmm7 + movdqu xmm7,[esi] + pxor xmm3,xmm0 + movdqu xmm6,[16+esi] + pxor xmm3,xmm1 +db 102,15,56,0,253 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 +db 102,15,56,0,245 + pxor xmm1,xmm7 + movdqa xmm7,xmm6 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 +db 102,15,58,68,242,0 + movups xmm5,[32+edx] + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + pshufd xmm3,xmm7,78 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm3,xmm7 + pxor xmm1,xmm4 +db 102,15,58,68,250,17 + movups xmm2,[16+edx] + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +db 102,15,58,68,221,0 + lea esi,[32+esi] + sub ebx,32 + ja NEAR L$015mod_loop +L$014even_tail: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,229,16 + movdqa xmm5,[ecx] + xorps xmm0,xmm6 + xorps xmm1,xmm7 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test ebx,ebx + jnz NEAR L$016done + movups xmm2,[edx] +L$013odd_tail: + movdqu xmm3,[esi] +db 102,15,56,0,221 + pxor xmm0,xmm3 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +db 102,15,58,68,194,0 +db 102,15,58,68,202,17 +db 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +L$016done: +db 102,15,56,0,197 + movdqu [eax],xmm0 + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$bswap: +db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +align 64 +L$rem_8bit: +dw 0,450,900,582,1800,1738,1164,1358 +dw 3600,4050,3476,3158,2328,2266,2716,2910 +dw 7200,7650,8100,7782,6952,6890,6316,6510 +dw 4656,5106,4532,4214,5432,5370,5820,6014 +dw 14400,14722,15300,14854,16200,16010,15564,15630 +dw 13904,14226,13780,13334,12632,12442,13020,13086 +dw 9312,9634,10212,9766,9064,8874,8428,8494 +dw 10864,11186,10740,10294,11640,11450,12028,12094 +dw 28800,28994,29444,29382,30600,30282,29708,30158 +dw 32400,32594,32020,31958,31128,30810,31260,31710 +dw 27808,28002,28452,28390,27560,27242,26668,27118 +dw 25264,25458,24884,24822,26040,25722,26172,26622 +dw 18624,18690,19268,19078,20424,19978,19532,19854 +dw 18128,18194,17748,17558,16856,16410,16988,17310 +dw 21728,21794,22372,22182,21480,21034,20588,20910 +dw 23280,23346,22900,22710,24056,23610,24188,24510 +dw 57600,57538,57988,58182,58888,59338,58764,58446 +dw 61200,61138,60564,60758,59416,59866,60316,59998 +dw 64800,64738,65188,65382,64040,64490,63916,63598 +dw 62256,62194,61620,61814,62520,62970,63420,63102 +dw 55616,55426,56004,56070,56904,57226,56780,56334 +dw 55120,54930,54484,54550,53336,53658,54236,53790 +dw 50528,50338,50916,50982,49768,50090,49644,49198 +dw 52080,51890,51444,51510,52344,52666,53244,52798 +dw 37248,36930,37380,37830,38536,38730,38156,38094 +dw 40848,40530,39956,40406,39064,39258,39708,39646 +dw 36256,35938,36388,36838,35496,35690,35116,35054 +dw 33712,33394,32820,33270,33976,34170,34620,34558 +dw 43456,43010,43588,43910,44744,44810,44364,44174 +dw 42960,42514,42068,42390,41176,41242,41820,41630 +dw 46560,46114,46692,47014,45800,45866,45420,45230 +dw 48112,47666,47220,47542,48376,48442,49020,48830 +align 64 +L$rem_4bit: +dd 0,0,0,471859200,0,943718400,0,610271232 +dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +db 0 diff --git a/third_party/boringssl/win-x86/crypto/rc4/rc4-586.asm b/third_party/boringssl/win-x86/crypto/rc4/rc4-586.asm new file mode 100644 index 00000000000..08cd9f6d70a --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/rc4/rc4-586.asm @@ -0,0 +1,382 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _asm_RC4 +align 16 +_asm_RC4: +L$_asm_RC4_begin: + push ebp + push ebx + push esi + push edi + mov edi,DWORD [20+esp] + mov edx,DWORD [24+esp] + mov esi,DWORD [28+esp] + mov ebp,DWORD [32+esp] + xor eax,eax + xor ebx,ebx + cmp edx,0 + je NEAR L$000abort + mov al,BYTE [edi] + mov bl,BYTE [4+edi] + add edi,8 + lea ecx,[edx*1+esi] + sub ebp,esi + mov DWORD [24+esp],ecx + inc al + cmp DWORD [256+edi],-1 + je NEAR L$001RC4_CHAR + mov ecx,DWORD [eax*4+edi] + and edx,-4 + jz NEAR L$002loop1 + mov DWORD [32+esp],ebp + test edx,-8 + jz NEAR L$003go4loop4 + lea ebp,[_OPENSSL_ia32cap_P] + bt DWORD [ebp],26 + jnc NEAR L$003go4loop4 + mov ebp,DWORD [32+esp] + and edx,-8 + lea edx,[edx*1+esi-8] + mov DWORD [edi-4],edx + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + movq mm0,[esi] + mov ecx,DWORD [eax*4+edi] + movd mm2,DWORD [edx*4+edi] + jmp NEAR L$004loop_mmx_enter +align 16 +L$005loop_mmx: + add bl,cl + psllq mm1,56 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + movq mm0,[esi] + movq [esi*1+ebp-8],mm2 + mov ecx,DWORD [eax*4+edi] + movd mm2,DWORD [edx*4+edi] +L$004loop_mmx_enter: + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm0 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,8 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,16 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,24 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,32 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,40 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + add bl,cl + psllq mm1,48 + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD [eax*4+edi] + movd mm1,DWORD [edx*4+edi] + mov edx,ebx + xor ebx,ebx + mov bl,dl + cmp esi,DWORD [edi-4] + lea esi,[8+esi] + jb NEAR L$005loop_mmx + psllq mm1,56 + pxor mm2,mm1 + movq [esi*1+ebp-8],mm2 + emms + cmp esi,DWORD [24+esp] + je NEAR L$006done + jmp NEAR L$002loop1 +align 16 +L$003go4loop4: + lea edx,[edx*1+esi-4] + mov DWORD [28+esp],edx +L$007loop4: + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + mov ecx,DWORD [eax*4+edi] + mov ebp,DWORD [edx*4+edi] + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD [eax*4+edi] + or ebp,DWORD [edx*4+edi] + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD [eax*4+edi] + or ebp,DWORD [edx*4+edi] + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD [32+esp] + or ebp,DWORD [edx*4+edi] + ror ebp,8 + xor ebp,DWORD [esi] + cmp esi,DWORD [28+esp] + mov DWORD [esi*1+ecx],ebp + lea esi,[4+esi] + mov ecx,DWORD [eax*4+edi] + jb NEAR L$007loop4 + cmp esi,DWORD [24+esp] + je NEAR L$006done + mov ebp,DWORD [32+esp] +align 16 +L$002loop1: + add bl,cl + mov edx,DWORD [ebx*4+edi] + mov DWORD [ebx*4+edi],ecx + mov DWORD [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + mov edx,DWORD [edx*4+edi] + xor dl,BYTE [esi] + lea esi,[1+esi] + mov ecx,DWORD [eax*4+edi] + cmp esi,DWORD [24+esp] + mov BYTE [esi*1+ebp-1],dl + jb NEAR L$002loop1 + jmp NEAR L$006done +align 16 +L$001RC4_CHAR: + movzx ecx,BYTE [eax*1+edi] +L$008cloop1: + add bl,cl + movzx edx,BYTE [ebx*1+edi] + mov BYTE [ebx*1+edi],cl + mov BYTE [eax*1+edi],dl + add dl,cl + movzx edx,BYTE [edx*1+edi] + add al,1 + xor dl,BYTE [esi] + lea esi,[1+esi] + movzx ecx,BYTE [eax*1+edi] + cmp esi,DWORD [24+esp] + mov BYTE [esi*1+ebp-1],dl + jb NEAR L$008cloop1 +L$006done: + dec al + mov DWORD [edi-4],ebx + mov BYTE [edi-8],al +L$000abort: + pop edi + pop esi + pop ebx + pop ebp + ret +global _asm_RC4_set_key +align 16 +_asm_RC4_set_key: +L$_asm_RC4_set_key_begin: + push ebp + push ebx + push esi + push edi + mov edi,DWORD [20+esp] + mov ebp,DWORD [24+esp] + mov esi,DWORD [28+esp] + lea edx,[_OPENSSL_ia32cap_P] + lea edi,[8+edi] + lea esi,[ebp*1+esi] + neg ebp + xor eax,eax + mov DWORD [edi-4],ebp + bt DWORD [edx],20 + jc NEAR L$009c1stloop +align 16 +L$010w1stloop: + mov DWORD [eax*4+edi],eax + add al,1 + jnc NEAR L$010w1stloop + xor ecx,ecx + xor edx,edx +align 16 +L$011w2ndloop: + mov eax,DWORD [ecx*4+edi] + add dl,BYTE [ebp*1+esi] + add dl,al + add ebp,1 + mov ebx,DWORD [edx*4+edi] + jnz NEAR L$012wnowrap + mov ebp,DWORD [edi-4] +L$012wnowrap: + mov DWORD [edx*4+edi],eax + mov DWORD [ecx*4+edi],ebx + add cl,1 + jnc NEAR L$011w2ndloop + jmp NEAR L$013exit +align 16 +L$009c1stloop: + mov BYTE [eax*1+edi],al + add al,1 + jnc NEAR L$009c1stloop + xor ecx,ecx + xor edx,edx + xor ebx,ebx +align 16 +L$014c2ndloop: + mov al,BYTE [ecx*1+edi] + add dl,BYTE [ebp*1+esi] + add dl,al + add ebp,1 + mov bl,BYTE [edx*1+edi] + jnz NEAR L$015cnowrap + mov ebp,DWORD [edi-4] +L$015cnowrap: + mov BYTE [edx*1+edi],al + mov BYTE [ecx*1+edi],bl + add cl,1 + jnc NEAR L$014c2ndloop + mov DWORD [256+edi],-1 +L$013exit: + xor eax,eax + mov DWORD [edi-8],eax + mov DWORD [edi-4],eax + pop edi + pop esi + pop ebx + pop ebp + ret +global _RC4_options +align 16 +_RC4_options: +L$_RC4_options_begin: + call L$016pic_point +L$016pic_point: + pop eax + lea eax,[(L$017opts-L$016pic_point)+eax] + lea edx,[_OPENSSL_ia32cap_P] + mov edx,DWORD [edx] + bt edx,20 + jc NEAR L$0181xchar + bt edx,26 + jnc NEAR L$019ret + add eax,25 + ret +L$0181xchar: + add eax,12 +L$019ret: + ret +align 64 +L$017opts: +db 114,99,52,40,52,120,44,105,110,116,41,0 +db 114,99,52,40,49,120,44,99,104,97,114,41,0 +db 114,99,52,40,56,120,44,109,109,120,41,0 +db 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +db 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +db 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +align 64 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/sha/sha1-586.asm b/third_party/boringssl/win-x86/crypto/sha/sha1-586.asm new file mode 100644 index 00000000000..e24449d2006 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/sha/sha1-586.asm @@ -0,0 +1,2805 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _sha1_block_data_order +align 16 +_sha1_block_data_order: +L$_sha1_block_data_order_begin: + push ebp + push ebx + push esi + push edi + call L$000pic_point +L$000pic_point: + pop ebp + lea esi,[_OPENSSL_ia32cap_P] + lea ebp,[(L$K_XX_XX-L$000pic_point)+ebp] + mov eax,DWORD [esi] + mov edx,DWORD [4+esi] + test edx,512 + jz NEAR L$001x86 + mov ecx,DWORD [8+esi] + test eax,16777216 + jz NEAR L$001x86 + test ecx,536870912 + jnz NEAR L$shaext_shortcut + jmp NEAR L$ssse3_shortcut +align 16 +L$001x86: + mov ebp,DWORD [20+esp] + mov esi,DWORD [24+esp] + mov eax,DWORD [28+esp] + sub esp,76 + shl eax,6 + add eax,esi + mov DWORD [104+esp],eax + mov edi,DWORD [16+ebp] + jmp NEAR L$002loop +align 16 +L$002loop: + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [16+esp],eax + mov DWORD [20+esp],ebx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],edx + mov eax,DWORD [32+esi] + mov ebx,DWORD [36+esi] + mov ecx,DWORD [40+esi] + mov edx,DWORD [44+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov DWORD [40+esp],ecx + mov DWORD [44+esp],edx + mov eax,DWORD [48+esi] + mov ebx,DWORD [52+esi] + mov ecx,DWORD [56+esi] + mov edx,DWORD [60+esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD [48+esp],eax + mov DWORD [52+esp],ebx + mov DWORD [56+esp],ecx + mov DWORD [60+esp],edx + mov DWORD [100+esp],esi + mov eax,DWORD [ebp] + mov ebx,DWORD [4+ebp] + mov ecx,DWORD [8+ebp] + mov edx,DWORD [12+ebp] + ; 00_15 0 + mov esi,ecx + mov ebp,eax + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 1 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [4+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 2 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [8+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 3 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [12+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + add ebp,ecx + ; 00_15 4 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD [16+esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,[1518500249+eax*1+ebp] + add ebp,ebx + ; 00_15 5 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD [20+esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,[1518500249+esi*1+ebp] + add ebp,eax + ; 00_15 6 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [24+esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 7 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [28+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 8 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [32+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 9 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [36+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + add ebp,ecx + ; 00_15 10 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD [40+esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,[1518500249+eax*1+ebp] + add ebp,ebx + ; 00_15 11 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD [44+esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,[1518500249+esi*1+ebp] + add ebp,eax + ; 00_15 12 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD [48+esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,[1518500249+edi*1+ebp] + add ebp,esi + ; 00_15 13 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD [52+esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,[1518500249+edx*1+ebp] + add ebp,edi + ; 00_15 14 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD [56+esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,[1518500249+ecx*1+ebp] + add ebp,edx + ; 00_15 15 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD [60+esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,[1518500249+ebx*1+ebp] + mov ebx,DWORD [esp] + add ecx,ebp + ; 16_19 16 + mov ebp,edi + xor ebx,DWORD [8+esp] + xor ebp,esi + xor ebx,DWORD [32+esp] + and ebp,edx + xor ebx,DWORD [52+esp] + rol ebx,1 + xor ebp,esi + add eax,ebp + mov ebp,ecx + ror edx,2 + mov DWORD [esp],ebx + rol ebp,5 + lea ebx,[1518500249+eax*1+ebx] + mov eax,DWORD [4+esp] + add ebx,ebp + ; 16_19 17 + mov ebp,edx + xor eax,DWORD [12+esp] + xor ebp,edi + xor eax,DWORD [36+esp] + and ebp,ecx + xor eax,DWORD [56+esp] + rol eax,1 + xor ebp,edi + add esi,ebp + mov ebp,ebx + ror ecx,2 + mov DWORD [4+esp],eax + rol ebp,5 + lea eax,[1518500249+esi*1+eax] + mov esi,DWORD [8+esp] + add eax,ebp + ; 16_19 18 + mov ebp,ecx + xor esi,DWORD [16+esp] + xor ebp,edx + xor esi,DWORD [40+esp] + and ebp,ebx + xor esi,DWORD [60+esp] + rol esi,1 + xor ebp,edx + add edi,ebp + mov ebp,eax + ror ebx,2 + mov DWORD [8+esp],esi + rol ebp,5 + lea esi,[1518500249+edi*1+esi] + mov edi,DWORD [12+esp] + add esi,ebp + ; 16_19 19 + mov ebp,ebx + xor edi,DWORD [20+esp] + xor ebp,ecx + xor edi,DWORD [44+esp] + and ebp,eax + xor edi,DWORD [esp] + rol edi,1 + xor ebp,ecx + add edx,ebp + mov ebp,esi + ror eax,2 + mov DWORD [12+esp],edi + rol ebp,5 + lea edi,[1518500249+edx*1+edi] + mov edx,DWORD [16+esp] + add edi,ebp + ; 20_39 20 + mov ebp,esi + xor edx,DWORD [24+esp] + xor ebp,eax + xor edx,DWORD [48+esp] + xor ebp,ebx + xor edx,DWORD [4+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [16+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [20+esp] + add edx,ebp + ; 20_39 21 + mov ebp,edi + xor ecx,DWORD [28+esp] + xor ebp,esi + xor ecx,DWORD [52+esp] + xor ebp,eax + xor ecx,DWORD [8+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [20+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [24+esp] + add ecx,ebp + ; 20_39 22 + mov ebp,edx + xor ebx,DWORD [32+esp] + xor ebp,edi + xor ebx,DWORD [56+esp] + xor ebp,esi + xor ebx,DWORD [12+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [24+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [28+esp] + add ebx,ebp + ; 20_39 23 + mov ebp,ecx + xor eax,DWORD [36+esp] + xor ebp,edx + xor eax,DWORD [60+esp] + xor ebp,edi + xor eax,DWORD [16+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [28+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [32+esp] + add eax,ebp + ; 20_39 24 + mov ebp,ebx + xor esi,DWORD [40+esp] + xor ebp,ecx + xor esi,DWORD [esp] + xor ebp,edx + xor esi,DWORD [20+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [32+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [36+esp] + add esi,ebp + ; 20_39 25 + mov ebp,eax + xor edi,DWORD [44+esp] + xor ebp,ebx + xor edi,DWORD [4+esp] + xor ebp,ecx + xor edi,DWORD [24+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [36+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [40+esp] + add edi,ebp + ; 20_39 26 + mov ebp,esi + xor edx,DWORD [48+esp] + xor ebp,eax + xor edx,DWORD [8+esp] + xor ebp,ebx + xor edx,DWORD [28+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [40+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [44+esp] + add edx,ebp + ; 20_39 27 + mov ebp,edi + xor ecx,DWORD [52+esp] + xor ebp,esi + xor ecx,DWORD [12+esp] + xor ebp,eax + xor ecx,DWORD [32+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [44+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [48+esp] + add ecx,ebp + ; 20_39 28 + mov ebp,edx + xor ebx,DWORD [56+esp] + xor ebp,edi + xor ebx,DWORD [16+esp] + xor ebp,esi + xor ebx,DWORD [36+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [48+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [52+esp] + add ebx,ebp + ; 20_39 29 + mov ebp,ecx + xor eax,DWORD [60+esp] + xor ebp,edx + xor eax,DWORD [20+esp] + xor ebp,edi + xor eax,DWORD [40+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [52+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [56+esp] + add eax,ebp + ; 20_39 30 + mov ebp,ebx + xor esi,DWORD [esp] + xor ebp,ecx + xor esi,DWORD [24+esp] + xor ebp,edx + xor esi,DWORD [44+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [56+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [60+esp] + add esi,ebp + ; 20_39 31 + mov ebp,eax + xor edi,DWORD [4+esp] + xor ebp,ebx + xor edi,DWORD [28+esp] + xor ebp,ecx + xor edi,DWORD [48+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [60+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [esp] + add edi,ebp + ; 20_39 32 + mov ebp,esi + xor edx,DWORD [8+esp] + xor ebp,eax + xor edx,DWORD [32+esp] + xor ebp,ebx + xor edx,DWORD [52+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [4+esp] + add edx,ebp + ; 20_39 33 + mov ebp,edi + xor ecx,DWORD [12+esp] + xor ebp,esi + xor ecx,DWORD [36+esp] + xor ebp,eax + xor ecx,DWORD [56+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [4+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [8+esp] + add ecx,ebp + ; 20_39 34 + mov ebp,edx + xor ebx,DWORD [16+esp] + xor ebp,edi + xor ebx,DWORD [40+esp] + xor ebp,esi + xor ebx,DWORD [60+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [8+esp],ebx + lea ebx,[1859775393+eax*1+ebx] + mov eax,DWORD [12+esp] + add ebx,ebp + ; 20_39 35 + mov ebp,ecx + xor eax,DWORD [20+esp] + xor ebp,edx + xor eax,DWORD [44+esp] + xor ebp,edi + xor eax,DWORD [esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [12+esp],eax + lea eax,[1859775393+esi*1+eax] + mov esi,DWORD [16+esp] + add eax,ebp + ; 20_39 36 + mov ebp,ebx + xor esi,DWORD [24+esp] + xor ebp,ecx + xor esi,DWORD [48+esp] + xor ebp,edx + xor esi,DWORD [4+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [16+esp],esi + lea esi,[1859775393+edi*1+esi] + mov edi,DWORD [20+esp] + add esi,ebp + ; 20_39 37 + mov ebp,eax + xor edi,DWORD [28+esp] + xor ebp,ebx + xor edi,DWORD [52+esp] + xor ebp,ecx + xor edi,DWORD [8+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [20+esp],edi + lea edi,[1859775393+edx*1+edi] + mov edx,DWORD [24+esp] + add edi,ebp + ; 20_39 38 + mov ebp,esi + xor edx,DWORD [32+esp] + xor ebp,eax + xor edx,DWORD [56+esp] + xor ebp,ebx + xor edx,DWORD [12+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [24+esp],edx + lea edx,[1859775393+ecx*1+edx] + mov ecx,DWORD [28+esp] + add edx,ebp + ; 20_39 39 + mov ebp,edi + xor ecx,DWORD [36+esp] + xor ebp,esi + xor ecx,DWORD [60+esp] + xor ebp,eax + xor ecx,DWORD [16+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [28+esp],ecx + lea ecx,[1859775393+ebx*1+ecx] + mov ebx,DWORD [32+esp] + add ecx,ebp + ; 40_59 40 + mov ebp,edi + xor ebx,DWORD [40+esp] + xor ebp,esi + xor ebx,DWORD [esp] + and ebp,edx + xor ebx,DWORD [20+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [32+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [36+esp] + add ebx,ebp + ; 40_59 41 + mov ebp,edx + xor eax,DWORD [44+esp] + xor ebp,edi + xor eax,DWORD [4+esp] + and ebp,ecx + xor eax,DWORD [24+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [36+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [40+esp] + add eax,ebp + ; 40_59 42 + mov ebp,ecx + xor esi,DWORD [48+esp] + xor ebp,edx + xor esi,DWORD [8+esp] + and ebp,ebx + xor esi,DWORD [28+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [40+esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [44+esp] + add esi,ebp + ; 40_59 43 + mov ebp,ebx + xor edi,DWORD [52+esp] + xor ebp,ecx + xor edi,DWORD [12+esp] + and ebp,eax + xor edi,DWORD [32+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [44+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [48+esp] + add edi,ebp + ; 40_59 44 + mov ebp,eax + xor edx,DWORD [56+esp] + xor ebp,ebx + xor edx,DWORD [16+esp] + and ebp,esi + xor edx,DWORD [36+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [48+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [52+esp] + add edx,ebp + ; 40_59 45 + mov ebp,esi + xor ecx,DWORD [60+esp] + xor ebp,eax + xor ecx,DWORD [20+esp] + and ebp,edi + xor ecx,DWORD [40+esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [52+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [56+esp] + add ecx,ebp + ; 40_59 46 + mov ebp,edi + xor ebx,DWORD [esp] + xor ebp,esi + xor ebx,DWORD [24+esp] + and ebp,edx + xor ebx,DWORD [44+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [56+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [60+esp] + add ebx,ebp + ; 40_59 47 + mov ebp,edx + xor eax,DWORD [4+esp] + xor ebp,edi + xor eax,DWORD [28+esp] + and ebp,ecx + xor eax,DWORD [48+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [60+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [esp] + add eax,ebp + ; 40_59 48 + mov ebp,ecx + xor esi,DWORD [8+esp] + xor ebp,edx + xor esi,DWORD [32+esp] + and ebp,ebx + xor esi,DWORD [52+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [4+esp] + add esi,ebp + ; 40_59 49 + mov ebp,ebx + xor edi,DWORD [12+esp] + xor ebp,ecx + xor edi,DWORD [36+esp] + and ebp,eax + xor edi,DWORD [56+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [4+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [8+esp] + add edi,ebp + ; 40_59 50 + mov ebp,eax + xor edx,DWORD [16+esp] + xor ebp,ebx + xor edx,DWORD [40+esp] + and ebp,esi + xor edx,DWORD [60+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [8+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [12+esp] + add edx,ebp + ; 40_59 51 + mov ebp,esi + xor ecx,DWORD [20+esp] + xor ebp,eax + xor ecx,DWORD [44+esp] + and ebp,edi + xor ecx,DWORD [esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [12+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [16+esp] + add ecx,ebp + ; 40_59 52 + mov ebp,edi + xor ebx,DWORD [24+esp] + xor ebp,esi + xor ebx,DWORD [48+esp] + and ebp,edx + xor ebx,DWORD [4+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [16+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [20+esp] + add ebx,ebp + ; 40_59 53 + mov ebp,edx + xor eax,DWORD [28+esp] + xor ebp,edi + xor eax,DWORD [52+esp] + and ebp,ecx + xor eax,DWORD [8+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [20+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [24+esp] + add eax,ebp + ; 40_59 54 + mov ebp,ecx + xor esi,DWORD [32+esp] + xor ebp,edx + xor esi,DWORD [56+esp] + and ebp,ebx + xor esi,DWORD [12+esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD [24+esp],esi + lea esi,[2400959708+ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD [28+esp] + add esi,ebp + ; 40_59 55 + mov ebp,ebx + xor edi,DWORD [36+esp] + xor ebp,ecx + xor edi,DWORD [60+esp] + and ebp,eax + xor edi,DWORD [16+esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD [28+esp],edi + lea edi,[2400959708+ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD [32+esp] + add edi,ebp + ; 40_59 56 + mov ebp,eax + xor edx,DWORD [40+esp] + xor ebp,ebx + xor edx,DWORD [esp] + and ebp,esi + xor edx,DWORD [20+esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD [32+esp],edx + lea edx,[2400959708+ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD [36+esp] + add edx,ebp + ; 40_59 57 + mov ebp,esi + xor ecx,DWORD [44+esp] + xor ebp,eax + xor ecx,DWORD [4+esp] + and ebp,edi + xor ecx,DWORD [24+esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD [36+esp],ecx + lea ecx,[2400959708+ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD [40+esp] + add ecx,ebp + ; 40_59 58 + mov ebp,edi + xor ebx,DWORD [48+esp] + xor ebp,esi + xor ebx,DWORD [8+esp] + and ebp,edx + xor ebx,DWORD [28+esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD [40+esp],ebx + lea ebx,[2400959708+ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD [44+esp] + add ebx,ebp + ; 40_59 59 + mov ebp,edx + xor eax,DWORD [52+esp] + xor ebp,edi + xor eax,DWORD [12+esp] + and ebp,ecx + xor eax,DWORD [32+esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD [44+esp],eax + lea eax,[2400959708+ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD [48+esp] + add eax,ebp + ; 20_39 60 + mov ebp,ebx + xor esi,DWORD [56+esp] + xor ebp,ecx + xor esi,DWORD [16+esp] + xor ebp,edx + xor esi,DWORD [36+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [48+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [52+esp] + add esi,ebp + ; 20_39 61 + mov ebp,eax + xor edi,DWORD [60+esp] + xor ebp,ebx + xor edi,DWORD [20+esp] + xor ebp,ecx + xor edi,DWORD [40+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [52+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [56+esp] + add edi,ebp + ; 20_39 62 + mov ebp,esi + xor edx,DWORD [esp] + xor ebp,eax + xor edx,DWORD [24+esp] + xor ebp,ebx + xor edx,DWORD [44+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [56+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [60+esp] + add edx,ebp + ; 20_39 63 + mov ebp,edi + xor ecx,DWORD [4+esp] + xor ebp,esi + xor ecx,DWORD [28+esp] + xor ebp,eax + xor ecx,DWORD [48+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [60+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [esp] + add ecx,ebp + ; 20_39 64 + mov ebp,edx + xor ebx,DWORD [8+esp] + xor ebp,edi + xor ebx,DWORD [32+esp] + xor ebp,esi + xor ebx,DWORD [52+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [4+esp] + add ebx,ebp + ; 20_39 65 + mov ebp,ecx + xor eax,DWORD [12+esp] + xor ebp,edx + xor eax,DWORD [36+esp] + xor ebp,edi + xor eax,DWORD [56+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [4+esp],eax + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [8+esp] + add eax,ebp + ; 20_39 66 + mov ebp,ebx + xor esi,DWORD [16+esp] + xor ebp,ecx + xor esi,DWORD [40+esp] + xor ebp,edx + xor esi,DWORD [60+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [8+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [12+esp] + add esi,ebp + ; 20_39 67 + mov ebp,eax + xor edi,DWORD [20+esp] + xor ebp,ebx + xor edi,DWORD [44+esp] + xor ebp,ecx + xor edi,DWORD [esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [12+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [16+esp] + add edi,ebp + ; 20_39 68 + mov ebp,esi + xor edx,DWORD [24+esp] + xor ebp,eax + xor edx,DWORD [48+esp] + xor ebp,ebx + xor edx,DWORD [4+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [16+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [20+esp] + add edx,ebp + ; 20_39 69 + mov ebp,edi + xor ecx,DWORD [28+esp] + xor ebp,esi + xor ecx,DWORD [52+esp] + xor ebp,eax + xor ecx,DWORD [8+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [20+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [24+esp] + add ecx,ebp + ; 20_39 70 + mov ebp,edx + xor ebx,DWORD [32+esp] + xor ebp,edi + xor ebx,DWORD [56+esp] + xor ebp,esi + xor ebx,DWORD [12+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [24+esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [28+esp] + add ebx,ebp + ; 20_39 71 + mov ebp,ecx + xor eax,DWORD [36+esp] + xor ebp,edx + xor eax,DWORD [60+esp] + xor ebp,edi + xor eax,DWORD [16+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD [28+esp],eax + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [32+esp] + add eax,ebp + ; 20_39 72 + mov ebp,ebx + xor esi,DWORD [40+esp] + xor ebp,ecx + xor esi,DWORD [esp] + xor ebp,edx + xor esi,DWORD [20+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD [32+esp],esi + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [36+esp] + add esi,ebp + ; 20_39 73 + mov ebp,eax + xor edi,DWORD [44+esp] + xor ebp,ebx + xor edi,DWORD [4+esp] + xor ebp,ecx + xor edi,DWORD [24+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD [36+esp],edi + lea edi,[3395469782+edx*1+edi] + mov edx,DWORD [40+esp] + add edi,ebp + ; 20_39 74 + mov ebp,esi + xor edx,DWORD [48+esp] + xor ebp,eax + xor edx,DWORD [8+esp] + xor ebp,ebx + xor edx,DWORD [28+esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD [40+esp],edx + lea edx,[3395469782+ecx*1+edx] + mov ecx,DWORD [44+esp] + add edx,ebp + ; 20_39 75 + mov ebp,edi + xor ecx,DWORD [52+esp] + xor ebp,esi + xor ecx,DWORD [12+esp] + xor ebp,eax + xor ecx,DWORD [32+esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD [44+esp],ecx + lea ecx,[3395469782+ebx*1+ecx] + mov ebx,DWORD [48+esp] + add ecx,ebp + ; 20_39 76 + mov ebp,edx + xor ebx,DWORD [56+esp] + xor ebp,edi + xor ebx,DWORD [16+esp] + xor ebp,esi + xor ebx,DWORD [36+esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD [48+esp],ebx + lea ebx,[3395469782+eax*1+ebx] + mov eax,DWORD [52+esp] + add ebx,ebp + ; 20_39 77 + mov ebp,ecx + xor eax,DWORD [60+esp] + xor ebp,edx + xor eax,DWORD [20+esp] + xor ebp,edi + xor eax,DWORD [40+esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + lea eax,[3395469782+esi*1+eax] + mov esi,DWORD [56+esp] + add eax,ebp + ; 20_39 78 + mov ebp,ebx + xor esi,DWORD [esp] + xor ebp,ecx + xor esi,DWORD [24+esp] + xor ebp,edx + xor esi,DWORD [44+esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + lea esi,[3395469782+edi*1+esi] + mov edi,DWORD [60+esp] + add esi,ebp + ; 20_39 79 + mov ebp,eax + xor edi,DWORD [4+esp] + xor ebp,ebx + xor edi,DWORD [28+esp] + xor ebp,ecx + xor edi,DWORD [48+esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + lea edi,[3395469782+edx*1+edi] + add edi,ebp + mov ebp,DWORD [96+esp] + mov edx,DWORD [100+esp] + add edi,DWORD [ebp] + add esi,DWORD [4+ebp] + add eax,DWORD [8+ebp] + add ebx,DWORD [12+ebp] + add ecx,DWORD [16+ebp] + mov DWORD [ebp],edi + add edx,64 + mov DWORD [4+ebp],esi + cmp edx,DWORD [104+esp] + mov DWORD [8+ebp],eax + mov edi,ecx + mov DWORD [12+ebp],ebx + mov esi,edx + mov DWORD [16+ebp],ecx + jb NEAR L$002loop + add esp,76 + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__sha1_block_data_order_shaext: + push ebp + push ebx + push esi + push edi + call L$003pic_point +L$003pic_point: + pop ebp + lea ebp,[(L$K_XX_XX-L$003pic_point)+ebp] +L$shaext_shortcut: + mov edi,DWORD [20+esp] + mov ebx,esp + mov esi,DWORD [24+esp] + mov ecx,DWORD [28+esp] + sub esp,32 + movdqu xmm0,[edi] + movd xmm1,DWORD [16+edi] + and esp,-32 + movdqa xmm3,[80+ebp] + movdqu xmm4,[esi] + pshufd xmm0,xmm0,27 + movdqu xmm5,[16+esi] + pshufd xmm1,xmm1,27 + movdqu xmm6,[32+esi] +db 102,15,56,0,227 + movdqu xmm7,[48+esi] +db 102,15,56,0,235 +db 102,15,56,0,243 +db 102,15,56,0,251 + jmp NEAR L$004loop_shaext +align 16 +L$004loop_shaext: + dec ecx + lea eax,[64+esi] + movdqa [esp],xmm1 + paddd xmm1,xmm4 + cmovne esi,eax + movdqa [16+esp],xmm0 +db 15,56,201,229 + movdqa xmm2,xmm0 +db 15,58,204,193,0 +db 15,56,200,213 + pxor xmm4,xmm6 +db 15,56,201,238 +db 15,56,202,231 + movdqa xmm1,xmm0 +db 15,58,204,194,0 +db 15,56,200,206 + pxor xmm5,xmm7 +db 15,56,202,236 +db 15,56,201,247 + movdqa xmm2,xmm0 +db 15,58,204,193,0 +db 15,56,200,215 + pxor xmm6,xmm4 +db 15,56,201,252 +db 15,56,202,245 + movdqa xmm1,xmm0 +db 15,58,204,194,0 +db 15,56,200,204 + pxor xmm7,xmm5 +db 15,56,202,254 +db 15,56,201,229 + movdqa xmm2,xmm0 +db 15,58,204,193,0 +db 15,56,200,213 + pxor xmm4,xmm6 +db 15,56,201,238 +db 15,56,202,231 + movdqa xmm1,xmm0 +db 15,58,204,194,1 +db 15,56,200,206 + pxor xmm5,xmm7 +db 15,56,202,236 +db 15,56,201,247 + movdqa xmm2,xmm0 +db 15,58,204,193,1 +db 15,56,200,215 + pxor xmm6,xmm4 +db 15,56,201,252 +db 15,56,202,245 + movdqa xmm1,xmm0 +db 15,58,204,194,1 +db 15,56,200,204 + pxor xmm7,xmm5 +db 15,56,202,254 +db 15,56,201,229 + movdqa xmm2,xmm0 +db 15,58,204,193,1 +db 15,56,200,213 + pxor xmm4,xmm6 +db 15,56,201,238 +db 15,56,202,231 + movdqa xmm1,xmm0 +db 15,58,204,194,1 +db 15,56,200,206 + pxor xmm5,xmm7 +db 15,56,202,236 +db 15,56,201,247 + movdqa xmm2,xmm0 +db 15,58,204,193,2 +db 15,56,200,215 + pxor xmm6,xmm4 +db 15,56,201,252 +db 15,56,202,245 + movdqa xmm1,xmm0 +db 15,58,204,194,2 +db 15,56,200,204 + pxor xmm7,xmm5 +db 15,56,202,254 +db 15,56,201,229 + movdqa xmm2,xmm0 +db 15,58,204,193,2 +db 15,56,200,213 + pxor xmm4,xmm6 +db 15,56,201,238 +db 15,56,202,231 + movdqa xmm1,xmm0 +db 15,58,204,194,2 +db 15,56,200,206 + pxor xmm5,xmm7 +db 15,56,202,236 +db 15,56,201,247 + movdqa xmm2,xmm0 +db 15,58,204,193,2 +db 15,56,200,215 + pxor xmm6,xmm4 +db 15,56,201,252 +db 15,56,202,245 + movdqa xmm1,xmm0 +db 15,58,204,194,3 +db 15,56,200,204 + pxor xmm7,xmm5 +db 15,56,202,254 + movdqu xmm4,[esi] + movdqa xmm2,xmm0 +db 15,58,204,193,3 +db 15,56,200,213 + movdqu xmm5,[16+esi] +db 102,15,56,0,227 + movdqa xmm1,xmm0 +db 15,58,204,194,3 +db 15,56,200,206 + movdqu xmm6,[32+esi] +db 102,15,56,0,235 + movdqa xmm2,xmm0 +db 15,58,204,193,3 +db 15,56,200,215 + movdqu xmm7,[48+esi] +db 102,15,56,0,243 + movdqa xmm1,xmm0 +db 15,58,204,194,3 + movdqa xmm2,[esp] +db 102,15,56,0,251 +db 15,56,200,202 + paddd xmm0,[16+esp] + jnz NEAR L$004loop_shaext + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu [edi],xmm0 + movd DWORD [16+edi],xmm1 + mov esp,ebx + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +__sha1_block_data_order_ssse3: + push ebp + push ebx + push esi + push edi + call L$005pic_point +L$005pic_point: + pop ebp + lea ebp,[(L$K_XX_XX-L$005pic_point)+ebp] +L$ssse3_shortcut: + movdqa xmm7,[ebp] + movdqa xmm0,[16+ebp] + movdqa xmm1,[32+ebp] + movdqa xmm2,[48+ebp] + movdqa xmm6,[64+ebp] + mov edi,DWORD [20+esp] + mov ebp,DWORD [24+esp] + mov edx,DWORD [28+esp] + mov esi,esp + sub esp,208 + and esp,-64 + movdqa [112+esp],xmm0 + movdqa [128+esp],xmm1 + movdqa [144+esp],xmm2 + shl edx,6 + movdqa [160+esp],xmm7 + add edx,ebp + movdqa [176+esp],xmm6 + add ebp,64 + mov DWORD [192+esp],edi + mov DWORD [196+esp],ebp + mov DWORD [200+esp],edx + mov DWORD [204+esp],esi + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + mov edi,DWORD [16+edi] + mov esi,ebx + movdqu xmm0,[ebp-64] + movdqu xmm1,[ebp-48] + movdqu xmm2,[ebp-32] + movdqu xmm3,[ebp-16] +db 102,15,56,0,198 +db 102,15,56,0,206 +db 102,15,56,0,214 + movdqa [96+esp],xmm7 +db 102,15,56,0,222 + paddd xmm0,xmm7 + paddd xmm1,xmm7 + paddd xmm2,xmm7 + movdqa [esp],xmm0 + psubd xmm0,xmm7 + movdqa [16+esp],xmm1 + psubd xmm1,xmm7 + movdqa [32+esp],xmm2 + mov ebp,ecx + psubd xmm2,xmm7 + xor ebp,edx + pshufd xmm4,xmm0,238 + and esi,ebp + jmp NEAR L$006loop +align 16 +L$006loop: + ror ebx,2 + xor esi,edx + mov ebp,eax + punpcklqdq xmm4,xmm1 + movdqa xmm6,xmm3 + add edi,DWORD [esp] + xor ebx,ecx + paddd xmm7,xmm3 + movdqa [64+esp],xmm0 + rol eax,5 + add edi,esi + psrldq xmm6,4 + and ebp,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add edi,eax + ror eax,7 + pxor xmm6,xmm2 + xor ebp,ecx + mov esi,edi + add edx,DWORD [4+esp] + pxor xmm4,xmm6 + xor eax,ebx + rol edi,5 + movdqa [48+esp],xmm7 + add edx,ebp + and esi,eax + movdqa xmm0,xmm4 + xor eax,ebx + add edx,edi + ror edi,7 + movdqa xmm6,xmm4 + xor esi,ebx + pslldq xmm0,12 + paddd xmm4,xmm4 + mov ebp,edx + add ecx,DWORD [8+esp] + psrld xmm6,31 + xor edi,eax + rol edx,5 + movdqa xmm7,xmm0 + add ecx,esi + and ebp,edi + xor edi,eax + psrld xmm0,30 + add ecx,edx + ror edx,7 + por xmm4,xmm6 + xor ebp,eax + mov esi,ecx + add ebx,DWORD [12+esp] + pslld xmm7,2 + xor edx,edi + rol ecx,5 + pxor xmm4,xmm0 + movdqa xmm0,[96+esp] + add ebx,ebp + and esi,edx + pxor xmm4,xmm7 + pshufd xmm5,xmm1,238 + xor edx,edi + add ebx,ecx + ror ecx,7 + xor esi,edi + mov ebp,ebx + punpcklqdq xmm5,xmm2 + movdqa xmm7,xmm4 + add eax,DWORD [16+esp] + xor ecx,edx + paddd xmm0,xmm4 + movdqa [80+esp],xmm1 + rol ebx,5 + add eax,esi + psrldq xmm7,4 + and ebp,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm7,xmm3 + xor ebp,edx + mov esi,eax + add edi,DWORD [20+esp] + pxor xmm5,xmm7 + xor ebx,ecx + rol eax,5 + movdqa [esp],xmm0 + add edi,ebp + and esi,ebx + movdqa xmm1,xmm5 + xor ebx,ecx + add edi,eax + ror eax,7 + movdqa xmm7,xmm5 + xor esi,ecx + pslldq xmm1,12 + paddd xmm5,xmm5 + mov ebp,edi + add edx,DWORD [24+esp] + psrld xmm7,31 + xor eax,ebx + rol edi,5 + movdqa xmm0,xmm1 + add edx,esi + and ebp,eax + xor eax,ebx + psrld xmm1,30 + add edx,edi + ror edi,7 + por xmm5,xmm7 + xor ebp,ebx + mov esi,edx + add ecx,DWORD [28+esp] + pslld xmm0,2 + xor edi,eax + rol edx,5 + pxor xmm5,xmm1 + movdqa xmm1,[112+esp] + add ecx,ebp + and esi,edi + pxor xmm5,xmm0 + pshufd xmm6,xmm2,238 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + punpcklqdq xmm6,xmm3 + movdqa xmm0,xmm5 + add ebx,DWORD [32+esp] + xor edx,edi + paddd xmm1,xmm5 + movdqa [96+esp],xmm2 + rol ecx,5 + add ebx,esi + psrldq xmm0,4 + and ebp,edx + xor edx,edi + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm0,xmm4 + xor ebp,edi + mov esi,ebx + add eax,DWORD [36+esp] + pxor xmm6,xmm0 + xor ecx,edx + rol ebx,5 + movdqa [16+esp],xmm1 + add eax,ebp + and esi,ecx + movdqa xmm2,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm0,xmm6 + xor esi,edx + pslldq xmm2,12 + paddd xmm6,xmm6 + mov ebp,eax + add edi,DWORD [40+esp] + psrld xmm0,31 + xor ebx,ecx + rol eax,5 + movdqa xmm1,xmm2 + add edi,esi + and ebp,ebx + xor ebx,ecx + psrld xmm2,30 + add edi,eax + ror eax,7 + por xmm6,xmm0 + xor ebp,ecx + movdqa xmm0,[64+esp] + mov esi,edi + add edx,DWORD [44+esp] + pslld xmm1,2 + xor eax,ebx + rol edi,5 + pxor xmm6,xmm2 + movdqa xmm2,[112+esp] + add edx,ebp + and esi,eax + pxor xmm6,xmm1 + pshufd xmm7,xmm3,238 + xor eax,ebx + add edx,edi + ror edi,7 + xor esi,ebx + mov ebp,edx + punpcklqdq xmm7,xmm4 + movdqa xmm1,xmm6 + add ecx,DWORD [48+esp] + xor edi,eax + paddd xmm2,xmm6 + movdqa [64+esp],xmm3 + rol edx,5 + add ecx,esi + psrldq xmm1,4 + and ebp,edi + xor edi,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm1,xmm5 + xor ebp,eax + mov esi,ecx + add ebx,DWORD [52+esp] + pxor xmm7,xmm1 + xor edx,edi + rol ecx,5 + movdqa [32+esp],xmm2 + add ebx,ebp + and esi,edx + movdqa xmm3,xmm7 + xor edx,edi + add ebx,ecx + ror ecx,7 + movdqa xmm1,xmm7 + xor esi,edi + pslldq xmm3,12 + paddd xmm7,xmm7 + mov ebp,ebx + add eax,DWORD [56+esp] + psrld xmm1,31 + xor ecx,edx + rol ebx,5 + movdqa xmm2,xmm3 + add eax,esi + and ebp,ecx + xor ecx,edx + psrld xmm3,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm1 + xor ebp,edx + movdqa xmm1,[80+esp] + mov esi,eax + add edi,DWORD [60+esp] + pslld xmm2,2 + xor ebx,ecx + rol eax,5 + pxor xmm7,xmm3 + movdqa xmm3,[112+esp] + add edi,ebp + and esi,ebx + pxor xmm7,xmm2 + pshufd xmm2,xmm6,238 + xor ebx,ecx + add edi,eax + ror eax,7 + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + xor esi,ecx + mov ebp,edi + add edx,DWORD [esp] + pxor xmm0,xmm1 + movdqa [80+esp],xmm4 + xor eax,ebx + rol edi,5 + movdqa xmm4,xmm3 + add edx,esi + paddd xmm3,xmm7 + and ebp,eax + pxor xmm0,xmm2 + xor eax,ebx + add edx,edi + ror edi,7 + xor ebp,ebx + movdqa xmm2,xmm0 + movdqa [48+esp],xmm3 + mov esi,edx + add ecx,DWORD [4+esp] + xor edi,eax + rol edx,5 + pslld xmm0,2 + add ecx,ebp + and esi,edi + psrld xmm2,30 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD [8+esp] + xor edx,edi + rol ecx,5 + por xmm0,xmm2 + add ebx,esi + and ebp,edx + movdqa xmm2,[96+esp] + xor edx,edi + add ebx,ecx + add eax,DWORD [12+esp] + xor ebp,edi + mov esi,ebx + pshufd xmm3,xmm7,238 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [16+esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm1,xmm2 + movdqa [96+esp],xmm5 + add edi,esi + xor ebp,ecx + movdqa xmm5,xmm4 + ror ebx,7 + paddd xmm4,xmm0 + add edi,eax + pxor xmm1,xmm3 + add edx,DWORD [20+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm3,xmm1 + movdqa [esp],xmm4 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD [24+esp] + xor esi,eax + psrld xmm3,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm1,xmm3 + add ebx,DWORD [28+esp] + xor ebp,edi + movdqa xmm3,[64+esp] + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + pshufd xmm4,xmm0,238 + add ebx,ecx + add eax,DWORD [32+esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + xor esi,edx + mov ebp,ebx + rol ebx,5 + pxor xmm2,xmm3 + movdqa [64+esp],xmm6 + add eax,esi + xor ebp,edx + movdqa xmm6,[128+esp] + ror ecx,7 + paddd xmm5,xmm1 + add eax,ebx + pxor xmm2,xmm4 + add edi,DWORD [36+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + movdqa xmm4,xmm2 + movdqa [16+esp],xmm5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + pslld xmm2,2 + add edx,DWORD [40+esp] + xor esi,ebx + psrld xmm4,30 + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + por xmm2,xmm4 + add ecx,DWORD [44+esp] + xor ebp,eax + movdqa xmm4,[80+esp] + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + pshufd xmm5,xmm1,238 + add ecx,edx + add ebx,DWORD [48+esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,edi + mov ebp,ecx + rol ecx,5 + pxor xmm3,xmm4 + movdqa [80+esp],xmm7 + add ebx,esi + xor ebp,edi + movdqa xmm7,xmm6 + ror edx,7 + paddd xmm6,xmm2 + add ebx,ecx + pxor xmm3,xmm5 + add eax,DWORD [52+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + movdqa xmm5,xmm3 + movdqa [32+esp],xmm6 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + pslld xmm3,2 + add edi,DWORD [56+esp] + xor esi,ecx + psrld xmm5,30 + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + por xmm3,xmm5 + add edx,DWORD [60+esp] + xor ebp,ebx + movdqa xmm5,[96+esp] + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + pshufd xmm6,xmm2,238 + add edx,edi + add ecx,DWORD [esp] + pxor xmm4,xmm0 + punpcklqdq xmm6,xmm3 + xor esi,eax + mov ebp,edx + rol edx,5 + pxor xmm4,xmm5 + movdqa [96+esp],xmm0 + add ecx,esi + xor ebp,eax + movdqa xmm0,xmm7 + ror edi,7 + paddd xmm7,xmm3 + add ecx,edx + pxor xmm4,xmm6 + add ebx,DWORD [4+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + movdqa xmm6,xmm4 + movdqa [48+esp],xmm7 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + pslld xmm4,2 + add eax,DWORD [8+esp] + xor esi,edx + psrld xmm6,30 + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + por xmm4,xmm6 + add edi,DWORD [12+esp] + xor ebp,ecx + movdqa xmm6,[64+esp] + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + pshufd xmm7,xmm3,238 + add edi,eax + add edx,DWORD [16+esp] + pxor xmm5,xmm1 + punpcklqdq xmm7,xmm4 + xor esi,ebx + mov ebp,edi + rol edi,5 + pxor xmm5,xmm6 + movdqa [64+esp],xmm1 + add edx,esi + xor ebp,ebx + movdqa xmm1,xmm0 + ror eax,7 + paddd xmm0,xmm4 + add edx,edi + pxor xmm5,xmm7 + add ecx,DWORD [20+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + movdqa xmm7,xmm5 + movdqa [esp],xmm0 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + pslld xmm5,2 + add ebx,DWORD [24+esp] + xor esi,edi + psrld xmm7,30 + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + por xmm5,xmm7 + add eax,DWORD [28+esp] + movdqa xmm7,[80+esp] + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pshufd xmm0,xmm4,238 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD [32+esp] + pxor xmm6,xmm2 + punpcklqdq xmm0,xmm5 + and esi,ecx + xor ecx,edx + ror ebx,7 + pxor xmm6,xmm7 + movdqa [80+esp],xmm2 + mov ebp,eax + xor esi,ecx + rol eax,5 + movdqa xmm2,xmm1 + add edi,esi + paddd xmm1,xmm5 + xor ebp,ebx + pxor xmm6,xmm0 + xor ebx,ecx + add edi,eax + add edx,DWORD [36+esp] + and ebp,ebx + movdqa xmm0,xmm6 + movdqa [16+esp],xmm1 + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + pslld xmm6,2 + add edx,ebp + xor esi,eax + psrld xmm0,30 + xor eax,ebx + add edx,edi + add ecx,DWORD [40+esp] + and esi,eax + xor eax,ebx + ror edi,7 + por xmm6,xmm0 + mov ebp,edx + xor esi,eax + movdqa xmm0,[96+esp] + rol edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + pshufd xmm1,xmm5,238 + add ebx,DWORD [44+esp] + and ebp,edi + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD [48+esp] + pxor xmm7,xmm3 + punpcklqdq xmm1,xmm6 + and esi,edx + xor edx,edi + ror ecx,7 + pxor xmm7,xmm0 + movdqa [96+esp],xmm3 + mov ebp,ebx + xor esi,edx + rol ebx,5 + movdqa xmm3,[144+esp] + add eax,esi + paddd xmm2,xmm6 + xor ebp,ecx + pxor xmm7,xmm1 + xor ecx,edx + add eax,ebx + add edi,DWORD [52+esp] + and ebp,ecx + movdqa xmm1,xmm7 + movdqa [32+esp],xmm2 + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + pslld xmm7,2 + add edi,ebp + xor esi,ebx + psrld xmm1,30 + xor ebx,ecx + add edi,eax + add edx,DWORD [56+esp] + and esi,ebx + xor ebx,ecx + ror eax,7 + por xmm7,xmm1 + mov ebp,edi + xor esi,ebx + movdqa xmm1,[64+esp] + rol edi,5 + add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + pshufd xmm2,xmm6,238 + add ecx,DWORD [60+esp] + and ebp,eax + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add ebx,DWORD [esp] + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + and esi,edi + xor edi,eax + ror edx,7 + pxor xmm0,xmm1 + movdqa [64+esp],xmm4 + mov ebp,ecx + xor esi,edi + rol ecx,5 + movdqa xmm4,xmm3 + add ebx,esi + paddd xmm3,xmm7 + xor ebp,edx + pxor xmm0,xmm2 + xor edx,edi + add ebx,ecx + add eax,DWORD [4+esp] + and ebp,edx + movdqa xmm2,xmm0 + movdqa [48+esp],xmm3 + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pslld xmm0,2 + add eax,ebp + xor esi,ecx + psrld xmm2,30 + xor ecx,edx + add eax,ebx + add edi,DWORD [8+esp] + and esi,ecx + xor ecx,edx + ror ebx,7 + por xmm0,xmm2 + mov ebp,eax + xor esi,ecx + movdqa xmm2,[80+esp] + rol eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + pshufd xmm3,xmm7,238 + add edx,DWORD [12+esp] + and ebp,ebx + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD [16+esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + and esi,eax + xor eax,ebx + ror edi,7 + pxor xmm1,xmm2 + movdqa [80+esp],xmm5 + mov ebp,edx + xor esi,eax + rol edx,5 + movdqa xmm5,xmm4 + add ecx,esi + paddd xmm4,xmm0 + xor ebp,edi + pxor xmm1,xmm3 + xor edi,eax + add ecx,edx + add ebx,DWORD [20+esp] + and ebp,edi + movdqa xmm3,xmm1 + movdqa [esp],xmm4 + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + pslld xmm1,2 + add ebx,ebp + xor esi,edx + psrld xmm3,30 + xor edx,edi + add ebx,ecx + add eax,DWORD [24+esp] + and esi,edx + xor edx,edi + ror ecx,7 + por xmm1,xmm3 + mov ebp,ebx + xor esi,edx + movdqa xmm3,[96+esp] + rol ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + pshufd xmm4,xmm0,238 + add edi,DWORD [28+esp] + and ebp,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD [32+esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + and esi,ebx + xor ebx,ecx + ror eax,7 + pxor xmm2,xmm3 + movdqa [96+esp],xmm6 + mov ebp,edi + xor esi,ebx + rol edi,5 + movdqa xmm6,xmm5 + add edx,esi + paddd xmm5,xmm1 + xor ebp,eax + pxor xmm2,xmm4 + xor eax,ebx + add edx,edi + add ecx,DWORD [36+esp] + and ebp,eax + movdqa xmm4,xmm2 + movdqa [16+esp],xmm5 + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + pslld xmm2,2 + add ecx,ebp + xor esi,edi + psrld xmm4,30 + xor edi,eax + add ecx,edx + add ebx,DWORD [40+esp] + and esi,edi + xor edi,eax + ror edx,7 + por xmm2,xmm4 + mov ebp,ecx + xor esi,edi + movdqa xmm4,[64+esp] + rol ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + pshufd xmm5,xmm1,238 + add eax,DWORD [44+esp] + and ebp,edx + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + add edi,DWORD [48+esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm3,xmm4 + movdqa [64+esp],xmm7 + add edi,esi + xor ebp,ecx + movdqa xmm7,xmm6 + ror ebx,7 + paddd xmm6,xmm2 + add edi,eax + pxor xmm3,xmm5 + add edx,DWORD [52+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm5,xmm3 + movdqa [32+esp],xmm6 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD [56+esp] + xor esi,eax + psrld xmm5,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm3,xmm5 + add ebx,DWORD [60+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + paddd xmm7,xmm3 + add eax,ebx + add edi,DWORD [4+esp] + xor ebp,ecx + mov esi,eax + movdqa [48+esp],xmm7 + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [8+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [12+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + mov ebp,DWORD [196+esp] + cmp ebp,DWORD [200+esp] + je NEAR L$007done + movdqa xmm7,[160+esp] + movdqa xmm6,[176+esp] + movdqu xmm0,[ebp] + movdqu xmm1,[16+ebp] + movdqu xmm2,[32+ebp] + movdqu xmm3,[48+ebp] + add ebp,64 +db 102,15,56,0,198 + mov DWORD [196+esp],ebp + movdqa [96+esp],xmm7 + add ebx,DWORD [16+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 +db 102,15,56,0,206 + add ebx,ecx + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + paddd xmm0,xmm7 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + movdqa [esp],xmm0 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + psubd xmm0,xmm7 + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 +db 102,15,56,0,214 + add ecx,edx + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + paddd xmm1,xmm7 + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + movdqa [16+esp],xmm1 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + psubd xmm1,xmm7 + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 +db 102,15,56,0,222 + add edx,edi + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + paddd xmm2,xmm7 + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + movdqa [32+esp],xmm2 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + psubd xmm2,xmm7 + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov DWORD [8+ebp],ecx + mov ebx,ecx + mov DWORD [12+ebp],edx + xor ebx,edx + mov DWORD [16+ebp],edi + mov ebp,esi + pshufd xmm4,xmm0,238 + and esi,ebx + mov ebx,ebp + jmp NEAR L$006loop +align 16 +L$007done: + add ebx,DWORD [16+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [20+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [24+esp] + xor esi,ecx + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [28+esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [32+esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + add ebx,DWORD [36+esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [40+esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD [44+esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD [48+esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD [52+esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + add ebx,DWORD [56+esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD [60+esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD [192+esp] + add eax,DWORD [ebp] + mov esp,DWORD [204+esp] + add esi,DWORD [4+ebp] + add ecx,DWORD [8+ebp] + mov DWORD [ebp],eax + add edx,DWORD [12+ebp] + mov DWORD [4+ebp],esi + add edi,DWORD [16+ebp] + mov DWORD [8+ebp],ecx + mov DWORD [12+ebp],edx + mov DWORD [16+ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$K_XX_XX: +dd 1518500249,1518500249,1518500249,1518500249 +dd 1859775393,1859775393,1859775393,1859775393 +dd 2400959708,2400959708,2400959708,2400959708 +dd 3395469782,3395469782,3395469782,3395469782 +dd 66051,67438087,134810123,202182159 +db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +db 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/sha/sha256-586.asm b/third_party/boringssl/win-x86/crypto/sha/sha256-586.asm new file mode 100644 index 00000000000..fe36bc5c9ad --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/sha/sha256-586.asm @@ -0,0 +1,4591 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _sha256_block_data_order +align 16 +_sha256_block_data_order: +L$_sha256_block_data_order_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$000pic_point +L$000pic_point: + pop ebp + lea ebp,[(L$001K256-L$000pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx + lea edx,[_OPENSSL_ia32cap_P] + mov ecx,DWORD [edx] + mov ebx,DWORD [4+edx] + test ecx,1048576 + jnz NEAR L$002loop + mov edx,DWORD [8+edx] + test ecx,16777216 + jz NEAR L$003no_xmm + and ecx,1073741824 + and ebx,268435968 + test edx,536870912 + jnz NEAR L$004shaext + or ecx,ebx + and ecx,1342177280 + cmp ecx,1342177280 + test ebx,512 + jnz NEAR L$005SSSE3 +L$003no_xmm: + sub eax,edi + cmp eax,256 + jae NEAR L$006unrolled + jmp NEAR L$002loop +align 16 +L$002loop: + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + bswap eax + mov edx,DWORD [12+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [16+edi] + mov ebx,DWORD [20+edi] + mov ecx,DWORD [24+edi] + bswap eax + mov edx,DWORD [28+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [32+edi] + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + bswap eax + mov edx,DWORD [44+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD [48+edi] + mov ebx,DWORD [52+edi] + mov ecx,DWORD [56+edi] + bswap eax + mov edx,DWORD [60+edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + add edi,64 + lea esp,[esp-36] + mov DWORD [104+esp],edi + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edi,DWORD [12+esi] + mov DWORD [8+esp],ebx + xor ebx,ecx + mov DWORD [12+esp],ecx + mov DWORD [16+esp],edi + mov DWORD [esp],ebx + mov edx,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edi,DWORD [28+esi] + mov DWORD [24+esp],ebx + mov DWORD [28+esp],ecx + mov DWORD [32+esp],edi +align 16 +L$00700_15: + mov ecx,edx + mov esi,DWORD [24+esp] + ror ecx,14 + mov edi,DWORD [28+esp] + xor ecx,edx + xor esi,edi + mov ebx,DWORD [96+esp] + ror ecx,5 + and esi,edx + mov DWORD [20+esp],edx + xor edx,ecx + add ebx,DWORD [32+esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD [8+esp] + xor ecx,eax + mov DWORD [4+esp],eax + lea esp,[esp-4] + ror ecx,11 + mov esi,DWORD [ebp] + xor ecx,eax + mov edx,DWORD [20+esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD [esp],eax + add edx,ebx + and eax,DWORD [4+esp] + add ebx,ecx + xor eax,edi + add ebp,4 + add eax,ebx + cmp esi,3248222580 + jne NEAR L$00700_15 + mov ecx,DWORD [156+esp] + jmp NEAR L$00816_63 +align 16 +L$00816_63: + mov ebx,ecx + mov esi,DWORD [104+esp] + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [160+esp] + shr edi,10 + add ebx,DWORD [124+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [24+esp] + ror ecx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor ecx,edx + xor esi,edi + mov DWORD [96+esp],ebx + ror ecx,5 + and esi,edx + mov DWORD [20+esp],edx + xor edx,ecx + add ebx,DWORD [32+esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD [8+esp] + xor ecx,eax + mov DWORD [4+esp],eax + lea esp,[esp-4] + ror ecx,11 + mov esi,DWORD [ebp] + xor ecx,eax + mov edx,DWORD [20+esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD [esp],eax + add edx,ebx + and eax,DWORD [4+esp] + add ebx,ecx + xor eax,edi + mov ecx,DWORD [156+esp] + add ebp,4 + add eax,ebx + cmp esi,3329325298 + jne NEAR L$00816_63 + mov esi,DWORD [356+esp] + mov ebx,DWORD [8+esp] + mov ecx,DWORD [16+esp] + add eax,DWORD [esi] + add ebx,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov eax,DWORD [24+esp] + mov ebx,DWORD [28+esp] + mov ecx,DWORD [32+esp] + mov edi,DWORD [360+esp] + add edx,DWORD [16+esi] + add eax,DWORD [20+esi] + add ebx,DWORD [24+esi] + add ecx,DWORD [28+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],eax + mov DWORD [24+esi],ebx + mov DWORD [28+esi],ecx + lea esp,[356+esp] + sub ebp,256 + cmp edi,DWORD [8+esp] + jb NEAR L$002loop + mov esp,DWORD [12+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$001K256: +dd 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +dd 66051,67438087,134810123,202182159 +db 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +align 16 +L$006unrolled: + lea esp,[esp-96] + mov eax,DWORD [esi] + mov ebp,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov ebx,DWORD [12+esi] + mov DWORD [4+esp],ebp + xor ebp,ecx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],ebx + mov edx,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov esi,DWORD [28+esi] + mov DWORD [20+esp],ebx + mov DWORD [24+esp],ecx + mov DWORD [28+esp],esi + jmp NEAR L$009grand_loop +align 16 +L$009grand_loop: + mov ebx,DWORD [edi] + mov ecx,DWORD [4+edi] + bswap ebx + mov esi,DWORD [8+edi] + bswap ecx + mov DWORD [32+esp],ebx + bswap esi + mov DWORD [36+esp],ecx + mov DWORD [40+esp],esi + mov ebx,DWORD [12+edi] + mov ecx,DWORD [16+edi] + bswap ebx + mov esi,DWORD [20+edi] + bswap ecx + mov DWORD [44+esp],ebx + bswap esi + mov DWORD [48+esp],ecx + mov DWORD [52+esp],esi + mov ebx,DWORD [24+edi] + mov ecx,DWORD [28+edi] + bswap ebx + mov esi,DWORD [32+edi] + bswap ecx + mov DWORD [56+esp],ebx + bswap esi + mov DWORD [60+esp],ecx + mov DWORD [64+esp],esi + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + bswap ebx + mov esi,DWORD [44+edi] + bswap ecx + mov DWORD [68+esp],ebx + bswap esi + mov DWORD [72+esp],ecx + mov DWORD [76+esp],esi + mov ebx,DWORD [48+edi] + mov ecx,DWORD [52+edi] + bswap ebx + mov esi,DWORD [56+edi] + bswap ecx + mov DWORD [80+esp],ebx + bswap esi + mov DWORD [84+esp],ecx + mov DWORD [88+esp],esi + mov ebx,DWORD [60+edi] + add edi,64 + bswap ebx + mov DWORD [100+esp],edi + mov DWORD [92+esp],ebx + mov ecx,edx + mov esi,DWORD [20+esp] + ror edx,14 + mov edi,DWORD [24+esp] + xor edx,ecx + mov ebx,DWORD [32+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1116352408+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [16+esp] + ror edx,14 + mov edi,DWORD [20+esp] + xor edx,esi + mov ebx,DWORD [36+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1899447441+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [12+esp] + ror edx,14 + mov edi,DWORD [16+esp] + xor edx,ecx + mov ebx,DWORD [40+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3049323471+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [8+esp] + ror edx,14 + mov edi,DWORD [12+esp] + xor edx,esi + mov ebx,DWORD [44+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3921009573+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [4+esp] + ror edx,14 + mov edi,DWORD [8+esp] + xor edx,ecx + mov ebx,DWORD [48+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[961987163+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [esp] + ror edx,14 + mov edi,DWORD [4+esp] + xor edx,esi + mov ebx,DWORD [52+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1508970993+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [28+esp] + ror edx,14 + mov edi,DWORD [esp] + xor edx,ecx + mov ebx,DWORD [56+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2453635748+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [24+esp] + ror edx,14 + mov edi,DWORD [28+esp] + xor edx,esi + mov ebx,DWORD [60+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2870763221+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [20+esp] + ror edx,14 + mov edi,DWORD [24+esp] + xor edx,ecx + mov ebx,DWORD [64+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3624381080+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [16+esp] + ror edx,14 + mov edi,DWORD [20+esp] + xor edx,esi + mov ebx,DWORD [68+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[310598401+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [12+esp] + ror edx,14 + mov edi,DWORD [16+esp] + xor edx,ecx + mov ebx,DWORD [72+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[607225278+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [8+esp] + ror edx,14 + mov edi,DWORD [12+esp] + xor edx,esi + mov ebx,DWORD [76+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1426881987+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [4+esp] + ror edx,14 + mov edi,DWORD [8+esp] + xor edx,ecx + mov ebx,DWORD [80+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1925078388+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [esp] + ror edx,14 + mov edi,DWORD [4+esp] + xor edx,esi + mov ebx,DWORD [84+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2162078206+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov ecx,edx + mov esi,DWORD [28+esp] + ror edx,14 + mov edi,DWORD [esp] + xor edx,ecx + mov ebx,DWORD [88+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2614888103+edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD [24+esp] + ror edx,14 + mov edi,DWORD [28+esp] + xor edx,esi + mov ebx,DWORD [92+esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3248222580+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3835390401+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[4022224774+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[264347078+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[604807628+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[770255983+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1249150122+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1555081692+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1996064986+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2554220882+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2821834349+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2952996808+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3210313671+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3336571891+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3584528711+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [88+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[113926993+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [92+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[338241895+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[666307205+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[773529912+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1294757372+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1396182291+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1695183700+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1986661051+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2177026350+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2456956037+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2730485921+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2820302411+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3259730800+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3345764771+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3516065817+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3600352804+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [88+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[4094571909+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [92+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[275423344+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [36+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [88+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [32+esp] + shr edi,10 + add ebx,DWORD [68+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [32+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[430227734+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [40+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [92+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [36+esp] + shr edi,10 + add ebx,DWORD [72+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [36+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[506948616+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [44+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [32+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [40+esp] + shr edi,10 + add ebx,DWORD [76+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [40+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[659060556+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [48+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [36+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [44+esp] + shr edi,10 + add ebx,DWORD [80+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [44+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[883997877+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [52+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [40+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [48+esp] + shr edi,10 + add ebx,DWORD [84+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [48+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[958139571+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [56+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [44+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [52+esp] + shr edi,10 + add ebx,DWORD [88+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [52+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1322822218+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [60+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [48+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [56+esp] + shr edi,10 + add ebx,DWORD [92+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + mov DWORD [56+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1537002063+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [64+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [52+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [60+esp] + shr edi,10 + add ebx,DWORD [32+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + mov DWORD [60+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[1747873779+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [68+esp] + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [56+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [64+esp] + shr edi,10 + add ebx,DWORD [36+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [20+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [24+esp] + xor edx,ecx + mov DWORD [64+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + add ebx,DWORD [28+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [4+esp] + xor ecx,eax + mov DWORD [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[1955562222+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [72+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [12+esp] + add ebp,ecx + mov ecx,DWORD [60+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [68+esp] + shr edi,10 + add ebx,DWORD [40+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [16+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [20+esp] + xor edx,esi + mov DWORD [68+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [12+esp],esi + xor edx,esi + add ebx,DWORD [24+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [esp] + xor esi,ebp + mov DWORD [28+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2024104815+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [76+esp] + ror esi,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,esi + mov esi,DWORD [64+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [72+esp] + shr edi,10 + add ebx,DWORD [44+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [12+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [16+esp] + xor edx,ecx + mov DWORD [72+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + add ebx,DWORD [20+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [28+esp] + xor ecx,eax + mov DWORD [24+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2227730452+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [80+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [4+esp] + add ebp,ecx + mov ecx,DWORD [68+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [76+esp] + shr edi,10 + add ebx,DWORD [48+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [8+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [12+esp] + xor edx,esi + mov DWORD [76+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [4+esp],esi + xor edx,esi + add ebx,DWORD [16+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [24+esp] + xor esi,ebp + mov DWORD [20+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2361852424+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [84+esp] + ror esi,2 + add eax,edx + add edx,DWORD [esp] + add eax,esi + mov esi,DWORD [72+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [80+esp] + shr edi,10 + add ebx,DWORD [52+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [4+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [8+esp] + xor edx,ecx + mov DWORD [80+esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + add ebx,DWORD [12+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [20+esp] + xor ecx,eax + mov DWORD [16+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[2428436474+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [88+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [28+esp] + add ebp,ecx + mov ecx,DWORD [76+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [84+esp] + shr edi,10 + add ebx,DWORD [56+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [4+esp] + xor edx,esi + mov DWORD [84+esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [28+esp],esi + xor edx,esi + add ebx,DWORD [8+esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [16+esp] + xor esi,ebp + mov DWORD [12+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[2756734187+edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD [92+esp] + ror esi,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,esi + mov esi,DWORD [80+esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD [88+esp] + shr edi,10 + add ebx,DWORD [60+esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD [28+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [esp] + xor edx,ecx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + add ebx,DWORD [4+esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD [12+esp] + xor ecx,eax + mov DWORD [8+esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,[3204031479+edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD [32+esp] + ror ecx,2 + add ebp,edx + add edx,DWORD [20+esp] + add ebp,ecx + mov ecx,DWORD [84+esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD [92+esp] + shr edi,10 + add ebx,DWORD [64+esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD [24+esp] + ror edx,14 + add ebx,edi + mov edi,DWORD [28+esp] + xor edx,esi + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD [20+esp],esi + xor edx,esi + add ebx,DWORD [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD [8+esp] + xor esi,ebp + mov DWORD [4+esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,[3329325298+edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,esi + mov esi,DWORD [96+esp] + xor ebp,edi + mov ecx,DWORD [12+esp] + add eax,DWORD [esi] + add ebp,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebp + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov DWORD [4+esp],ebp + xor ebp,edi + mov DWORD [8+esp],edi + mov DWORD [12+esp],ecx + mov edi,DWORD [20+esp] + mov ebx,DWORD [24+esp] + mov ecx,DWORD [28+esp] + add edx,DWORD [16+esi] + add edi,DWORD [20+esi] + add ebx,DWORD [24+esi] + add ecx,DWORD [28+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],edi + mov DWORD [24+esi],ebx + mov DWORD [28+esi],ecx + mov DWORD [20+esp],edi + mov edi,DWORD [100+esp] + mov DWORD [24+esp],ebx + mov DWORD [28+esp],ecx + cmp edi,DWORD [104+esp] + jb NEAR L$009grand_loop + mov esp,DWORD [108+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 32 +L$004shaext: + sub esp,32 + movdqu xmm1,[esi] + lea ebp,[128+ebp] + movdqu xmm2,[16+esi] + movdqa xmm7,[128+ebp] + pshufd xmm0,xmm1,27 + pshufd xmm1,xmm1,177 + pshufd xmm2,xmm2,27 +db 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp NEAR L$010loop_shaext +align 16 +L$010loop_shaext: + movdqu xmm3,[edi] + movdqu xmm4,[16+edi] + movdqu xmm5,[32+edi] +db 102,15,56,0,223 + movdqu xmm6,[48+edi] + movdqa [16+esp],xmm2 + movdqa xmm0,[ebp-128] + paddd xmm0,xmm3 +db 102,15,56,0,231 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + nop + movdqa [esp],xmm1 +db 15,56,203,202 + movdqa xmm0,[ebp-112] + paddd xmm0,xmm4 +db 102,15,56,0,239 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + lea edi,[64+edi] +db 15,56,204,220 +db 15,56,203,202 + movdqa xmm0,[ebp-96] + paddd xmm0,xmm5 +db 102,15,56,0,247 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +db 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +db 15,56,204,229 +db 15,56,203,202 + movdqa xmm0,[ebp-80] + paddd xmm0,xmm6 +db 15,56,205,222 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +db 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +db 15,56,204,238 +db 15,56,203,202 + movdqa xmm0,[ebp-64] + paddd xmm0,xmm3 +db 15,56,205,227 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +db 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +db 15,56,204,243 +db 15,56,203,202 + movdqa xmm0,[ebp-48] + paddd xmm0,xmm4 +db 15,56,205,236 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +db 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +db 15,56,204,220 +db 15,56,203,202 + movdqa xmm0,[ebp-32] + paddd xmm0,xmm5 +db 15,56,205,245 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +db 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +db 15,56,204,229 +db 15,56,203,202 + movdqa xmm0,[ebp-16] + paddd xmm0,xmm6 +db 15,56,205,222 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +db 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +db 15,56,204,238 +db 15,56,203,202 + movdqa xmm0,[ebp] + paddd xmm0,xmm3 +db 15,56,205,227 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +db 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +db 15,56,204,243 +db 15,56,203,202 + movdqa xmm0,[16+ebp] + paddd xmm0,xmm4 +db 15,56,205,236 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +db 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +db 15,56,204,220 +db 15,56,203,202 + movdqa xmm0,[32+ebp] + paddd xmm0,xmm5 +db 15,56,205,245 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +db 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +db 15,56,204,229 +db 15,56,203,202 + movdqa xmm0,[48+ebp] + paddd xmm0,xmm6 +db 15,56,205,222 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +db 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +db 15,56,204,238 +db 15,56,203,202 + movdqa xmm0,[64+ebp] + paddd xmm0,xmm3 +db 15,56,205,227 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +db 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +db 15,56,204,243 +db 15,56,203,202 + movdqa xmm0,[80+ebp] + paddd xmm0,xmm4 +db 15,56,205,236 +db 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +db 102,15,58,15,252,4 +db 15,56,203,202 + paddd xmm6,xmm7 + movdqa xmm0,[96+ebp] + paddd xmm0,xmm5 +db 15,56,203,209 + pshufd xmm0,xmm0,14 +db 15,56,205,245 + movdqa xmm7,[128+ebp] +db 15,56,203,202 + movdqa xmm0,[112+ebp] + paddd xmm0,xmm6 + nop +db 15,56,203,209 + pshufd xmm0,xmm0,14 + cmp eax,edi + nop +db 15,56,203,202 + paddd xmm2,[16+esp] + paddd xmm1,[esp] + jnz NEAR L$010loop_shaext + pshufd xmm2,xmm2,177 + pshufd xmm7,xmm1,27 + pshufd xmm1,xmm1,177 + punpckhqdq xmm1,xmm2 +db 102,15,58,15,215,8 + mov esp,DWORD [44+esp] + movdqu [esi],xmm1 + movdqu [16+esi],xmm2 + pop edi + pop esi + pop ebx + pop ebp + ret +align 32 +L$005SSSE3: + lea esp,[esp-96] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edi,DWORD [12+esi] + mov DWORD [4+esp],ebx + xor ebx,ecx + mov DWORD [8+esp],ecx + mov DWORD [12+esp],edi + mov edx,DWORD [16+esi] + mov edi,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov esi,DWORD [28+esi] + mov DWORD [20+esp],edi + mov edi,DWORD [100+esp] + mov DWORD [24+esp],ecx + mov DWORD [28+esp],esi + movdqa xmm7,[256+ebp] + jmp NEAR L$011grand_ssse3 +align 16 +L$011grand_ssse3: + movdqu xmm0,[edi] + movdqu xmm1,[16+edi] + movdqu xmm2,[32+edi] + movdqu xmm3,[48+edi] + add edi,64 +db 102,15,56,0,199 + mov DWORD [100+esp],edi +db 102,15,56,0,207 + movdqa xmm4,[ebp] +db 102,15,56,0,215 + movdqa xmm5,[16+ebp] + paddd xmm4,xmm0 +db 102,15,56,0,223 + movdqa xmm6,[32+ebp] + paddd xmm5,xmm1 + movdqa xmm7,[48+ebp] + movdqa [32+esp],xmm4 + paddd xmm6,xmm2 + movdqa [48+esp],xmm5 + paddd xmm7,xmm3 + movdqa [64+esp],xmm6 + movdqa [80+esp],xmm7 + jmp NEAR L$012ssse3_00_47 +align 16 +L$012ssse3_00_47: + add ebp,64 + mov ecx,edx + movdqa xmm4,xmm1 + ror edx,14 + mov esi,DWORD [20+esp] + movdqa xmm7,xmm3 + xor edx,ecx + mov edi,DWORD [24+esp] +db 102,15,58,15,224,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,250,4 + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [4+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm3,250 + xor ecx,esi + add edx,DWORD [32+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [12+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [16+esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [12+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm4 + mov DWORD [28+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [36+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [8+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm0,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + pshufd xmm7,xmm0,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [40+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [4+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[ebp] + and esi,ecx + mov DWORD [4+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + paddd xmm6,xmm0 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + movdqa [32+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm2 + ror edx,14 + mov esi,DWORD [4+esp] + movdqa xmm7,xmm0 + xor edx,ecx + mov edi,DWORD [8+esp] +db 102,15,58,15,225,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,251,4 + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [20+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD [16+esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm0,250 + xor ecx,esi + add edx,DWORD [48+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [28+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [28+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm4 + mov DWORD [12+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [52+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [24+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm1,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + pshufd xmm7,xmm1,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [56+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [20+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[16+ebp] + and esi,ecx + mov DWORD [20+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + paddd xmm6,xmm1 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + movdqa [48+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm3 + ror edx,14 + mov esi,DWORD [20+esp] + movdqa xmm7,xmm1 + xor edx,ecx + mov edi,DWORD [24+esp] +db 102,15,58,15,226,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,248,4 + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [4+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm1,250 + xor ecx,esi + add edx,DWORD [64+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [12+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [16+esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [12+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm4 + mov DWORD [28+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [68+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [8+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm2,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + pshufd xmm7,xmm2,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [72+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [4+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[32+ebp] + and esi,ecx + mov DWORD [4+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + paddd xmm6,xmm2 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + movdqa [64+esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm0 + ror edx,14 + mov esi,DWORD [4+esp] + movdqa xmm7,xmm2 + xor edx,ecx + mov edi,DWORD [8+esp] +db 102,15,58,15,227,4 + xor esi,edi + ror edx,5 + and esi,ecx +db 102,15,58,15,249,4 + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD [20+esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD [16+esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm2,250 + xor ecx,esi + add edx,DWORD [80+esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD [28+esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD [28+esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm4 + mov DWORD [12+esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD [84+esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD [24+esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm3,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + pshufd xmm7,xmm3,80 + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD [88+esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD [20+esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,[48+ebp] + and esi,ecx + mov DWORD [20+esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + paddd xmm6,xmm3 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + movdqa [80+esp],xmm6 + cmp DWORD [64+ebp],66051 + jne NEAR L$012ssse3_00_47 + mov ecx,edx + ror edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + ror ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [32+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [36+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [40+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [44+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + ror ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [48+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [52+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [56+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [60+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [20+esp] + xor edx,ecx + mov edi,DWORD [24+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [16+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [4+esp] + mov esi,eax + ror ecx,9 + mov DWORD [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [28+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [64+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [12+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [16+esp] + xor edx,ecx + mov edi,DWORD [20+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [12+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [esp] + mov esi,ebx + ror ecx,9 + mov DWORD [28+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [24+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [68+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [8+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [12+esp] + xor edx,ecx + mov edi,DWORD [16+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [8+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [28+esp] + mov esi,eax + ror ecx,9 + mov DWORD [24+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [20+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [72+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [4+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [8+esp] + xor edx,ecx + mov edi,DWORD [12+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [4+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [24+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [20+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [16+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [76+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [4+esp] + xor edx,ecx + mov edi,DWORD [8+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [20+esp] + mov esi,eax + ror ecx,9 + mov DWORD [16+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [12+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [80+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [28+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [esp] + xor edx,ecx + mov edi,DWORD [4+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [28+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [16+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [12+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [8+esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [84+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [24+esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [28+esp] + xor edx,ecx + mov edi,DWORD [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [24+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD [12+esp] + mov esi,eax + ror ecx,9 + mov DWORD [8+esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD [4+esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD [88+esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD [20+esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD [24+esp] + xor edx,ecx + mov edi,DWORD [28+esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD [20+esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD [8+esp] + mov esi,ebx + ror ecx,9 + mov DWORD [4+esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD [92+esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD [16+esp] + add eax,ecx + mov esi,DWORD [96+esp] + xor ebx,edi + mov ecx,DWORD [12+esp] + add eax,DWORD [esi] + add ebx,DWORD [4+esi] + add edi,DWORD [8+esi] + add ecx,DWORD [12+esi] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + mov DWORD [8+esi],edi + mov DWORD [12+esi],ecx + mov DWORD [4+esp],ebx + xor ebx,edi + mov DWORD [8+esp],edi + mov DWORD [12+esp],ecx + mov edi,DWORD [20+esp] + mov ecx,DWORD [24+esp] + add edx,DWORD [16+esi] + add edi,DWORD [20+esi] + add ecx,DWORD [24+esi] + mov DWORD [16+esi],edx + mov DWORD [20+esi],edi + mov DWORD [20+esp],edi + mov edi,DWORD [28+esp] + mov DWORD [24+esi],ecx + add edi,DWORD [28+esi] + mov DWORD [24+esp],ecx + mov DWORD [28+esi],edi + mov DWORD [28+esp],edi + mov edi,DWORD [100+esp] + movdqa xmm7,[64+ebp] + sub ebp,192 + cmp edi,DWORD [104+esp] + jb NEAR L$011grand_ssse3 + mov esp,DWORD [108+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86/crypto/sha/sha512-586.asm b/third_party/boringssl/win-x86/crypto/sha/sha512-586.asm new file mode 100644 index 00000000000..88ed0b380d0 --- /dev/null +++ b/third_party/boringssl/win-x86/crypto/sha/sha512-586.asm @@ -0,0 +1,2843 @@ +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +%ifdef __YASM_VERSION_ID__ +%if __YASM_VERSION_ID__ < 01010000h +%error yasm version 1.1.0 or later needed. +%endif +; Yasm automatically includes .00 and complains about redefining it. +; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html +%else +$@feat.00 equ 1 +%endif +section .text code align=64 +%else +section .text code +%endif +;extern _OPENSSL_ia32cap_P +global _sha512_block_data_order +align 16 +_sha512_block_data_order: +L$_sha512_block_data_order_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$000pic_point +L$000pic_point: + pop ebp + lea ebp,[(L$001K512-L$000pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,7 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx + lea edx,[_OPENSSL_ia32cap_P] + mov ecx,DWORD [edx] + test ecx,67108864 + jz NEAR L$002loop_x86 + mov edx,DWORD [4+edx] + movq mm0,[esi] + and ecx,16777216 + movq mm1,[8+esi] + and edx,512 + movq mm2,[16+esi] + or ecx,edx + movq mm3,[24+esi] + movq mm4,[32+esi] + movq mm5,[40+esi] + movq mm6,[48+esi] + movq mm7,[56+esi] + cmp ecx,16777728 + je NEAR L$003SSSE3 + sub esp,80 + jmp NEAR L$004loop_sse2 +align 16 +L$004loop_sse2: + movq [8+esp],mm1 + movq [16+esp],mm2 + movq [24+esp],mm3 + movq [40+esp],mm5 + movq [48+esp],mm6 + pxor mm2,mm1 + movq [56+esp],mm7 + movq mm3,mm0 + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + add edi,8 + mov edx,15 + bswap eax + bswap ebx + jmp NEAR L$00500_14_sse2 +align 16 +L$00500_14_sse2: + movd mm1,eax + mov eax,DWORD [edi] + movd mm7,ebx + mov ebx,DWORD [4+edi] + add edi,8 + bswap eax + bswap ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + movq mm6,[48+esp] + dec edx + jnz NEAR L$00500_14_sse2 + movd mm1,eax + movd mm7,ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + pxor mm0,mm0 + mov edx,32 + jmp NEAR L$00616_79_sse2 +align 16 +L$00616_79_sse2: + movq mm5,[88+esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm0,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,[200+esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,[128+esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,[40+esp] + pxor mm1,mm6 + movq mm6,[48+esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm2,mm6 + add ebp,8 + movq mm5,[88+esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm2,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,[200+esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,[128+esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,[40+esp] + pxor mm1,mm6 + movq mm6,[48+esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq [72+esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,[ebp] + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm7,[192+esp] + paddq mm0,mm6 + add ebp,8 + dec edx + jnz NEAR L$00616_79_sse2 + paddq mm0,mm3 + movq mm1,[8+esp] + movq mm3,[24+esp] + movq mm5,[40+esp] + movq mm6,[48+esp] + movq mm7,[56+esp] + pxor mm2,mm1 + paddq mm0,[esi] + paddq mm1,[8+esi] + paddq mm2,[16+esi] + paddq mm3,[24+esi] + paddq mm4,[32+esi] + paddq mm5,[40+esi] + paddq mm6,[48+esi] + paddq mm7,[56+esi] + mov eax,640 + movq [esi],mm0 + movq [8+esi],mm1 + movq [16+esi],mm2 + movq [24+esi],mm3 + movq [32+esi],mm4 + movq [40+esi],mm5 + movq [48+esi],mm6 + movq [56+esi],mm7 + lea esp,[eax*1+esp] + sub ebp,eax + cmp edi,DWORD [88+esp] + jb NEAR L$004loop_sse2 + mov esp,DWORD [92+esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 32 +L$003SSSE3: + lea edx,[esp-64] + sub esp,256 + movdqa xmm1,[640+ebp] + movdqu xmm0,[edi] +db 102,15,56,0,193 + movdqa xmm3,[ebp] + movdqa xmm2,xmm1 + movdqu xmm1,[16+edi] + paddq xmm3,xmm0 +db 102,15,56,0,202 + movdqa [edx-128],xmm3 + movdqa xmm4,[16+ebp] + movdqa xmm3,xmm2 + movdqu xmm2,[32+edi] + paddq xmm4,xmm1 +db 102,15,56,0,211 + movdqa [edx-112],xmm4 + movdqa xmm5,[32+ebp] + movdqa xmm4,xmm3 + movdqu xmm3,[48+edi] + paddq xmm5,xmm2 +db 102,15,56,0,220 + movdqa [edx-96],xmm5 + movdqa xmm6,[48+ebp] + movdqa xmm5,xmm4 + movdqu xmm4,[64+edi] + paddq xmm6,xmm3 +db 102,15,56,0,229 + movdqa [edx-80],xmm6 + movdqa xmm7,[64+ebp] + movdqa xmm6,xmm5 + movdqu xmm5,[80+edi] + paddq xmm7,xmm4 +db 102,15,56,0,238 + movdqa [edx-64],xmm7 + movdqa [edx],xmm0 + movdqa xmm0,[80+ebp] + movdqa xmm7,xmm6 + movdqu xmm6,[96+edi] + paddq xmm0,xmm5 +db 102,15,56,0,247 + movdqa [edx-48],xmm0 + movdqa [16+edx],xmm1 + movdqa xmm1,[96+ebp] + movdqa xmm0,xmm7 + movdqu xmm7,[112+edi] + paddq xmm1,xmm6 +db 102,15,56,0,248 + movdqa [edx-32],xmm1 + movdqa [32+edx],xmm2 + movdqa xmm2,[112+ebp] + movdqa xmm0,[edx] + paddq xmm2,xmm7 + movdqa [edx-16],xmm2 + nop +align 32 +L$007loop_ssse3: + movdqa xmm2,[16+edx] + movdqa [48+edx],xmm3 + lea ebp,[128+ebp] + movq [8+esp],mm1 + mov ebx,edi + movq [16+esp],mm2 + lea edi,[128+edi] + movq [24+esp],mm3 + cmp edi,eax + movq [40+esp],mm5 + cmovb ebx,edi + movq [48+esp],mm6 + mov ecx,4 + pxor mm2,mm1 + movq [56+esp],mm7 + pxor mm3,mm3 + jmp NEAR L$00800_47_ssse3 +align 32 +L$00800_47_ssse3: + movdqa xmm3,xmm5 + movdqa xmm1,xmm2 +db 102,15,58,15,208,8 + movdqa [edx],xmm4 +db 102,15,58,15,220,8 + movdqa xmm4,xmm2 + psrlq xmm2,7 + paddq xmm0,xmm3 + movdqa xmm3,xmm4 + psrlq xmm4,1 + psllq xmm3,56 + pxor xmm2,xmm4 + psrlq xmm4,7 + pxor xmm2,xmm3 + psllq xmm3,7 + pxor xmm2,xmm4 + movdqa xmm4,xmm7 + pxor xmm2,xmm3 + movdqa xmm3,xmm7 + psrlq xmm4,6 + paddq xmm0,xmm2 + movdqa xmm2,xmm7 + psrlq xmm3,19 + psllq xmm2,3 + pxor xmm4,xmm3 + psrlq xmm3,42 + pxor xmm4,xmm2 + psllq xmm2,42 + pxor xmm4,xmm3 + movdqa xmm3,[32+edx] + pxor xmm4,xmm2 + movdqa xmm2,[ebp] + movq mm1,mm4 + paddq xmm0,xmm4 + movq mm7,[edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + paddq xmm2,xmm0 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-128],xmm2 + movdqa xmm4,xmm6 + movdqa xmm2,xmm3 +db 102,15,58,15,217,8 + movdqa [16+edx],xmm5 +db 102,15,58,15,229,8 + movdqa xmm5,xmm3 + psrlq xmm3,7 + paddq xmm1,xmm4 + movdqa xmm4,xmm5 + psrlq xmm5,1 + psllq xmm4,56 + pxor xmm3,xmm5 + psrlq xmm5,7 + pxor xmm3,xmm4 + psllq xmm4,7 + pxor xmm3,xmm5 + movdqa xmm5,xmm0 + pxor xmm3,xmm4 + movdqa xmm4,xmm0 + psrlq xmm5,6 + paddq xmm1,xmm3 + movdqa xmm3,xmm0 + psrlq xmm4,19 + psllq xmm3,3 + pxor xmm5,xmm4 + psrlq xmm4,42 + pxor xmm5,xmm3 + psllq xmm3,42 + pxor xmm5,xmm4 + movdqa xmm4,[48+edx] + pxor xmm5,xmm3 + movdqa xmm3,[16+ebp] + movq mm1,mm4 + paddq xmm1,xmm5 + movq mm7,[edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + paddq xmm3,xmm1 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-112],xmm3 + movdqa xmm5,xmm7 + movdqa xmm3,xmm4 +db 102,15,58,15,226,8 + movdqa [32+edx],xmm6 +db 102,15,58,15,238,8 + movdqa xmm6,xmm4 + psrlq xmm4,7 + paddq xmm2,xmm5 + movdqa xmm5,xmm6 + psrlq xmm6,1 + psllq xmm5,56 + pxor xmm4,xmm6 + psrlq xmm6,7 + pxor xmm4,xmm5 + psllq xmm5,7 + pxor xmm4,xmm6 + movdqa xmm6,xmm1 + pxor xmm4,xmm5 + movdqa xmm5,xmm1 + psrlq xmm6,6 + paddq xmm2,xmm4 + movdqa xmm4,xmm1 + psrlq xmm5,19 + psllq xmm4,3 + pxor xmm6,xmm5 + psrlq xmm5,42 + pxor xmm6,xmm4 + psllq xmm4,42 + pxor xmm6,xmm5 + movdqa xmm5,[edx] + pxor xmm6,xmm4 + movdqa xmm4,[32+ebp] + movq mm1,mm4 + paddq xmm2,xmm6 + movq mm7,[edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + paddq xmm4,xmm2 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-96],xmm4 + movdqa xmm6,xmm0 + movdqa xmm4,xmm5 +db 102,15,58,15,235,8 + movdqa [48+edx],xmm7 +db 102,15,58,15,247,8 + movdqa xmm7,xmm5 + psrlq xmm5,7 + paddq xmm3,xmm6 + movdqa xmm6,xmm7 + psrlq xmm7,1 + psllq xmm6,56 + pxor xmm5,xmm7 + psrlq xmm7,7 + pxor xmm5,xmm6 + psllq xmm6,7 + pxor xmm5,xmm7 + movdqa xmm7,xmm2 + pxor xmm5,xmm6 + movdqa xmm6,xmm2 + psrlq xmm7,6 + paddq xmm3,xmm5 + movdqa xmm5,xmm2 + psrlq xmm6,19 + psllq xmm5,3 + pxor xmm7,xmm6 + psrlq xmm6,42 + pxor xmm7,xmm5 + psllq xmm5,42 + pxor xmm7,xmm6 + movdqa xmm6,[16+edx] + pxor xmm7,xmm5 + movdqa xmm5,[48+ebp] + movq mm1,mm4 + paddq xmm3,xmm7 + movq mm7,[edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + paddq xmm5,xmm3 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-80],xmm5 + movdqa xmm7,xmm1 + movdqa xmm5,xmm6 +db 102,15,58,15,244,8 + movdqa [edx],xmm0 +db 102,15,58,15,248,8 + movdqa xmm0,xmm6 + psrlq xmm6,7 + paddq xmm4,xmm7 + movdqa xmm7,xmm0 + psrlq xmm0,1 + psllq xmm7,56 + pxor xmm6,xmm0 + psrlq xmm0,7 + pxor xmm6,xmm7 + psllq xmm7,7 + pxor xmm6,xmm0 + movdqa xmm0,xmm3 + pxor xmm6,xmm7 + movdqa xmm7,xmm3 + psrlq xmm0,6 + paddq xmm4,xmm6 + movdqa xmm6,xmm3 + psrlq xmm7,19 + psllq xmm6,3 + pxor xmm0,xmm7 + psrlq xmm7,42 + pxor xmm0,xmm6 + psllq xmm6,42 + pxor xmm0,xmm7 + movdqa xmm7,[32+edx] + pxor xmm0,xmm6 + movdqa xmm6,[64+ebp] + movq mm1,mm4 + paddq xmm4,xmm0 + movq mm7,[edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + paddq xmm6,xmm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-64],xmm6 + movdqa xmm0,xmm2 + movdqa xmm6,xmm7 +db 102,15,58,15,253,8 + movdqa [16+edx],xmm1 +db 102,15,58,15,193,8 + movdqa xmm1,xmm7 + psrlq xmm7,7 + paddq xmm5,xmm0 + movdqa xmm0,xmm1 + psrlq xmm1,1 + psllq xmm0,56 + pxor xmm7,xmm1 + psrlq xmm1,7 + pxor xmm7,xmm0 + psllq xmm0,7 + pxor xmm7,xmm1 + movdqa xmm1,xmm4 + pxor xmm7,xmm0 + movdqa xmm0,xmm4 + psrlq xmm1,6 + paddq xmm5,xmm7 + movdqa xmm7,xmm4 + psrlq xmm0,19 + psllq xmm7,3 + pxor xmm1,xmm0 + psrlq xmm0,42 + pxor xmm1,xmm7 + psllq xmm7,42 + pxor xmm1,xmm0 + movdqa xmm0,[48+edx] + pxor xmm1,xmm7 + movdqa xmm7,[80+ebp] + movq mm1,mm4 + paddq xmm5,xmm1 + movq mm7,[edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + paddq xmm7,xmm5 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-48],xmm7 + movdqa xmm1,xmm3 + movdqa xmm7,xmm0 +db 102,15,58,15,198,8 + movdqa [32+edx],xmm2 +db 102,15,58,15,202,8 + movdqa xmm2,xmm0 + psrlq xmm0,7 + paddq xmm6,xmm1 + movdqa xmm1,xmm2 + psrlq xmm2,1 + psllq xmm1,56 + pxor xmm0,xmm2 + psrlq xmm2,7 + pxor xmm0,xmm1 + psllq xmm1,7 + pxor xmm0,xmm2 + movdqa xmm2,xmm5 + pxor xmm0,xmm1 + movdqa xmm1,xmm5 + psrlq xmm2,6 + paddq xmm6,xmm0 + movdqa xmm0,xmm5 + psrlq xmm1,19 + psllq xmm0,3 + pxor xmm2,xmm1 + psrlq xmm1,42 + pxor xmm2,xmm0 + psllq xmm0,42 + pxor xmm2,xmm1 + movdqa xmm1,[edx] + pxor xmm2,xmm0 + movdqa xmm0,[96+ebp] + movq mm1,mm4 + paddq xmm6,xmm2 + movq mm7,[edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + paddq xmm0,xmm6 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-32],xmm0 + movdqa xmm2,xmm4 + movdqa xmm0,xmm1 +db 102,15,58,15,207,8 + movdqa [48+edx],xmm3 +db 102,15,58,15,211,8 + movdqa xmm3,xmm1 + psrlq xmm1,7 + paddq xmm7,xmm2 + movdqa xmm2,xmm3 + psrlq xmm3,1 + psllq xmm2,56 + pxor xmm1,xmm3 + psrlq xmm3,7 + pxor xmm1,xmm2 + psllq xmm2,7 + pxor xmm1,xmm3 + movdqa xmm3,xmm6 + pxor xmm1,xmm2 + movdqa xmm2,xmm6 + psrlq xmm3,6 + paddq xmm7,xmm1 + movdqa xmm1,xmm6 + psrlq xmm2,19 + psllq xmm1,3 + pxor xmm3,xmm2 + psrlq xmm2,42 + pxor xmm3,xmm1 + psllq xmm1,42 + pxor xmm3,xmm2 + movdqa xmm2,[16+edx] + pxor xmm3,xmm1 + movdqa xmm1,[112+ebp] + movq mm1,mm4 + paddq xmm7,xmm3 + movq mm7,[edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + paddq xmm1,xmm7 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-16],xmm1 + lea ebp,[128+ebp] + dec ecx + jnz NEAR L$00800_47_ssse3 + movdqa xmm1,[ebp] + lea ebp,[ebp-640] + movdqu xmm0,[ebx] +db 102,15,56,0,193 + movdqa xmm3,[ebp] + movdqa xmm2,xmm1 + movdqu xmm1,[16+ebx] + paddq xmm3,xmm0 +db 102,15,56,0,202 + movq mm1,mm4 + movq mm7,[edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-128],xmm3 + movdqa xmm4,[16+ebp] + movdqa xmm3,xmm2 + movdqu xmm2,[32+ebx] + paddq xmm4,xmm1 +db 102,15,56,0,211 + movq mm1,mm4 + movq mm7,[edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-112],xmm4 + movdqa xmm5,[32+ebp] + movdqa xmm4,xmm3 + movdqu xmm3,[48+ebx] + paddq xmm5,xmm2 +db 102,15,56,0,220 + movq mm1,mm4 + movq mm7,[edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-96],xmm5 + movdqa xmm6,[48+ebp] + movdqa xmm5,xmm4 + movdqu xmm4,[64+ebx] + paddq xmm6,xmm3 +db 102,15,56,0,229 + movq mm1,mm4 + movq mm7,[edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-80],xmm6 + movdqa xmm7,[64+ebp] + movdqa xmm6,xmm5 + movdqu xmm5,[80+ebx] + paddq xmm7,xmm4 +db 102,15,56,0,238 + movq mm1,mm4 + movq mm7,[edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq [32+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[56+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[24+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[8+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[32+esp] + paddq mm2,mm6 + movq mm6,[40+esp] + movq mm1,mm4 + movq mm7,[edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq [24+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [56+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[48+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[16+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[24+esp] + paddq mm0,mm6 + movq mm6,[32+esp] + movdqa [edx-64],xmm7 + movdqa [edx],xmm0 + movdqa xmm0,[80+ebp] + movdqa xmm7,xmm6 + movdqu xmm6,[96+ebx] + paddq xmm0,xmm5 +db 102,15,56,0,247 + movq mm1,mm4 + movq mm7,[edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq [16+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [48+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[40+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[8+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[56+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[16+esp] + paddq mm2,mm6 + movq mm6,[24+esp] + movq mm1,mm4 + movq mm7,[edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq [8+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [40+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[32+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[48+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[8+esp] + paddq mm0,mm6 + movq mm6,[16+esp] + movdqa [edx-48],xmm0 + movdqa [16+edx],xmm1 + movdqa xmm1,[96+ebp] + movdqa xmm0,xmm7 + movdqu xmm7,[112+ebx] + paddq xmm1,xmm6 +db 102,15,56,0,248 + movq mm1,mm4 + movq mm7,[edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [32+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[24+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[56+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[40+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[esp] + paddq mm2,mm6 + movq mm6,[8+esp] + movq mm1,mm4 + movq mm7,[edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq [56+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [24+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[16+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[48+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[32+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[56+esp] + paddq mm0,mm6 + movq mm6,[esp] + movdqa [edx-32],xmm1 + movdqa [32+edx],xmm2 + movdqa xmm2,[112+ebp] + movdqa xmm0,[edx] + paddq xmm2,xmm7 + movq mm1,mm4 + movq mm7,[edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq [48+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [16+esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[8+esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[40+esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[24+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,[48+esp] + paddq mm2,mm6 + movq mm6,[56+esp] + movq mm1,mm4 + movq mm7,[edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq [40+esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq [8+esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,[32+esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,[16+esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,[40+esp] + paddq mm0,mm6 + movq mm6,[48+esp] + movdqa [edx-16],xmm2 + movq mm1,[8+esp] + paddq mm0,mm3 + movq mm3,[24+esp] + movq mm7,[56+esp] + pxor mm2,mm1 + paddq mm0,[esi] + paddq mm1,[8+esi] + paddq mm2,[16+esi] + paddq mm3,[24+esi] + paddq mm4,[32+esi] + paddq mm5,[40+esi] + paddq mm6,[48+esi] + paddq mm7,[56+esi] + movq [esi],mm0 + movq [8+esi],mm1 + movq [16+esi],mm2 + movq [24+esi],mm3 + movq [32+esi],mm4 + movq [40+esi],mm5 + movq [48+esi],mm6 + movq [56+esi],mm7 + cmp edi,eax + jb NEAR L$007loop_ssse3 + mov esp,DWORD [76+edx] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +align 16 +L$002loop_x86: + mov eax,DWORD [edi] + mov ebx,DWORD [4+edi] + mov ecx,DWORD [8+edi] + mov edx,DWORD [12+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [16+edi] + mov ebx,DWORD [20+edi] + mov ecx,DWORD [24+edi] + mov edx,DWORD [28+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [32+edi] + mov ebx,DWORD [36+edi] + mov ecx,DWORD [40+edi] + mov edx,DWORD [44+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [48+edi] + mov ebx,DWORD [52+edi] + mov ecx,DWORD [56+edi] + mov edx,DWORD [60+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [64+edi] + mov ebx,DWORD [68+edi] + mov ecx,DWORD [72+edi] + mov edx,DWORD [76+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [80+edi] + mov ebx,DWORD [84+edi] + mov ecx,DWORD [88+edi] + mov edx,DWORD [92+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [96+edi] + mov ebx,DWORD [100+edi] + mov ecx,DWORD [104+edi] + mov edx,DWORD [108+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD [112+edi] + mov ebx,DWORD [116+edi] + mov ecx,DWORD [120+edi] + mov edx,DWORD [124+edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + add edi,128 + sub esp,72 + mov DWORD [204+esp],edi + lea edi,[8+esp] + mov ecx,16 +dd 2784229001 +align 16 +L$00900_15_x86: + mov ecx,DWORD [40+esp] + mov edx,DWORD [44+esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [48+esp] + mov edx,DWORD [52+esp] + mov esi,DWORD [56+esp] + mov edi,DWORD [60+esp] + add eax,DWORD [64+esp] + adc ebx,DWORD [68+esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD [40+esp] + and edx,DWORD [44+esp] + add eax,DWORD [192+esp] + adc ebx,DWORD [196+esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD [ebp] + mov edi,DWORD [4+ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD [32+esp] + mov edx,DWORD [36+esp] + add eax,esi + adc ebx,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov edi,DWORD [20+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + or ecx,esi + or edx,edi + and ecx,DWORD [24+esp] + and edx,DWORD [28+esp] + and esi,DWORD [8+esp] + and edi,DWORD [12+esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov dl,BYTE [ebp] + sub esp,8 + lea ebp,[8+ebp] + cmp dl,148 + jne NEAR L$00900_15_x86 +align 16 +L$01016_79_x86: + mov ecx,DWORD [312+esp] + mov edx,DWORD [316+esp] + mov esi,ecx + shr ecx,1 + mov edi,edx + shr edx,1 + mov eax,ecx + shl esi,24 + mov ebx,edx + shl edi,24 + xor ebx,esi + shr ecx,6 + xor eax,edi + shr edx,6 + xor eax,ecx + shl esi,7 + xor ebx,edx + shl edi,1 + xor ebx,esi + shr ecx,1 + xor eax,edi + shr edx,1 + xor eax,ecx + shl edi,6 + xor ebx,edx + xor eax,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov ecx,DWORD [208+esp] + mov edx,DWORD [212+esp] + mov esi,ecx + shr ecx,6 + mov edi,edx + shr edx,6 + mov eax,ecx + shl esi,3 + mov ebx,edx + shl edi,3 + xor eax,esi + shr ecx,13 + xor ebx,edi + shr edx,13 + xor eax,ecx + shl esi,10 + xor ebx,edx + shl edi,10 + xor ebx,esi + shr ecx,10 + xor eax,edi + shr edx,10 + xor ebx,ecx + shl edi,13 + xor eax,edx + xor eax,edi + mov ecx,DWORD [320+esp] + mov edx,DWORD [324+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + mov esi,DWORD [248+esp] + mov edi,DWORD [252+esp] + add eax,ecx + adc ebx,edx + add eax,esi + adc ebx,edi + mov DWORD [192+esp],eax + mov DWORD [196+esp],ebx + mov ecx,DWORD [40+esp] + mov edx,DWORD [44+esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [48+esp] + mov edx,DWORD [52+esp] + mov esi,DWORD [56+esp] + mov edi,DWORD [60+esp] + add eax,DWORD [64+esp] + adc ebx,DWORD [68+esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD [40+esp] + and edx,DWORD [44+esp] + add eax,DWORD [192+esp] + adc ebx,DWORD [196+esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD [ebp] + mov edi,DWORD [4+ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD [32+esp] + mov edx,DWORD [36+esp] + add eax,esi + adc ebx,edi + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov DWORD [32+esp],eax + mov DWORD [36+esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD [8+esp] + mov edx,DWORD [12+esp] + mov esi,DWORD [16+esp] + mov edi,DWORD [20+esp] + add eax,DWORD [esp] + adc ebx,DWORD [4+esp] + or ecx,esi + or edx,edi + and ecx,DWORD [24+esp] + and edx,DWORD [28+esp] + and esi,DWORD [8+esp] + and edi,DWORD [12+esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD [esp],eax + mov DWORD [4+esp],ebx + mov dl,BYTE [ebp] + sub esp,8 + lea ebp,[8+ebp] + cmp dl,23 + jne NEAR L$01016_79_x86 + mov esi,DWORD [840+esp] + mov edi,DWORD [844+esp] + mov eax,DWORD [esi] + mov ebx,DWORD [4+esi] + mov ecx,DWORD [8+esi] + mov edx,DWORD [12+esi] + add eax,DWORD [8+esp] + adc ebx,DWORD [12+esp] + mov DWORD [esi],eax + mov DWORD [4+esi],ebx + add ecx,DWORD [16+esp] + adc edx,DWORD [20+esp] + mov DWORD [8+esi],ecx + mov DWORD [12+esi],edx + mov eax,DWORD [16+esi] + mov ebx,DWORD [20+esi] + mov ecx,DWORD [24+esi] + mov edx,DWORD [28+esi] + add eax,DWORD [24+esp] + adc ebx,DWORD [28+esp] + mov DWORD [16+esi],eax + mov DWORD [20+esi],ebx + add ecx,DWORD [32+esp] + adc edx,DWORD [36+esp] + mov DWORD [24+esi],ecx + mov DWORD [28+esi],edx + mov eax,DWORD [32+esi] + mov ebx,DWORD [36+esi] + mov ecx,DWORD [40+esi] + mov edx,DWORD [44+esi] + add eax,DWORD [40+esp] + adc ebx,DWORD [44+esp] + mov DWORD [32+esi],eax + mov DWORD [36+esi],ebx + add ecx,DWORD [48+esp] + adc edx,DWORD [52+esp] + mov DWORD [40+esi],ecx + mov DWORD [44+esi],edx + mov eax,DWORD [48+esi] + mov ebx,DWORD [52+esi] + mov ecx,DWORD [56+esi] + mov edx,DWORD [60+esi] + add eax,DWORD [56+esp] + adc ebx,DWORD [60+esp] + mov DWORD [48+esi],eax + mov DWORD [52+esi],ebx + add ecx,DWORD [64+esp] + adc edx,DWORD [68+esp] + mov DWORD [56+esi],ecx + mov DWORD [60+esi],edx + add esp,840 + sub ebp,640 + cmp edi,DWORD [8+esp] + jb NEAR L$002loop_x86 + mov esp,DWORD [12+esp] + pop edi + pop esi + pop ebx + pop ebp + ret +align 64 +L$001K512: +dd 3609767458,1116352408 +dd 602891725,1899447441 +dd 3964484399,3049323471 +dd 2173295548,3921009573 +dd 4081628472,961987163 +dd 3053834265,1508970993 +dd 2937671579,2453635748 +dd 3664609560,2870763221 +dd 2734883394,3624381080 +dd 1164996542,310598401 +dd 1323610764,607225278 +dd 3590304994,1426881987 +dd 4068182383,1925078388 +dd 991336113,2162078206 +dd 633803317,2614888103 +dd 3479774868,3248222580 +dd 2666613458,3835390401 +dd 944711139,4022224774 +dd 2341262773,264347078 +dd 2007800933,604807628 +dd 1495990901,770255983 +dd 1856431235,1249150122 +dd 3175218132,1555081692 +dd 2198950837,1996064986 +dd 3999719339,2554220882 +dd 766784016,2821834349 +dd 2566594879,2952996808 +dd 3203337956,3210313671 +dd 1034457026,3336571891 +dd 2466948901,3584528711 +dd 3758326383,113926993 +dd 168717936,338241895 +dd 1188179964,666307205 +dd 1546045734,773529912 +dd 1522805485,1294757372 +dd 2643833823,1396182291 +dd 2343527390,1695183700 +dd 1014477480,1986661051 +dd 1206759142,2177026350 +dd 344077627,2456956037 +dd 1290863460,2730485921 +dd 3158454273,2820302411 +dd 3505952657,3259730800 +dd 106217008,3345764771 +dd 3606008344,3516065817 +dd 1432725776,3600352804 +dd 1467031594,4094571909 +dd 851169720,275423344 +dd 3100823752,430227734 +dd 1363258195,506948616 +dd 3750685593,659060556 +dd 3785050280,883997877 +dd 3318307427,958139571 +dd 3812723403,1322822218 +dd 2003034995,1537002063 +dd 3602036899,1747873779 +dd 1575990012,1955562222 +dd 1125592928,2024104815 +dd 2716904306,2227730452 +dd 442776044,2361852424 +dd 593698344,2428436474 +dd 3733110249,2756734187 +dd 2999351573,3204031479 +dd 3815920427,3329325298 +dd 3928383900,3391569614 +dd 566280711,3515267271 +dd 3454069534,3940187606 +dd 4000239992,4118630271 +dd 1914138554,116418474 +dd 2731055270,174292421 +dd 3203993006,289380356 +dd 320620315,460393269 +dd 587496836,685471733 +dd 1086792851,852142971 +dd 365543100,1017036298 +dd 2618297676,1126000580 +dd 3409855158,1288033470 +dd 4234509866,1501505948 +dd 987167468,1607167915 +dd 1246189591,1816402316 +dd 67438087,66051 +dd 202182159,134810123 +db 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +db 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +db 62,0 +segment .bss +common _OPENSSL_ia32cap_P 16 diff --git a/third_party/boringssl/win-x86_64/crypto/aes/aes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/aes/aes-x86_64.asm new file mode 100644 index 00000000000..53394f0e22f --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/aes/aes-x86_64.asm @@ -0,0 +1,2858 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +ALIGN 16 +_x86_64_AES_encrypt: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] + + mov r13d,DWORD[240+r15] + sub r13d,1 + jmp NEAR $L$enc_loop +ALIGN 16 +$L$enc_loop: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD[rsi*8+r14] + mov r11d,DWORD[rdi*8+r14] + mov r12d,DWORD[rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,dl + xor r10d,DWORD[3+rsi*8+r14] + xor r11d,DWORD[3+rdi*8+r14] + mov r8d,DWORD[rbp*8+r14] + + movzx esi,dh + shr ecx,16 + movzx ebp,ah + xor r12d,DWORD[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD[3+rbp*8+r14] + + shr ebx,16 + lea r15,[16+r15] + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD[2+rsi*8+r14] + xor r11d,DWORD[2+rdi*8+r14] + xor r12d,DWORD[2+rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,bl + xor r10d,DWORD[1+rsi*8+r14] + xor r11d,DWORD[1+rdi*8+r14] + xor r8d,DWORD[2+rbp*8+r14] + + mov edx,DWORD[12+r15] + movzx edi,bh + movzx ebp,ch + mov eax,DWORD[r15] + xor r12d,DWORD[1+rdi*8+r14] + xor r8d,DWORD[1+rbp*8+r14] + + mov ebx,DWORD[4+r15] + mov ecx,DWORD[8+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + sub r13d,1 + jnz NEAR $L$enc_loop + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE[2+rsi*8+r14] + movzx r11d,BYTE[2+rdi*8+r14] + movzx r12d,BYTE[2+rbp*8+r14] + + movzx esi,dl + movzx edi,bh + movzx ebp,ch + movzx r8d,BYTE[2+rsi*8+r14] + mov edi,DWORD[rdi*8+r14] + mov ebp,DWORD[rbp*8+r14] + + and edi,0x0000ff00 + and ebp,0x0000ff00 + + xor r10d,edi + xor r11d,ebp + shr ecx,16 + + movzx esi,dh + movzx edi,ah + shr edx,16 + mov esi,DWORD[rsi*8+r14] + mov edi,DWORD[rdi*8+r14] + + and esi,0x0000ff00 + and edi,0x0000ff00 + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + mov esi,DWORD[rsi*8+r14] + mov edi,DWORD[rdi*8+r14] + mov ebp,DWORD[rbp*8+r14] + + and esi,0x00ff0000 + and edi,0x00ff0000 + and ebp,0x00ff0000 + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,dh + movzx ebp,ah + mov esi,DWORD[rsi*8+r14] + mov edi,DWORD[2+rdi*8+r14] + mov ebp,DWORD[2+rbp*8+r14] + + and esi,0x00ff0000 + and edi,0xff000000 + and ebp,0xff000000 + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,bh + movzx edi,ch + mov edx,DWORD[((16+12))+r15] + mov esi,DWORD[2+rsi*8+r14] + mov edi,DWORD[2+rdi*8+r14] + mov eax,DWORD[((16+0))+r15] + + and esi,0xff000000 + and edi,0xff000000 + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD[((16+4))+r15] + mov ecx,DWORD[((16+8))+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0xf3,0xc3 + + +ALIGN 16 +_x86_64_AES_encrypt_compact: + lea r8,[128+r14] + mov edi,DWORD[((0-128))+r8] + mov ebp,DWORD[((32-128))+r8] + mov r10d,DWORD[((64-128))+r8] + mov r11d,DWORD[((96-128))+r8] + mov edi,DWORD[((128-128))+r8] + mov ebp,DWORD[((160-128))+r8] + mov r10d,DWORD[((192-128))+r8] + mov r11d,DWORD[((224-128))+r8] + jmp NEAR $L$enc_loop_compact +ALIGN 16 +$L$enc_loop_compact: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] + lea r15,[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,bh + movzx edi,ch + shr ecx,16 + movzx ebp,dh + movzx r10d,BYTE[r10*1+r14] + movzx r11d,BYTE[r11*1+r14] + movzx r12d,BYTE[r12*1+r14] + movzx r8d,BYTE[r8*1+r14] + + movzx r9d,BYTE[rsi*1+r14] + movzx esi,ah + movzx r13d,BYTE[rdi*1+r14] + movzx edi,cl + movzx ebp,BYTE[rbp*1+r14] + movzx esi,BYTE[rsi*1+r14] + + shl r9d,8 + shr edx,16 + shl r13d,8 + xor r10d,r9d + shr eax,16 + movzx r9d,dl + shr ebx,16 + xor r11d,r13d + shl ebp,8 + movzx r13d,al + movzx edi,BYTE[rdi*1+r14] + xor r12d,ebp + + shl esi,8 + movzx ebp,bl + shl edi,16 + xor r8d,esi + movzx r9d,BYTE[r9*1+r14] + movzx esi,dh + movzx r13d,BYTE[r13*1+r14] + xor r10d,edi + + shr ecx,8 + movzx edi,ah + shl r9d,16 + shr ebx,8 + shl r13d,16 + xor r11d,r9d + movzx ebp,BYTE[rbp*1+r14] + movzx esi,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + movzx edx,BYTE[rcx*1+r14] + movzx ecx,BYTE[rbx*1+r14] + + shl ebp,16 + xor r12d,r13d + shl esi,24 + xor r8d,ebp + shl edi,24 + xor r10d,esi + shl edx,24 + xor r11d,edi + shl ecx,24 + mov eax,r10d + mov ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD[16+rsp] + je NEAR $L$enc_compact_done + mov r10d,0x80808080 + mov r11d,0x80808080 + and r10d,eax + and r11d,ebx + mov esi,r10d + mov edi,r11d + shr r10d,7 + lea r8d,[rax*1+rax] + shr r11d,7 + lea r9d,[rbx*1+rbx] + sub esi,r10d + sub edi,r11d + and r8d,0xfefefefe + and r9d,0xfefefefe + and esi,0x1b1b1b1b + and edi,0x1b1b1b1b + mov r10d,eax + mov r11d,ebx + xor r8d,esi + xor r9d,edi + + xor eax,r8d + xor ebx,r9d + mov r12d,0x80808080 + rol eax,24 + mov ebp,0x80808080 + rol ebx,24 + and r12d,ecx + and ebp,edx + xor eax,r8d + xor ebx,r9d + mov esi,r12d + ror r10d,16 + mov edi,ebp + ror r11d,16 + lea r8d,[rcx*1+rcx] + shr r12d,7 + xor eax,r10d + shr ebp,7 + xor ebx,r11d + ror r10d,8 + lea r9d,[rdx*1+rdx] + ror r11d,8 + sub esi,r12d + sub edi,ebp + xor eax,r10d + xor ebx,r11d + + and r8d,0xfefefefe + and r9d,0xfefefefe + and esi,0x1b1b1b1b + and edi,0x1b1b1b1b + mov r12d,ecx + mov ebp,edx + xor r8d,esi + xor r9d,edi + + ror r12d,16 + xor ecx,r8d + ror ebp,16 + xor edx,r9d + rol ecx,24 + mov esi,DWORD[r14] + rol edx,24 + xor ecx,r8d + mov edi,DWORD[64+r14] + xor edx,r9d + mov r8d,DWORD[128+r14] + xor ecx,r12d + ror r12d,8 + xor edx,ebp + ror ebp,8 + xor ecx,r12d + mov r9d,DWORD[192+r14] + xor edx,ebp + jmp NEAR $L$enc_loop_compact +ALIGN 16 +$L$enc_compact_done: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] +DB 0xf3,0xc3 + +ALIGN 16 +global asm_AES_encrypt + + +asm_AES_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,0x3c0 + sub rsp,rcx + sub rsp,32 + + mov QWORD[16+rsp],rsi + mov QWORD[24+rsp],r10 +$L$enc_prologue: + + mov r15,rdx + mov r13d,DWORD[240+r15] + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + + shl r13d,4 + lea rbp,[r13*1+r15] + mov QWORD[rsp],r15 + mov QWORD[8+rsp],rbp + + + lea r14,[(($L$AES_Te+2048))] + lea rbp,[768+rsp] + sub rbp,r14 + and rbp,0x300 + lea r14,[rbp*1+r14] + + call _x86_64_AES_encrypt_compact + + mov r9,QWORD[16+rsp] + mov rsi,QWORD[24+rsp] + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$enc_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_encrypt: + +ALIGN 16 +_x86_64_AES_decrypt: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] + + mov r13d,DWORD[240+r15] + sub r13d,1 + jmp NEAR $L$dec_loop +ALIGN 16 +$L$dec_loop: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD[rsi*8+r14] + mov r11d,DWORD[rdi*8+r14] + mov r12d,DWORD[rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,dl + xor r10d,DWORD[3+rsi*8+r14] + xor r11d,DWORD[3+rdi*8+r14] + mov r8d,DWORD[rbp*8+r14] + + movzx esi,bh + shr eax,16 + movzx ebp,ch + xor r12d,DWORD[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD[3+rbp*8+r14] + + shr ebx,16 + lea r15,[16+r15] + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD[2+rsi*8+r14] + xor r11d,DWORD[2+rdi*8+r14] + xor r12d,DWORD[2+rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,bl + xor r10d,DWORD[1+rsi*8+r14] + xor r11d,DWORD[1+rdi*8+r14] + xor r8d,DWORD[2+rbp*8+r14] + + movzx esi,dh + mov edx,DWORD[12+r15] + movzx ebp,ah + xor r12d,DWORD[1+rsi*8+r14] + mov eax,DWORD[r15] + xor r8d,DWORD[1+rbp*8+r14] + + xor eax,r10d + mov ebx,DWORD[4+r15] + mov ecx,DWORD[8+r15] + xor ecx,r12d + xor ebx,r11d + xor edx,r8d + sub r13d,1 + jnz NEAR $L$dec_loop + lea r14,[2048+r14] + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE[rsi*1+r14] + movzx r11d,BYTE[rdi*1+r14] + movzx r12d,BYTE[rbp*1+r14] + + movzx esi,dl + movzx edi,dh + movzx ebp,ah + movzx r8d,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + movzx ebp,BYTE[rbp*1+r14] + + shl edi,8 + shl ebp,8 + + xor r10d,edi + xor r11d,ebp + shr edx,16 + + movzx esi,bh + movzx edi,ch + shr eax,16 + movzx esi,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + + shl esi,8 + shl edi,8 + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + movzx esi,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + movzx ebp,BYTE[rbp*1+r14] + + shl esi,16 + shl edi,16 + shl ebp,16 + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,bh + movzx ebp,ch + movzx esi,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + movzx ebp,BYTE[rbp*1+r14] + + shl esi,16 + shl edi,24 + shl ebp,24 + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,dh + movzx edi,ah + mov edx,DWORD[((16+12))+r15] + movzx esi,BYTE[rsi*1+r14] + movzx edi,BYTE[rdi*1+r14] + mov eax,DWORD[((16+0))+r15] + + shl esi,24 + shl edi,24 + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD[((16+4))+r15] + mov ecx,DWORD[((16+8))+r15] + lea r14,[((-2048))+r14] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0xf3,0xc3 + + +ALIGN 16 +_x86_64_AES_decrypt_compact: + lea r8,[128+r14] + mov edi,DWORD[((0-128))+r8] + mov ebp,DWORD[((32-128))+r8] + mov r10d,DWORD[((64-128))+r8] + mov r11d,DWORD[((96-128))+r8] + mov edi,DWORD[((128-128))+r8] + mov ebp,DWORD[((160-128))+r8] + mov r10d,DWORD[((192-128))+r8] + mov r11d,DWORD[((224-128))+r8] + jmp NEAR $L$dec_loop_compact + +ALIGN 16 +$L$dec_loop_compact: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] + lea r15,[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,dh + movzx edi,ah + shr edx,16 + movzx ebp,bh + movzx r10d,BYTE[r10*1+r14] + movzx r11d,BYTE[r11*1+r14] + movzx r12d,BYTE[r12*1+r14] + movzx r8d,BYTE[r8*1+r14] + + movzx r9d,BYTE[rsi*1+r14] + movzx esi,ch + movzx r13d,BYTE[rdi*1+r14] + movzx ebp,BYTE[rbp*1+r14] + movzx esi,BYTE[rsi*1+r14] + + shr ecx,16 + shl r13d,8 + shl r9d,8 + movzx edi,cl + shr eax,16 + xor r10d,r9d + shr ebx,16 + movzx r9d,dl + + shl ebp,8 + xor r11d,r13d + shl esi,8 + movzx r13d,al + movzx edi,BYTE[rdi*1+r14] + xor r12d,ebp + movzx ebp,bl + + shl edi,16 + xor r8d,esi + movzx r9d,BYTE[r9*1+r14] + movzx esi,bh + movzx ebp,BYTE[rbp*1+r14] + xor r10d,edi + movzx r13d,BYTE[r13*1+r14] + movzx edi,ch + + shl ebp,16 + shl r9d,16 + shl r13d,16 + xor r8d,ebp + movzx ebp,dh + xor r11d,r9d + shr eax,8 + xor r12d,r13d + + movzx esi,BYTE[rsi*1+r14] + movzx ebx,BYTE[rdi*1+r14] + movzx ecx,BYTE[rbp*1+r14] + movzx edx,BYTE[rax*1+r14] + + mov eax,r10d + shl esi,24 + shl ebx,24 + shl ecx,24 + xor eax,esi + shl edx,24 + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD[16+rsp] + je NEAR $L$dec_compact_done + + mov rsi,QWORD[((256+0))+r14] + shl rbx,32 + shl rdx,32 + mov rdi,QWORD[((256+8))+r14] + or rax,rbx + or rcx,rdx + mov rbp,QWORD[((256+16))+r14] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,[rax*1+rax] + shr r12,7 + lea r11,[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,[r8*1+r8] + shr r13,7 + lea r12,[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,[r9*1+r9] + lea r13,[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + mov rsi,QWORD[r14] + rol r9d,16 + mov rdi,QWORD[64+r14] + rol r12d,16 + mov rbp,QWORD[128+r14] + rol r8d,16 + mov r10,QWORD[192+r14] + xor eax,r9d + rol r11d,16 + xor ecx,r12d + mov r13,QWORD[256+r14] + xor ebx,r8d + xor edx,r11d + jmp NEAR $L$dec_loop_compact +ALIGN 16 +$L$dec_compact_done: + xor eax,DWORD[r15] + xor ebx,DWORD[4+r15] + xor ecx,DWORD[8+r15] + xor edx,DWORD[12+r15] +DB 0xf3,0xc3 + +ALIGN 16 +global asm_AES_decrypt + + +asm_AES_decrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_decrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,0x3c0 + sub rsp,rcx + sub rsp,32 + + mov QWORD[16+rsp],rsi + mov QWORD[24+rsp],r10 +$L$dec_prologue: + + mov r15,rdx + mov r13d,DWORD[240+r15] + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + + shl r13d,4 + lea rbp,[r13*1+r15] + mov QWORD[rsp],r15 + mov QWORD[8+rsp],rbp + + + lea r14,[(($L$AES_Td+2048))] + lea rbp,[768+rsp] + sub rbp,r14 + and rbp,0x300 + lea r14,[rbp*1+r14] + shr rbp,3 + add r14,rbp + + call _x86_64_AES_decrypt_compact + + mov r9,QWORD[16+rsp] + mov rsi,QWORD[24+rsp] + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$dec_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_decrypt: +ALIGN 16 +global asm_AES_set_encrypt_key + +asm_AES_set_encrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_set_encrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,8 +$L$enc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + mov rbp,QWORD[40+rsp] + mov rbx,QWORD[48+rsp] + add rsp,56 +$L$enc_key_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_set_encrypt_key: + + +ALIGN 16 +_x86_64_AES_set_encrypt_key: + mov ecx,esi + mov rsi,rdi + mov rdi,rdx + + test rsi,-1 + jz NEAR $L$badpointer + test rdi,-1 + jz NEAR $L$badpointer + + lea rbp,[$L$AES_Te] + lea rbp,[((2048+128))+rbp] + + + mov eax,DWORD[((0-128))+rbp] + mov ebx,DWORD[((32-128))+rbp] + mov r8d,DWORD[((64-128))+rbp] + mov edx,DWORD[((96-128))+rbp] + mov eax,DWORD[((128-128))+rbp] + mov ebx,DWORD[((160-128))+rbp] + mov r8d,DWORD[((192-128))+rbp] + mov edx,DWORD[((224-128))+rbp] + + cmp ecx,128 + je NEAR $L$10rounds + cmp ecx,192 + je NEAR $L$12rounds + cmp ecx,256 + je NEAR $L$14rounds + mov rax,-2 + jmp NEAR $L$exit + +$L$10rounds: + mov rax,QWORD[rsi] + mov rdx,QWORD[8+rsi] + mov QWORD[rdi],rax + mov QWORD[8+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp NEAR $L$10shortcut +ALIGN 4 +$L$10loop: + mov eax,DWORD[rdi] + mov edx,DWORD[12+rdi] +$L$10shortcut: + movzx esi,dl + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD[((1024-128))+rcx*4+rbp] + mov DWORD[16+rdi],eax + xor eax,DWORD[4+rdi] + mov DWORD[20+rdi],eax + xor eax,DWORD[8+rdi] + mov DWORD[24+rdi],eax + xor eax,DWORD[12+rdi] + mov DWORD[28+rdi],eax + add ecx,1 + lea rdi,[16+rdi] + cmp ecx,10 + jl NEAR $L$10loop + + mov DWORD[80+rdi],10 + xor rax,rax + jmp NEAR $L$exit + +$L$12rounds: + mov rax,QWORD[rsi] + mov rbx,QWORD[8+rsi] + mov rdx,QWORD[16+rsi] + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp NEAR $L$12shortcut +ALIGN 4 +$L$12loop: + mov eax,DWORD[rdi] + mov edx,DWORD[20+rdi] +$L$12shortcut: + movzx esi,dl + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD[((1024-128))+rcx*4+rbp] + mov DWORD[24+rdi],eax + xor eax,DWORD[4+rdi] + mov DWORD[28+rdi],eax + xor eax,DWORD[8+rdi] + mov DWORD[32+rdi],eax + xor eax,DWORD[12+rdi] + mov DWORD[36+rdi],eax + + cmp ecx,7 + je NEAR $L$12break + add ecx,1 + + xor eax,DWORD[16+rdi] + mov DWORD[40+rdi],eax + xor eax,DWORD[20+rdi] + mov DWORD[44+rdi],eax + + lea rdi,[24+rdi] + jmp NEAR $L$12loop +$L$12break: + mov DWORD[72+rdi],12 + xor rax,rax + jmp NEAR $L$exit + +$L$14rounds: + mov rax,QWORD[rsi] + mov rbx,QWORD[8+rsi] + mov rcx,QWORD[16+rsi] + mov rdx,QWORD[24+rsi] + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp NEAR $L$14shortcut +ALIGN 4 +$L$14loop: + mov eax,DWORD[rdi] + mov edx,DWORD[28+rdi] +$L$14shortcut: + movzx esi,dl + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD[((1024-128))+rcx*4+rbp] + mov DWORD[32+rdi],eax + xor eax,DWORD[4+rdi] + mov DWORD[36+rdi],eax + xor eax,DWORD[8+rdi] + mov DWORD[40+rdi],eax + xor eax,DWORD[12+rdi] + mov DWORD[44+rdi],eax + + cmp ecx,6 + je NEAR $L$14break + add ecx,1 + + mov edx,eax + mov eax,DWORD[16+rdi] + movzx esi,dl + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,16 + xor eax,ebx + + movzx ebx,BYTE[((-128))+rsi*1+rbp] + shl ebx,24 + xor eax,ebx + + mov DWORD[48+rdi],eax + xor eax,DWORD[20+rdi] + mov DWORD[52+rdi],eax + xor eax,DWORD[24+rdi] + mov DWORD[56+rdi],eax + xor eax,DWORD[28+rdi] + mov DWORD[60+rdi],eax + + lea rdi,[32+rdi] + jmp NEAR $L$14loop +$L$14break: + mov DWORD[48+rdi],14 + xor rax,rax + jmp NEAR $L$exit + +$L$badpointer: + mov rax,-1 +$L$exit: +DB 0xf3,0xc3 + +ALIGN 16 +global asm_AES_set_decrypt_key + +asm_AES_set_decrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_set_decrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + push rdx +$L$dec_key_prologue: + + call _x86_64_AES_set_encrypt_key + mov r8,QWORD[rsp] + cmp eax,0 + jne NEAR $L$abort + + mov r14d,DWORD[240+r8] + xor rdi,rdi + lea rcx,[r14*4+rdi] + mov rsi,r8 + lea rdi,[rcx*4+r8] +ALIGN 4 +$L$invert: + mov rax,QWORD[rsi] + mov rbx,QWORD[8+rsi] + mov rcx,QWORD[rdi] + mov rdx,QWORD[8+rdi] + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[rsi],rcx + mov QWORD[8+rsi],rdx + lea rsi,[16+rsi] + lea rdi,[((-16))+rdi] + cmp rdi,rsi + jne NEAR $L$invert + + lea rax,[(($L$AES_Te+2048+1024))] + + mov rsi,QWORD[40+rax] + mov rdi,QWORD[48+rax] + mov rbp,QWORD[56+rax] + + mov r15,r8 + sub r14d,1 +ALIGN 4 +$L$permute: + lea r15,[16+r15] + mov rax,QWORD[r15] + mov rcx,QWORD[8+r15] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,[rax*1+rax] + shr r12,7 + lea r11,[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,[r8*1+r8] + shr r13,7 + lea r12,[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,[r9*1+r9] + lea r13,[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + + rol r9d,16 + + rol r12d,16 + + rol r8d,16 + + xor eax,r9d + rol r11d,16 + xor ecx,r12d + + xor ebx,r8d + xor edx,r11d + mov DWORD[r15],eax + mov DWORD[4+r15],ebx + mov DWORD[8+r15],ecx + mov DWORD[12+r15],edx + sub r14d,1 + jnz NEAR $L$permute + + xor rax,rax +$L$abort: + mov r15,QWORD[8+rsp] + mov r14,QWORD[16+rsp] + mov r13,QWORD[24+rsp] + mov r12,QWORD[32+rsp] + mov rbp,QWORD[40+rsp] + mov rbx,QWORD[48+rsp] + add rsp,56 +$L$dec_key_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_set_decrypt_key: +ALIGN 16 +global asm_AES_cbc_encrypt + +EXTERN OPENSSL_ia32cap_P + +asm_AES_cbc_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_AES_cbc_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + cmp rdx,0 + je NEAR $L$cbc_epilogue + pushfq + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 +$L$cbc_prologue: + + cld + mov r9d,r9d + + lea r14,[$L$AES_Te] + cmp r9,0 + jne NEAR $L$cbc_picked_te + lea r14,[$L$AES_Td] +$L$cbc_picked_te: + + mov r10d,DWORD[OPENSSL_ia32cap_P] + cmp rdx,512 + jb NEAR $L$cbc_slow_prologue + test rdx,15 + jnz NEAR $L$cbc_slow_prologue + bt r10d,28 + jc NEAR $L$cbc_slow_prologue + + + lea r15,[((-88-248))+rsp] + and r15,-64 + + + mov r10,r14 + lea r11,[2304+r14] + mov r12,r15 + and r10,0xFFF + and r11,0xFFF + and r12,0xFFF + + cmp r12,r11 + jb NEAR $L$cbc_te_break_out + sub r12,r11 + sub r15,r12 + jmp NEAR $L$cbc_te_ok +$L$cbc_te_break_out: + sub r12,r10 + and r12,0xFFF + add r12,320 + sub r15,r12 +ALIGN 4 +$L$cbc_te_ok: + + xchg r15,rsp + + mov QWORD[16+rsp],r15 +$L$cbc_fast_body: + mov QWORD[24+rsp],rdi + mov QWORD[32+rsp],rsi + mov QWORD[40+rsp],rdx + mov QWORD[48+rsp],rcx + mov QWORD[56+rsp],r8 + mov DWORD[((80+240))+rsp],0 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + + mov eax,DWORD[240+r15] + + mov r10,r15 + sub r10,r14 + and r10,0xfff + cmp r10,2304 + jb NEAR $L$cbc_do_ecopy + cmp r10,4096-248 + jb NEAR $L$cbc_skip_ecopy +ALIGN 4 +$L$cbc_do_ecopy: + mov rsi,r15 + lea rdi,[80+rsp] + lea r15,[80+rsp] + mov ecx,240/8 + DD 0x90A548F3 + mov DWORD[rdi],eax +$L$cbc_skip_ecopy: + mov QWORD[rsp],r15 + + mov ecx,18 +ALIGN 4 +$L$cbc_prefetch_te: + mov r10,QWORD[r14] + mov r11,QWORD[32+r14] + mov r12,QWORD[64+r14] + mov r13,QWORD[96+r14] + lea r14,[128+r14] + sub ecx,1 + jnz NEAR $L$cbc_prefetch_te + lea r14,[((-2304))+r14] + + cmp rbx,0 + je NEAR $L$FAST_DECRYPT + + + mov eax,DWORD[rbp] + mov ebx,DWORD[4+rbp] + mov ecx,DWORD[8+rbp] + mov edx,DWORD[12+rbp] + +ALIGN 4 +$L$cbc_fast_enc_loop: + xor eax,DWORD[r8] + xor ebx,DWORD[4+r8] + xor ecx,DWORD[8+r8] + xor edx,DWORD[12+r8] + mov r15,QWORD[rsp] + mov QWORD[24+rsp],r8 + + call _x86_64_AES_encrypt + + mov r8,QWORD[24+rsp] + mov r10,QWORD[40+rsp] + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + lea r8,[16+r8] + lea r9,[16+r9] + sub r10,16 + test r10,-16 + mov QWORD[40+rsp],r10 + jnz NEAR $L$cbc_fast_enc_loop + mov rbp,QWORD[56+rsp] + mov DWORD[rbp],eax + mov DWORD[4+rbp],ebx + mov DWORD[8+rbp],ecx + mov DWORD[12+rbp],edx + + jmp NEAR $L$cbc_fast_cleanup + + +ALIGN 16 +$L$FAST_DECRYPT: + cmp r9,r8 + je NEAR $L$cbc_fast_dec_in_place + + mov QWORD[64+rsp],rbp +ALIGN 4 +$L$cbc_fast_dec_loop: + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov r15,QWORD[rsp] + mov QWORD[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov rbp,QWORD[64+rsp] + mov r8,QWORD[24+rsp] + mov r10,QWORD[40+rsp] + xor eax,DWORD[rbp] + xor ebx,DWORD[4+rbp] + xor ecx,DWORD[8+rbp] + xor edx,DWORD[12+rbp] + mov rbp,r8 + + sub r10,16 + mov QWORD[40+rsp],r10 + mov QWORD[64+rsp],rbp + + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + lea r8,[16+r8] + lea r9,[16+r9] + jnz NEAR $L$cbc_fast_dec_loop + mov r12,QWORD[56+rsp] + mov r10,QWORD[rbp] + mov r11,QWORD[8+rbp] + mov QWORD[r12],r10 + mov QWORD[8+r12],r11 + jmp NEAR $L$cbc_fast_cleanup + +ALIGN 16 +$L$cbc_fast_dec_in_place: + mov r10,QWORD[rbp] + mov r11,QWORD[8+rbp] + mov QWORD[((0+64))+rsp],r10 + mov QWORD[((8+64))+rsp],r11 +ALIGN 4 +$L$cbc_fast_dec_in_place_loop: + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov r15,QWORD[rsp] + mov QWORD[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov r8,QWORD[24+rsp] + mov r10,QWORD[40+rsp] + xor eax,DWORD[((0+64))+rsp] + xor ebx,DWORD[((4+64))+rsp] + xor ecx,DWORD[((8+64))+rsp] + xor edx,DWORD[((12+64))+rsp] + + mov r11,QWORD[r8] + mov r12,QWORD[8+r8] + sub r10,16 + jz NEAR $L$cbc_fast_dec_in_place_done + + mov QWORD[((0+64))+rsp],r11 + mov QWORD[((8+64))+rsp],r12 + + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + lea r8,[16+r8] + lea r9,[16+r9] + mov QWORD[40+rsp],r10 + jmp NEAR $L$cbc_fast_dec_in_place_loop +$L$cbc_fast_dec_in_place_done: + mov rdi,QWORD[56+rsp] + mov QWORD[rdi],r11 + mov QWORD[8+rdi],r12 + + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + +ALIGN 4 +$L$cbc_fast_cleanup: + cmp DWORD[((80+240))+rsp],0 + lea rdi,[80+rsp] + je NEAR $L$cbc_exit + mov ecx,240/8 + xor rax,rax + DD 0x90AB48F3 + + jmp NEAR $L$cbc_exit + + +ALIGN 16 +$L$cbc_slow_prologue: + + lea rbp,[((-88))+rsp] + and rbp,-64 + + lea r10,[((-88-63))+rcx] + sub r10,rbp + neg r10 + and r10,0x3c0 + sub rbp,r10 + + xchg rbp,rsp + + mov QWORD[16+rsp],rbp +$L$cbc_slow_body: + + + + + mov QWORD[56+rsp],r8 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + mov r10,rdx + + mov eax,DWORD[240+r15] + mov QWORD[rsp],r15 + shl eax,4 + lea rax,[rax*1+r15] + mov QWORD[8+rsp],rax + + + lea r14,[2048+r14] + lea rax,[((768-8))+rsp] + sub rax,r14 + and rax,0x300 + lea r14,[rax*1+r14] + + cmp rbx,0 + je NEAR $L$SLOW_DECRYPT + + + test r10,-16 + mov eax,DWORD[rbp] + mov ebx,DWORD[4+rbp] + mov ecx,DWORD[8+rbp] + mov edx,DWORD[12+rbp] + jz NEAR $L$cbc_slow_enc_tail + +ALIGN 4 +$L$cbc_slow_enc_loop: + xor eax,DWORD[r8] + xor ebx,DWORD[4+r8] + xor ecx,DWORD[8+r8] + xor edx,DWORD[12+r8] + mov r15,QWORD[rsp] + mov QWORD[24+rsp],r8 + mov QWORD[32+rsp],r9 + mov QWORD[40+rsp],r10 + + call _x86_64_AES_encrypt_compact + + mov r8,QWORD[24+rsp] + mov r9,QWORD[32+rsp] + mov r10,QWORD[40+rsp] + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + lea r8,[16+r8] + lea r9,[16+r9] + sub r10,16 + test r10,-16 + jnz NEAR $L$cbc_slow_enc_loop + test r10,15 + jnz NEAR $L$cbc_slow_enc_tail + mov rbp,QWORD[56+rsp] + mov DWORD[rbp],eax + mov DWORD[4+rbp],ebx + mov DWORD[8+rbp],ecx + mov DWORD[12+rbp],edx + + jmp NEAR $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_enc_tail: + mov r11,rax + mov r12,rcx + mov rcx,r10 + mov rsi,r8 + mov rdi,r9 + DD 0x9066A4F3 + mov rcx,16 + sub rcx,r10 + xor rax,rax + DD 0x9066AAF3 + mov r8,r9 + mov r10,16 + mov rax,r11 + mov rcx,r12 + jmp NEAR $L$cbc_slow_enc_loop + +ALIGN 16 +$L$SLOW_DECRYPT: + shr rax,3 + add r14,rax + + mov r11,QWORD[rbp] + mov r12,QWORD[8+rbp] + mov QWORD[((0+64))+rsp],r11 + mov QWORD[((8+64))+rsp],r12 + +ALIGN 4 +$L$cbc_slow_dec_loop: + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov r15,QWORD[rsp] + mov QWORD[24+rsp],r8 + mov QWORD[32+rsp],r9 + mov QWORD[40+rsp],r10 + + call _x86_64_AES_decrypt_compact + + mov r8,QWORD[24+rsp] + mov r9,QWORD[32+rsp] + mov r10,QWORD[40+rsp] + xor eax,DWORD[((0+64))+rsp] + xor ebx,DWORD[((4+64))+rsp] + xor ecx,DWORD[((8+64))+rsp] + xor edx,DWORD[((12+64))+rsp] + + mov r11,QWORD[r8] + mov r12,QWORD[8+r8] + sub r10,16 + jc NEAR $L$cbc_slow_dec_partial + jz NEAR $L$cbc_slow_dec_done + + mov QWORD[((0+64))+rsp],r11 + mov QWORD[((8+64))+rsp],r12 + + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + lea r8,[16+r8] + lea r9,[16+r9] + jmp NEAR $L$cbc_slow_dec_loop +$L$cbc_slow_dec_done: + mov rdi,QWORD[56+rsp] + mov QWORD[rdi],r11 + mov QWORD[8+rdi],r12 + + mov DWORD[r9],eax + mov DWORD[4+r9],ebx + mov DWORD[8+r9],ecx + mov DWORD[12+r9],edx + + jmp NEAR $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_dec_partial: + mov rdi,QWORD[56+rsp] + mov QWORD[rdi],r11 + mov QWORD[8+rdi],r12 + + mov DWORD[((0+64))+rsp],eax + mov DWORD[((4+64))+rsp],ebx + mov DWORD[((8+64))+rsp],ecx + mov DWORD[((12+64))+rsp],edx + + mov rdi,r9 + lea rsi,[64+rsp] + lea rcx,[16+r10] + DD 0x9066A4F3 + jmp NEAR $L$cbc_exit + +ALIGN 16 +$L$cbc_exit: + mov rsi,QWORD[16+rsp] + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$cbc_popfq: + popfq +$L$cbc_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_AES_cbc_encrypt: +ALIGN 64 +$L$AES_Te: + DD 0xa56363c6,0xa56363c6 + DD 0x847c7cf8,0x847c7cf8 + DD 0x997777ee,0x997777ee + DD 0x8d7b7bf6,0x8d7b7bf6 + DD 0x0df2f2ff,0x0df2f2ff + DD 0xbd6b6bd6,0xbd6b6bd6 + DD 0xb16f6fde,0xb16f6fde + DD 0x54c5c591,0x54c5c591 + DD 0x50303060,0x50303060 + DD 0x03010102,0x03010102 + DD 0xa96767ce,0xa96767ce + DD 0x7d2b2b56,0x7d2b2b56 + DD 0x19fefee7,0x19fefee7 + DD 0x62d7d7b5,0x62d7d7b5 + DD 0xe6abab4d,0xe6abab4d + DD 0x9a7676ec,0x9a7676ec + DD 0x45caca8f,0x45caca8f + DD 0x9d82821f,0x9d82821f + DD 0x40c9c989,0x40c9c989 + DD 0x877d7dfa,0x877d7dfa + DD 0x15fafaef,0x15fafaef + DD 0xeb5959b2,0xeb5959b2 + DD 0xc947478e,0xc947478e + DD 0x0bf0f0fb,0x0bf0f0fb + DD 0xecadad41,0xecadad41 + DD 0x67d4d4b3,0x67d4d4b3 + DD 0xfda2a25f,0xfda2a25f + DD 0xeaafaf45,0xeaafaf45 + DD 0xbf9c9c23,0xbf9c9c23 + DD 0xf7a4a453,0xf7a4a453 + DD 0x967272e4,0x967272e4 + DD 0x5bc0c09b,0x5bc0c09b + DD 0xc2b7b775,0xc2b7b775 + DD 0x1cfdfde1,0x1cfdfde1 + DD 0xae93933d,0xae93933d + DD 0x6a26264c,0x6a26264c + DD 0x5a36366c,0x5a36366c + DD 0x413f3f7e,0x413f3f7e + DD 0x02f7f7f5,0x02f7f7f5 + DD 0x4fcccc83,0x4fcccc83 + DD 0x5c343468,0x5c343468 + DD 0xf4a5a551,0xf4a5a551 + DD 0x34e5e5d1,0x34e5e5d1 + DD 0x08f1f1f9,0x08f1f1f9 + DD 0x937171e2,0x937171e2 + DD 0x73d8d8ab,0x73d8d8ab + DD 0x53313162,0x53313162 + DD 0x3f15152a,0x3f15152a + DD 0x0c040408,0x0c040408 + DD 0x52c7c795,0x52c7c795 + DD 0x65232346,0x65232346 + DD 0x5ec3c39d,0x5ec3c39d + DD 0x28181830,0x28181830 + DD 0xa1969637,0xa1969637 + DD 0x0f05050a,0x0f05050a + DD 0xb59a9a2f,0xb59a9a2f + DD 0x0907070e,0x0907070e + DD 0x36121224,0x36121224 + DD 0x9b80801b,0x9b80801b + DD 0x3de2e2df,0x3de2e2df + DD 0x26ebebcd,0x26ebebcd + DD 0x6927274e,0x6927274e + DD 0xcdb2b27f,0xcdb2b27f + DD 0x9f7575ea,0x9f7575ea + DD 0x1b090912,0x1b090912 + DD 0x9e83831d,0x9e83831d + DD 0x742c2c58,0x742c2c58 + DD 0x2e1a1a34,0x2e1a1a34 + DD 0x2d1b1b36,0x2d1b1b36 + DD 0xb26e6edc,0xb26e6edc + DD 0xee5a5ab4,0xee5a5ab4 + DD 0xfba0a05b,0xfba0a05b + DD 0xf65252a4,0xf65252a4 + DD 0x4d3b3b76,0x4d3b3b76 + DD 0x61d6d6b7,0x61d6d6b7 + DD 0xceb3b37d,0xceb3b37d + DD 0x7b292952,0x7b292952 + DD 0x3ee3e3dd,0x3ee3e3dd + DD 0x712f2f5e,0x712f2f5e + DD 0x97848413,0x97848413 + DD 0xf55353a6,0xf55353a6 + DD 0x68d1d1b9,0x68d1d1b9 + DD 0x00000000,0x00000000 + DD 0x2cededc1,0x2cededc1 + DD 0x60202040,0x60202040 + DD 0x1ffcfce3,0x1ffcfce3 + DD 0xc8b1b179,0xc8b1b179 + DD 0xed5b5bb6,0xed5b5bb6 + DD 0xbe6a6ad4,0xbe6a6ad4 + DD 0x46cbcb8d,0x46cbcb8d + DD 0xd9bebe67,0xd9bebe67 + DD 0x4b393972,0x4b393972 + DD 0xde4a4a94,0xde4a4a94 + DD 0xd44c4c98,0xd44c4c98 + DD 0xe85858b0,0xe85858b0 + DD 0x4acfcf85,0x4acfcf85 + DD 0x6bd0d0bb,0x6bd0d0bb + DD 0x2aefefc5,0x2aefefc5 + DD 0xe5aaaa4f,0xe5aaaa4f + DD 0x16fbfbed,0x16fbfbed + DD 0xc5434386,0xc5434386 + DD 0xd74d4d9a,0xd74d4d9a + DD 0x55333366,0x55333366 + DD 0x94858511,0x94858511 + DD 0xcf45458a,0xcf45458a + DD 0x10f9f9e9,0x10f9f9e9 + DD 0x06020204,0x06020204 + DD 0x817f7ffe,0x817f7ffe + DD 0xf05050a0,0xf05050a0 + DD 0x443c3c78,0x443c3c78 + DD 0xba9f9f25,0xba9f9f25 + DD 0xe3a8a84b,0xe3a8a84b + DD 0xf35151a2,0xf35151a2 + DD 0xfea3a35d,0xfea3a35d + DD 0xc0404080,0xc0404080 + DD 0x8a8f8f05,0x8a8f8f05 + DD 0xad92923f,0xad92923f + DD 0xbc9d9d21,0xbc9d9d21 + DD 0x48383870,0x48383870 + DD 0x04f5f5f1,0x04f5f5f1 + DD 0xdfbcbc63,0xdfbcbc63 + DD 0xc1b6b677,0xc1b6b677 + DD 0x75dadaaf,0x75dadaaf + DD 0x63212142,0x63212142 + DD 0x30101020,0x30101020 + DD 0x1affffe5,0x1affffe5 + DD 0x0ef3f3fd,0x0ef3f3fd + DD 0x6dd2d2bf,0x6dd2d2bf + DD 0x4ccdcd81,0x4ccdcd81 + DD 0x140c0c18,0x140c0c18 + DD 0x35131326,0x35131326 + DD 0x2fececc3,0x2fececc3 + DD 0xe15f5fbe,0xe15f5fbe + DD 0xa2979735,0xa2979735 + DD 0xcc444488,0xcc444488 + DD 0x3917172e,0x3917172e + DD 0x57c4c493,0x57c4c493 + DD 0xf2a7a755,0xf2a7a755 + DD 0x827e7efc,0x827e7efc + DD 0x473d3d7a,0x473d3d7a + DD 0xac6464c8,0xac6464c8 + DD 0xe75d5dba,0xe75d5dba + DD 0x2b191932,0x2b191932 + DD 0x957373e6,0x957373e6 + DD 0xa06060c0,0xa06060c0 + DD 0x98818119,0x98818119 + DD 0xd14f4f9e,0xd14f4f9e + DD 0x7fdcdca3,0x7fdcdca3 + DD 0x66222244,0x66222244 + DD 0x7e2a2a54,0x7e2a2a54 + DD 0xab90903b,0xab90903b + DD 0x8388880b,0x8388880b + DD 0xca46468c,0xca46468c + DD 0x29eeeec7,0x29eeeec7 + DD 0xd3b8b86b,0xd3b8b86b + DD 0x3c141428,0x3c141428 + DD 0x79dedea7,0x79dedea7 + DD 0xe25e5ebc,0xe25e5ebc + DD 0x1d0b0b16,0x1d0b0b16 + DD 0x76dbdbad,0x76dbdbad + DD 0x3be0e0db,0x3be0e0db + DD 0x56323264,0x56323264 + DD 0x4e3a3a74,0x4e3a3a74 + DD 0x1e0a0a14,0x1e0a0a14 + DD 0xdb494992,0xdb494992 + DD 0x0a06060c,0x0a06060c + DD 0x6c242448,0x6c242448 + DD 0xe45c5cb8,0xe45c5cb8 + DD 0x5dc2c29f,0x5dc2c29f + DD 0x6ed3d3bd,0x6ed3d3bd + DD 0xefacac43,0xefacac43 + DD 0xa66262c4,0xa66262c4 + DD 0xa8919139,0xa8919139 + DD 0xa4959531,0xa4959531 + DD 0x37e4e4d3,0x37e4e4d3 + DD 0x8b7979f2,0x8b7979f2 + DD 0x32e7e7d5,0x32e7e7d5 + DD 0x43c8c88b,0x43c8c88b + DD 0x5937376e,0x5937376e + DD 0xb76d6dda,0xb76d6dda + DD 0x8c8d8d01,0x8c8d8d01 + DD 0x64d5d5b1,0x64d5d5b1 + DD 0xd24e4e9c,0xd24e4e9c + DD 0xe0a9a949,0xe0a9a949 + DD 0xb46c6cd8,0xb46c6cd8 + DD 0xfa5656ac,0xfa5656ac + DD 0x07f4f4f3,0x07f4f4f3 + DD 0x25eaeacf,0x25eaeacf + DD 0xaf6565ca,0xaf6565ca + DD 0x8e7a7af4,0x8e7a7af4 + DD 0xe9aeae47,0xe9aeae47 + DD 0x18080810,0x18080810 + DD 0xd5baba6f,0xd5baba6f + DD 0x887878f0,0x887878f0 + DD 0x6f25254a,0x6f25254a + DD 0x722e2e5c,0x722e2e5c + DD 0x241c1c38,0x241c1c38 + DD 0xf1a6a657,0xf1a6a657 + DD 0xc7b4b473,0xc7b4b473 + DD 0x51c6c697,0x51c6c697 + DD 0x23e8e8cb,0x23e8e8cb + DD 0x7cdddda1,0x7cdddda1 + DD 0x9c7474e8,0x9c7474e8 + DD 0x211f1f3e,0x211f1f3e + DD 0xdd4b4b96,0xdd4b4b96 + DD 0xdcbdbd61,0xdcbdbd61 + DD 0x868b8b0d,0x868b8b0d + DD 0x858a8a0f,0x858a8a0f + DD 0x907070e0,0x907070e0 + DD 0x423e3e7c,0x423e3e7c + DD 0xc4b5b571,0xc4b5b571 + DD 0xaa6666cc,0xaa6666cc + DD 0xd8484890,0xd8484890 + DD 0x05030306,0x05030306 + DD 0x01f6f6f7,0x01f6f6f7 + DD 0x120e0e1c,0x120e0e1c + DD 0xa36161c2,0xa36161c2 + DD 0x5f35356a,0x5f35356a + DD 0xf95757ae,0xf95757ae + DD 0xd0b9b969,0xd0b9b969 + DD 0x91868617,0x91868617 + DD 0x58c1c199,0x58c1c199 + DD 0x271d1d3a,0x271d1d3a + DD 0xb99e9e27,0xb99e9e27 + DD 0x38e1e1d9,0x38e1e1d9 + DD 0x13f8f8eb,0x13f8f8eb + DD 0xb398982b,0xb398982b + DD 0x33111122,0x33111122 + DD 0xbb6969d2,0xbb6969d2 + DD 0x70d9d9a9,0x70d9d9a9 + DD 0x898e8e07,0x898e8e07 + DD 0xa7949433,0xa7949433 + DD 0xb69b9b2d,0xb69b9b2d + DD 0x221e1e3c,0x221e1e3c + DD 0x92878715,0x92878715 + DD 0x20e9e9c9,0x20e9e9c9 + DD 0x49cece87,0x49cece87 + DD 0xff5555aa,0xff5555aa + DD 0x78282850,0x78282850 + DD 0x7adfdfa5,0x7adfdfa5 + DD 0x8f8c8c03,0x8f8c8c03 + DD 0xf8a1a159,0xf8a1a159 + DD 0x80898909,0x80898909 + DD 0x170d0d1a,0x170d0d1a + DD 0xdabfbf65,0xdabfbf65 + DD 0x31e6e6d7,0x31e6e6d7 + DD 0xc6424284,0xc6424284 + DD 0xb86868d0,0xb86868d0 + DD 0xc3414182,0xc3414182 + DD 0xb0999929,0xb0999929 + DD 0x772d2d5a,0x772d2d5a + DD 0x110f0f1e,0x110f0f1e + DD 0xcbb0b07b,0xcbb0b07b + DD 0xfc5454a8,0xfc5454a8 + DD 0xd6bbbb6d,0xd6bbbb6d + DD 0x3a16162c,0x3a16162c +DB 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +DB 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +DB 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +DB 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +DB 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +DB 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +DB 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +DB 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +DB 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +DB 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +DB 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +DB 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +DB 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +DB 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +DB 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +DB 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +DB 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +DB 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +DB 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +DB 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +DB 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +DB 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +DB 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +DB 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +DB 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +DB 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +DB 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +DB 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +DB 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +DB 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +DB 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +DB 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +DB 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +DB 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +DB 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +DB 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +DB 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +DB 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +DB 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +DB 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +DB 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +DB 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +DB 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +DB 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +DB 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +DB 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +DB 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +DB 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +DB 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +DB 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +DB 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +DB 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +DB 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +DB 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +DB 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +DB 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +DB 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +DB 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +DB 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +DB 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +DB 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +DB 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +DB 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +DB 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +DB 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +DB 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +DB 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +DB 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +DB 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +DB 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +DB 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +DB 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +DB 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +DB 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +DB 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +DB 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +DB 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +DB 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +DB 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +DB 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +DB 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +DB 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +DB 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +DB 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +DB 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +DB 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +DB 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +DB 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +DB 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +DB 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +DB 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +DB 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +DB 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +DB 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +DB 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +DB 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +DB 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +DB 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +DB 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +DB 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +DB 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +DB 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +DB 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +DB 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +DB 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +DB 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +DB 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +DB 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +DB 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +DB 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +DB 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +DB 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +DB 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +DB 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +DB 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +DB 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +DB 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +DB 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +DB 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +DB 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +DB 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +DB 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +DB 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +DB 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +DB 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +DB 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +DB 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +DB 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 + DD 0x00000001,0x00000002,0x00000004,0x00000008 + DD 0x00000010,0x00000020,0x00000040,0x00000080 + DD 0x0000001b,0x00000036,0x80808080,0x80808080 + DD 0xfefefefe,0xfefefefe,0x1b1b1b1b,0x1b1b1b1b +ALIGN 64 +$L$AES_Td: + DD 0x50a7f451,0x50a7f451 + DD 0x5365417e,0x5365417e + DD 0xc3a4171a,0xc3a4171a + DD 0x965e273a,0x965e273a + DD 0xcb6bab3b,0xcb6bab3b + DD 0xf1459d1f,0xf1459d1f + DD 0xab58faac,0xab58faac + DD 0x9303e34b,0x9303e34b + DD 0x55fa3020,0x55fa3020 + DD 0xf66d76ad,0xf66d76ad + DD 0x9176cc88,0x9176cc88 + DD 0x254c02f5,0x254c02f5 + DD 0xfcd7e54f,0xfcd7e54f + DD 0xd7cb2ac5,0xd7cb2ac5 + DD 0x80443526,0x80443526 + DD 0x8fa362b5,0x8fa362b5 + DD 0x495ab1de,0x495ab1de + DD 0x671bba25,0x671bba25 + DD 0x980eea45,0x980eea45 + DD 0xe1c0fe5d,0xe1c0fe5d + DD 0x02752fc3,0x02752fc3 + DD 0x12f04c81,0x12f04c81 + DD 0xa397468d,0xa397468d + DD 0xc6f9d36b,0xc6f9d36b + DD 0xe75f8f03,0xe75f8f03 + DD 0x959c9215,0x959c9215 + DD 0xeb7a6dbf,0xeb7a6dbf + DD 0xda595295,0xda595295 + DD 0x2d83bed4,0x2d83bed4 + DD 0xd3217458,0xd3217458 + DD 0x2969e049,0x2969e049 + DD 0x44c8c98e,0x44c8c98e + DD 0x6a89c275,0x6a89c275 + DD 0x78798ef4,0x78798ef4 + DD 0x6b3e5899,0x6b3e5899 + DD 0xdd71b927,0xdd71b927 + DD 0xb64fe1be,0xb64fe1be + DD 0x17ad88f0,0x17ad88f0 + DD 0x66ac20c9,0x66ac20c9 + DD 0xb43ace7d,0xb43ace7d + DD 0x184adf63,0x184adf63 + DD 0x82311ae5,0x82311ae5 + DD 0x60335197,0x60335197 + DD 0x457f5362,0x457f5362 + DD 0xe07764b1,0xe07764b1 + DD 0x84ae6bbb,0x84ae6bbb + DD 0x1ca081fe,0x1ca081fe + DD 0x942b08f9,0x942b08f9 + DD 0x58684870,0x58684870 + DD 0x19fd458f,0x19fd458f + DD 0x876cde94,0x876cde94 + DD 0xb7f87b52,0xb7f87b52 + DD 0x23d373ab,0x23d373ab + DD 0xe2024b72,0xe2024b72 + DD 0x578f1fe3,0x578f1fe3 + DD 0x2aab5566,0x2aab5566 + DD 0x0728ebb2,0x0728ebb2 + DD 0x03c2b52f,0x03c2b52f + DD 0x9a7bc586,0x9a7bc586 + DD 0xa50837d3,0xa50837d3 + DD 0xf2872830,0xf2872830 + DD 0xb2a5bf23,0xb2a5bf23 + DD 0xba6a0302,0xba6a0302 + DD 0x5c8216ed,0x5c8216ed + DD 0x2b1ccf8a,0x2b1ccf8a + DD 0x92b479a7,0x92b479a7 + DD 0xf0f207f3,0xf0f207f3 + DD 0xa1e2694e,0xa1e2694e + DD 0xcdf4da65,0xcdf4da65 + DD 0xd5be0506,0xd5be0506 + DD 0x1f6234d1,0x1f6234d1 + DD 0x8afea6c4,0x8afea6c4 + DD 0x9d532e34,0x9d532e34 + DD 0xa055f3a2,0xa055f3a2 + DD 0x32e18a05,0x32e18a05 + DD 0x75ebf6a4,0x75ebf6a4 + DD 0x39ec830b,0x39ec830b + DD 0xaaef6040,0xaaef6040 + DD 0x069f715e,0x069f715e + DD 0x51106ebd,0x51106ebd + DD 0xf98a213e,0xf98a213e + DD 0x3d06dd96,0x3d06dd96 + DD 0xae053edd,0xae053edd + DD 0x46bde64d,0x46bde64d + DD 0xb58d5491,0xb58d5491 + DD 0x055dc471,0x055dc471 + DD 0x6fd40604,0x6fd40604 + DD 0xff155060,0xff155060 + DD 0x24fb9819,0x24fb9819 + DD 0x97e9bdd6,0x97e9bdd6 + DD 0xcc434089,0xcc434089 + DD 0x779ed967,0x779ed967 + DD 0xbd42e8b0,0xbd42e8b0 + DD 0x888b8907,0x888b8907 + DD 0x385b19e7,0x385b19e7 + DD 0xdbeec879,0xdbeec879 + DD 0x470a7ca1,0x470a7ca1 + DD 0xe90f427c,0xe90f427c + DD 0xc91e84f8,0xc91e84f8 + DD 0x00000000,0x00000000 + DD 0x83868009,0x83868009 + DD 0x48ed2b32,0x48ed2b32 + DD 0xac70111e,0xac70111e + DD 0x4e725a6c,0x4e725a6c + DD 0xfbff0efd,0xfbff0efd + DD 0x5638850f,0x5638850f + DD 0x1ed5ae3d,0x1ed5ae3d + DD 0x27392d36,0x27392d36 + DD 0x64d90f0a,0x64d90f0a + DD 0x21a65c68,0x21a65c68 + DD 0xd1545b9b,0xd1545b9b + DD 0x3a2e3624,0x3a2e3624 + DD 0xb1670a0c,0xb1670a0c + DD 0x0fe75793,0x0fe75793 + DD 0xd296eeb4,0xd296eeb4 + DD 0x9e919b1b,0x9e919b1b + DD 0x4fc5c080,0x4fc5c080 + DD 0xa220dc61,0xa220dc61 + DD 0x694b775a,0x694b775a + DD 0x161a121c,0x161a121c + DD 0x0aba93e2,0x0aba93e2 + DD 0xe52aa0c0,0xe52aa0c0 + DD 0x43e0223c,0x43e0223c + DD 0x1d171b12,0x1d171b12 + DD 0x0b0d090e,0x0b0d090e + DD 0xadc78bf2,0xadc78bf2 + DD 0xb9a8b62d,0xb9a8b62d + DD 0xc8a91e14,0xc8a91e14 + DD 0x8519f157,0x8519f157 + DD 0x4c0775af,0x4c0775af + DD 0xbbdd99ee,0xbbdd99ee + DD 0xfd607fa3,0xfd607fa3 + DD 0x9f2601f7,0x9f2601f7 + DD 0xbcf5725c,0xbcf5725c + DD 0xc53b6644,0xc53b6644 + DD 0x347efb5b,0x347efb5b + DD 0x7629438b,0x7629438b + DD 0xdcc623cb,0xdcc623cb + DD 0x68fcedb6,0x68fcedb6 + DD 0x63f1e4b8,0x63f1e4b8 + DD 0xcadc31d7,0xcadc31d7 + DD 0x10856342,0x10856342 + DD 0x40229713,0x40229713 + DD 0x2011c684,0x2011c684 + DD 0x7d244a85,0x7d244a85 + DD 0xf83dbbd2,0xf83dbbd2 + DD 0x1132f9ae,0x1132f9ae + DD 0x6da129c7,0x6da129c7 + DD 0x4b2f9e1d,0x4b2f9e1d + DD 0xf330b2dc,0xf330b2dc + DD 0xec52860d,0xec52860d + DD 0xd0e3c177,0xd0e3c177 + DD 0x6c16b32b,0x6c16b32b + DD 0x99b970a9,0x99b970a9 + DD 0xfa489411,0xfa489411 + DD 0x2264e947,0x2264e947 + DD 0xc48cfca8,0xc48cfca8 + DD 0x1a3ff0a0,0x1a3ff0a0 + DD 0xd82c7d56,0xd82c7d56 + DD 0xef903322,0xef903322 + DD 0xc74e4987,0xc74e4987 + DD 0xc1d138d9,0xc1d138d9 + DD 0xfea2ca8c,0xfea2ca8c + DD 0x360bd498,0x360bd498 + DD 0xcf81f5a6,0xcf81f5a6 + DD 0x28de7aa5,0x28de7aa5 + DD 0x268eb7da,0x268eb7da + DD 0xa4bfad3f,0xa4bfad3f + DD 0xe49d3a2c,0xe49d3a2c + DD 0x0d927850,0x0d927850 + DD 0x9bcc5f6a,0x9bcc5f6a + DD 0x62467e54,0x62467e54 + DD 0xc2138df6,0xc2138df6 + DD 0xe8b8d890,0xe8b8d890 + DD 0x5ef7392e,0x5ef7392e + DD 0xf5afc382,0xf5afc382 + DD 0xbe805d9f,0xbe805d9f + DD 0x7c93d069,0x7c93d069 + DD 0xa92dd56f,0xa92dd56f + DD 0xb31225cf,0xb31225cf + DD 0x3b99acc8,0x3b99acc8 + DD 0xa77d1810,0xa77d1810 + DD 0x6e639ce8,0x6e639ce8 + DD 0x7bbb3bdb,0x7bbb3bdb + DD 0x097826cd,0x097826cd + DD 0xf418596e,0xf418596e + DD 0x01b79aec,0x01b79aec + DD 0xa89a4f83,0xa89a4f83 + DD 0x656e95e6,0x656e95e6 + DD 0x7ee6ffaa,0x7ee6ffaa + DD 0x08cfbc21,0x08cfbc21 + DD 0xe6e815ef,0xe6e815ef + DD 0xd99be7ba,0xd99be7ba + DD 0xce366f4a,0xce366f4a + DD 0xd4099fea,0xd4099fea + DD 0xd67cb029,0xd67cb029 + DD 0xafb2a431,0xafb2a431 + DD 0x31233f2a,0x31233f2a + DD 0x3094a5c6,0x3094a5c6 + DD 0xc066a235,0xc066a235 + DD 0x37bc4e74,0x37bc4e74 + DD 0xa6ca82fc,0xa6ca82fc + DD 0xb0d090e0,0xb0d090e0 + DD 0x15d8a733,0x15d8a733 + DD 0x4a9804f1,0x4a9804f1 + DD 0xf7daec41,0xf7daec41 + DD 0x0e50cd7f,0x0e50cd7f + DD 0x2ff69117,0x2ff69117 + DD 0x8dd64d76,0x8dd64d76 + DD 0x4db0ef43,0x4db0ef43 + DD 0x544daacc,0x544daacc + DD 0xdf0496e4,0xdf0496e4 + DD 0xe3b5d19e,0xe3b5d19e + DD 0x1b886a4c,0x1b886a4c + DD 0xb81f2cc1,0xb81f2cc1 + DD 0x7f516546,0x7f516546 + DD 0x04ea5e9d,0x04ea5e9d + DD 0x5d358c01,0x5d358c01 + DD 0x737487fa,0x737487fa + DD 0x2e410bfb,0x2e410bfb + DD 0x5a1d67b3,0x5a1d67b3 + DD 0x52d2db92,0x52d2db92 + DD 0x335610e9,0x335610e9 + DD 0x1347d66d,0x1347d66d + DD 0x8c61d79a,0x8c61d79a + DD 0x7a0ca137,0x7a0ca137 + DD 0x8e14f859,0x8e14f859 + DD 0x893c13eb,0x893c13eb + DD 0xee27a9ce,0xee27a9ce + DD 0x35c961b7,0x35c961b7 + DD 0xede51ce1,0xede51ce1 + DD 0x3cb1477a,0x3cb1477a + DD 0x59dfd29c,0x59dfd29c + DD 0x3f73f255,0x3f73f255 + DD 0x79ce1418,0x79ce1418 + DD 0xbf37c773,0xbf37c773 + DD 0xeacdf753,0xeacdf753 + DD 0x5baafd5f,0x5baafd5f + DD 0x146f3ddf,0x146f3ddf + DD 0x86db4478,0x86db4478 + DD 0x81f3afca,0x81f3afca + DD 0x3ec468b9,0x3ec468b9 + DD 0x2c342438,0x2c342438 + DD 0x5f40a3c2,0x5f40a3c2 + DD 0x72c31d16,0x72c31d16 + DD 0x0c25e2bc,0x0c25e2bc + DD 0x8b493c28,0x8b493c28 + DD 0x41950dff,0x41950dff + DD 0x7101a839,0x7101a839 + DD 0xdeb30c08,0xdeb30c08 + DD 0x9ce4b4d8,0x9ce4b4d8 + DD 0x90c15664,0x90c15664 + DD 0x6184cb7b,0x6184cb7b + DD 0x70b632d5,0x70b632d5 + DD 0x745c6c48,0x745c6c48 + DD 0x4257b8d0,0x4257b8d0 +DB 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +DB 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +DB 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +DB 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +DB 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +DB 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +DB 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +DB 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +DB 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +DB 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +DB 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +DB 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +DB 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +DB 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +DB 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +DB 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +DB 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +DB 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +DB 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +DB 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +DB 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +DB 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +DB 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +DB 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +DB 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +DB 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +DB 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +DB 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +DB 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +DB 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +DB 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +DB 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d + DD 0x80808080,0x80808080,0xfefefefe,0xfefefefe + DD 0x1b1b1b1b,0x1b1b1b1b,0,0 +DB 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +DB 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +DB 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +DB 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +DB 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +DB 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +DB 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +DB 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +DB 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +DB 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +DB 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +DB 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +DB 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +DB 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +DB 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +DB 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +DB 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +DB 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +DB 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +DB 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +DB 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +DB 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +DB 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +DB 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +DB 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +DB 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +DB 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +DB 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +DB 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +DB 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +DB 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +DB 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d + DD 0x80808080,0x80808080,0xfefefefe,0xfefefefe + DD 0x1b1b1b1b,0x1b1b1b1b,0,0 +DB 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +DB 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +DB 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +DB 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +DB 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +DB 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +DB 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +DB 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +DB 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +DB 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +DB 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +DB 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +DB 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +DB 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +DB 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +DB 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +DB 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +DB 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +DB 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +DB 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +DB 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +DB 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +DB 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +DB 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +DB 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +DB 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +DB 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +DB 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +DB 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +DB 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +DB 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +DB 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d + DD 0x80808080,0x80808080,0xfefefefe,0xfefefefe + DD 0x1b1b1b1b,0x1b1b1b1b,0,0 +DB 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +DB 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +DB 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +DB 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +DB 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +DB 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +DB 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +DB 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +DB 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +DB 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +DB 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +DB 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +DB 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +DB 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +DB 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +DB 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +DB 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +DB 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +DB 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +DB 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +DB 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +DB 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +DB 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +DB 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +DB 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +DB 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +DB 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +DB 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +DB 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +DB 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +DB 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +DB 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d + DD 0x80808080,0x80808080,0xfefefefe,0xfefefefe + DD 0x1b1b1b1b,0x1b1b1b1b,0,0 +DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +block_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_block_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_block_prologue + + mov rax,QWORD[24+rax] + lea rax,[48+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_block_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + jmp NEAR $L$common_seh_exit + + + +ALIGN 16 +key_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_key_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_key_prologue + + lea rax,[56+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_key_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + jmp NEAR $L$common_seh_exit + + + +ALIGN 16 +cbc_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$cbc_prologue] + cmp rbx,r10 + jb NEAR $L$in_cbc_prologue + + lea r10,[$L$cbc_fast_body] + cmp rbx,r10 + jb NEAR $L$in_cbc_frame_setup + + lea r10,[$L$cbc_slow_prologue] + cmp rbx,r10 + jb NEAR $L$in_cbc_body + + lea r10,[$L$cbc_slow_body] + cmp rbx,r10 + jb NEAR $L$in_cbc_frame_setup + +$L$in_cbc_body: + mov rax,QWORD[152+r8] + + lea r10,[$L$cbc_epilogue] + cmp rbx,r10 + jae NEAR $L$in_cbc_prologue + + lea rax,[8+rax] + + lea r10,[$L$cbc_popfq] + cmp rbx,r10 + jae NEAR $L$in_cbc_prologue + + mov rax,QWORD[8+rax] + lea rax,[56+rax] + +$L$in_cbc_frame_setup: + mov rbx,QWORD[((-16))+rax] + mov rbp,QWORD[((-24))+rax] + mov r12,QWORD[((-32))+rax] + mov r13,QWORD[((-40))+rax] + mov r14,QWORD[((-48))+rax] + mov r15,QWORD[((-56))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_cbc_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + +$L$common_seh_exit: + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_asm_AES_encrypt wrt ..imagebase + DD $L$SEH_end_asm_AES_encrypt wrt ..imagebase + DD $L$SEH_info_asm_AES_encrypt wrt ..imagebase + + DD $L$SEH_begin_asm_AES_decrypt wrt ..imagebase + DD $L$SEH_end_asm_AES_decrypt wrt ..imagebase + DD $L$SEH_info_asm_AES_decrypt wrt ..imagebase + + DD $L$SEH_begin_asm_AES_set_encrypt_key wrt ..imagebase + DD $L$SEH_end_asm_AES_set_encrypt_key wrt ..imagebase + DD $L$SEH_info_asm_AES_set_encrypt_key wrt ..imagebase + + DD $L$SEH_begin_asm_AES_set_decrypt_key wrt ..imagebase + DD $L$SEH_end_asm_AES_set_decrypt_key wrt ..imagebase + DD $L$SEH_info_asm_AES_set_decrypt_key wrt ..imagebase + + DD $L$SEH_begin_asm_AES_cbc_encrypt wrt ..imagebase + DD $L$SEH_end_asm_AES_cbc_encrypt wrt ..imagebase + DD $L$SEH_info_asm_AES_cbc_encrypt wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_asm_AES_encrypt: +DB 9,0,0,0 + DD block_se_handler wrt ..imagebase + DD $L$enc_prologue wrt ..imagebase,$L$enc_epilogue wrt ..imagebase +$L$SEH_info_asm_AES_decrypt: +DB 9,0,0,0 + DD block_se_handler wrt ..imagebase + DD $L$dec_prologue wrt ..imagebase,$L$dec_epilogue wrt ..imagebase +$L$SEH_info_asm_AES_set_encrypt_key: +DB 9,0,0,0 + DD key_se_handler wrt ..imagebase + DD $L$enc_key_prologue wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase +$L$SEH_info_asm_AES_set_decrypt_key: +DB 9,0,0,0 + DD key_se_handler wrt ..imagebase + DD $L$dec_key_prologue wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase +$L$SEH_info_asm_AES_cbc_encrypt: +DB 9,0,0,0 + DD cbc_se_handler wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/aes/aesni-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/aes/aesni-x86_64.asm new file mode 100644 index 00000000000..cf313d1ae9a --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/aes/aesni-x86_64.asm @@ -0,0 +1,4027 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P +global aesni_encrypt + +ALIGN 16 +aesni_encrypt: + movups xmm2,XMMWORD[rcx] + mov eax,DWORD[240+r8] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_enc1_1 +DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[rdx],xmm2 + pxor xmm2,xmm2 + DB 0F3h,0C3h ;repret + + +global aesni_decrypt + +ALIGN 16 +aesni_decrypt: + movups xmm2,XMMWORD[rcx] + mov eax,DWORD[240+r8] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_dec1_2 +DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[rdx],xmm2 + pxor xmm2,xmm2 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_encrypt2: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop2: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop2 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_decrypt2: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop2: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop2 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,223,208 +DB 102,15,56,223,216 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_encrypt3: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop3: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop3 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_decrypt3: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop3: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop3 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_encrypt4: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax +DB 0x0f,0x1f,0x00 + add rax,16 + +$L$enc_loop4: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop4 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_decrypt4: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD[32+rcx] + lea rcx,[32+rax*1+rcx] + neg rax +DB 0x0f,0x1f,0x00 + add rax,16 + +$L$dec_loop4: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop4 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_encrypt6: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,220,209 + lea rcx,[32+rax*1+rcx] + neg rax +DB 102,15,56,220,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,220,225 + pxor xmm7,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +$L$enc_loop6_enter: +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop6 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_decrypt6: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,222,209 + lea rcx,[32+rax*1+rcx] + neg rax +DB 102,15,56,222,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,222,225 + pxor xmm7,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +$L$dec_loop6_enter: +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop6 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_encrypt8: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,[32+rax*1+rcx] + neg rax +DB 102,15,56,220,209 + pxor xmm7,xmm0 + pxor xmm8,xmm0 +DB 102,15,56,220,217 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$enc_loop8_inner +ALIGN 16 +$L$enc_loop8: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +$L$enc_loop8_inner: +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +$L$enc_loop8_enter: + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$enc_loop8 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 +DB 102,68,15,56,221,192 +DB 102,68,15,56,221,200 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_aesni_decrypt8: + movups xmm0,XMMWORD[rcx] + shl eax,4 + movups xmm1,XMMWORD[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,[32+rax*1+rcx] + neg rax +DB 102,15,56,222,209 + pxor xmm7,xmm0 + pxor xmm8,xmm0 +DB 102,15,56,222,217 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[rax*1+rcx] + add rax,16 + jmp NEAR $L$dec_loop8_inner +ALIGN 16 +$L$dec_loop8: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +$L$dec_loop8_inner: +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +$L$dec_loop8_enter: + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$dec_loop8 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 +DB 102,68,15,56,223,192 +DB 102,68,15,56,223,200 + DB 0F3h,0C3h ;repret + +global aesni_ecb_encrypt + +ALIGN 16 +aesni_ecb_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ecb_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + lea rsp,[((-88))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 +$L$ecb_enc_body: + and rdx,-16 + jz NEAR $L$ecb_ret + + mov eax,DWORD[240+rcx] + movups xmm0,XMMWORD[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz NEAR $L$ecb_decrypt + + cmp rdx,0x80 + jb NEAR $L$ecb_enc_tail + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + movdqu xmm5,XMMWORD[48+rdi] + movdqu xmm6,XMMWORD[64+rdi] + movdqu xmm7,XMMWORD[80+rdi] + movdqu xmm8,XMMWORD[96+rdi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] + sub rdx,0x80 + jmp NEAR $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8: + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD[rdi] + mov eax,r10d + movups XMMWORD[16+rsi],xmm3 + movdqu xmm3,XMMWORD[16+rdi] + movups XMMWORD[32+rsi],xmm4 + movdqu xmm4,XMMWORD[32+rdi] + movups XMMWORD[48+rsi],xmm5 + movdqu xmm5,XMMWORD[48+rdi] + movups XMMWORD[64+rsi],xmm6 + movdqu xmm6,XMMWORD[64+rdi] + movups XMMWORD[80+rsi],xmm7 + movdqu xmm7,XMMWORD[80+rdi] + movups XMMWORD[96+rsi],xmm8 + movdqu xmm8,XMMWORD[96+rdi] + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] +$L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + sub rdx,0x80 + jnc NEAR $L$ecb_enc_loop8 + + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movups XMMWORD[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + movups XMMWORD[96+rsi],xmm8 + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + add rdx,0x80 + jz NEAR $L$ecb_ret + +$L$ecb_enc_tail: + movups xmm2,XMMWORD[rdi] + cmp rdx,0x20 + jb NEAR $L$ecb_enc_one + movups xmm3,XMMWORD[16+rdi] + je NEAR $L$ecb_enc_two + movups xmm4,XMMWORD[32+rdi] + cmp rdx,0x40 + jb NEAR $L$ecb_enc_three + movups xmm5,XMMWORD[48+rdi] + je NEAR $L$ecb_enc_four + movups xmm6,XMMWORD[64+rdi] + cmp rdx,0x60 + jb NEAR $L$ecb_enc_five + movups xmm7,XMMWORD[80+rdi] + je NEAR $L$ecb_enc_six + movdqu xmm8,XMMWORD[96+rdi] + xorps xmm9,xmm9 + call _aesni_encrypt8 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + movups XMMWORD[96+rsi],xmm8 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one: + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_3 +DB 102,15,56,221,209 + movups XMMWORD[rsi],xmm2 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two: + call _aesni_encrypt2 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three: + call _aesni_encrypt3 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four: + call _aesni_encrypt4 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six: + call _aesni_encrypt6 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + movups XMMWORD[48+rsi],xmm5 + movups XMMWORD[64+rsi],xmm6 + movups XMMWORD[80+rsi],xmm7 + jmp NEAR $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt: + cmp rdx,0x80 + jb NEAR $L$ecb_dec_tail + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + movdqu xmm5,XMMWORD[48+rdi] + movdqu xmm6,XMMWORD[64+rdi] + movdqu xmm7,XMMWORD[80+rdi] + movdqu xmm8,XMMWORD[96+rdi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] + sub rdx,0x80 + jmp NEAR $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8: + movups XMMWORD[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD[rdi] + mov eax,r10d + movups XMMWORD[16+rsi],xmm3 + movdqu xmm3,XMMWORD[16+rdi] + movups XMMWORD[32+rsi],xmm4 + movdqu xmm4,XMMWORD[32+rdi] + movups XMMWORD[48+rsi],xmm5 + movdqu xmm5,XMMWORD[48+rdi] + movups XMMWORD[64+rsi],xmm6 + movdqu xmm6,XMMWORD[64+rdi] + movups XMMWORD[80+rsi],xmm7 + movdqu xmm7,XMMWORD[80+rdi] + movups XMMWORD[96+rsi],xmm8 + movdqu xmm8,XMMWORD[96+rdi] + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + movdqu xmm9,XMMWORD[112+rdi] + lea rdi,[128+rdi] +$L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD[r11] + sub rdx,0x80 + jnc NEAR $L$ecb_dec_loop8 + + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + mov rcx,r11 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + mov eax,r10d + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + movups XMMWORD[96+rsi],xmm8 + pxor xmm8,xmm8 + movups XMMWORD[112+rsi],xmm9 + pxor xmm9,xmm9 + lea rsi,[128+rsi] + add rdx,0x80 + jz NEAR $L$ecb_ret + +$L$ecb_dec_tail: + movups xmm2,XMMWORD[rdi] + cmp rdx,0x20 + jb NEAR $L$ecb_dec_one + movups xmm3,XMMWORD[16+rdi] + je NEAR $L$ecb_dec_two + movups xmm4,XMMWORD[32+rdi] + cmp rdx,0x40 + jb NEAR $L$ecb_dec_three + movups xmm5,XMMWORD[48+rdi] + je NEAR $L$ecb_dec_four + movups xmm6,XMMWORD[64+rdi] + cmp rdx,0x60 + jb NEAR $L$ecb_dec_five + movups xmm7,XMMWORD[80+rdi] + je NEAR $L$ecb_dec_six + movups xmm8,XMMWORD[96+rdi] + movups xmm0,XMMWORD[rcx] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + movups XMMWORD[96+rsi],xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one: + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_4 +DB 102,15,56,223,209 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two: + call _aesni_decrypt2 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three: + call _aesni_decrypt3 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four: + call _aesni_decrypt4 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + jmp NEAR $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six: + call _aesni_decrypt6 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + movups XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movups XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movups XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + movups XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + movups XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + +$L$ecb_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + movaps xmm6,XMMWORD[rsp] + movaps XMMWORD[rsp],xmm0 + movaps xmm7,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm8,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm9,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + lea rsp,[88+rsp] +$L$ecb_enc_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ecb_encrypt: +global aesni_ccm64_encrypt_blocks + +ALIGN 16 +aesni_ccm64_encrypt_blocks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_encrypt_blocks: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + lea rsp,[((-88))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 +$L$ccm64_enc_body: + mov eax,DWORD[240+rcx] + movdqu xmm6,XMMWORD[r8] + movdqa xmm9,XMMWORD[$L$increment64] + movdqa xmm7,XMMWORD[$L$bswap_mask] + + shl eax,4 + mov r10d,16 + lea r11,[rcx] + movdqu xmm3,XMMWORD[r9] + movdqa xmm2,xmm6 + lea rcx,[32+rax*1+rcx] +DB 102,15,56,0,247 + sub r10,rax + jmp NEAR $L$ccm64_enc_outer +ALIGN 16 +$L$ccm64_enc_outer: + movups xmm0,XMMWORD[r11] + mov rax,r10 + movups xmm8,XMMWORD[rdi] + + xorps xmm2,xmm0 + movups xmm1,XMMWORD[16+r11] + xorps xmm0,xmm8 + xorps xmm3,xmm0 + movups xmm0,XMMWORD[32+r11] + +$L$ccm64_enc2_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm6,xmm9 + dec rdx +DB 102,15,56,221,208 +DB 102,15,56,221,216 + + lea rdi,[16+rdi] + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD[rsi],xmm8 +DB 102,15,56,0,215 + lea rsi,[16+rsi] + jnz NEAR $L$ccm64_enc_outer + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movups XMMWORD[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 + movaps xmm6,XMMWORD[rsp] + movaps XMMWORD[rsp],xmm0 + movaps xmm7,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm8,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm9,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + lea rsp,[88+rsp] +$L$ccm64_enc_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_encrypt_blocks: +global aesni_ccm64_decrypt_blocks + +ALIGN 16 +aesni_ccm64_decrypt_blocks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_decrypt_blocks: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + lea rsp,[((-88))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 +$L$ccm64_dec_body: + mov eax,DWORD[240+rcx] + movups xmm6,XMMWORD[r8] + movdqu xmm3,XMMWORD[r9] + movdqa xmm9,XMMWORD[$L$increment64] + movdqa xmm7,XMMWORD[$L$bswap_mask] + + movaps xmm2,xmm6 + mov r10d,eax + mov r11,rcx +DB 102,15,56,0,247 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_5 +DB 102,15,56,221,209 + shl r10d,4 + mov eax,16 + movups xmm8,XMMWORD[rdi] + paddq xmm6,xmm9 + lea rdi,[16+rdi] + sub rax,r10 + lea rcx,[32+r10*1+r11] + mov r10,rax + jmp NEAR $L$ccm64_dec_outer +ALIGN 16 +$L$ccm64_dec_outer: + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD[rsi],xmm8 + lea rsi,[16+rsi] +DB 102,15,56,0,215 + + sub rdx,1 + jz NEAR $L$ccm64_dec_break + + movups xmm0,XMMWORD[r11] + mov rax,r10 + movups xmm1,XMMWORD[16+r11] + xorps xmm8,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm8 + movups xmm0,XMMWORD[32+r11] + jmp NEAR $L$ccm64_dec2_loop +ALIGN 16 +$L$ccm64_dec2_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD[((-16))+rax*1+rcx] + jnz NEAR $L$ccm64_dec2_loop + movups xmm8,XMMWORD[rdi] + paddq xmm6,xmm9 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + lea rdi,[16+rdi] + jmp NEAR $L$ccm64_dec_outer + +ALIGN 16 +$L$ccm64_dec_break: + + mov eax,DWORD[240+r11] + movups xmm0,XMMWORD[r11] + movups xmm1,XMMWORD[16+r11] + xorps xmm8,xmm0 + lea r11,[32+r11] + xorps xmm3,xmm8 +$L$oop_enc1_6: +DB 102,15,56,220,217 + dec eax + movups xmm1,XMMWORD[r11] + lea r11,[16+r11] + jnz NEAR $L$oop_enc1_6 +DB 102,15,56,221,217 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movups XMMWORD[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 + movaps xmm6,XMMWORD[rsp] + movaps XMMWORD[rsp],xmm0 + movaps xmm7,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm8,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm9,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + lea rsp,[88+rsp] +$L$ccm64_dec_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_decrypt_blocks: +global aesni_ctr32_encrypt_blocks + +ALIGN 16 +aesni_ctr32_encrypt_blocks: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ctr32_encrypt_blocks: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + cmp rdx,1 + jne NEAR $L$ctr32_bulk + + + + movups xmm2,XMMWORD[r8] + movups xmm3,XMMWORD[rdi] + mov edx,DWORD[240+rcx] + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7: +DB 102,15,56,220,209 + dec edx + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_7 +DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD[rsi],xmm2 + xorps xmm2,xmm2 + jmp NEAR $L$ctr32_epilogue + +ALIGN 16 +$L$ctr32_bulk: + lea rax,[rsp] + push rbp + sub rsp,288 + and rsp,-16 + movaps XMMWORD[(-168)+rax],xmm6 + movaps XMMWORD[(-152)+rax],xmm7 + movaps XMMWORD[(-136)+rax],xmm8 + movaps XMMWORD[(-120)+rax],xmm9 + movaps XMMWORD[(-104)+rax],xmm10 + movaps XMMWORD[(-88)+rax],xmm11 + movaps XMMWORD[(-72)+rax],xmm12 + movaps XMMWORD[(-56)+rax],xmm13 + movaps XMMWORD[(-40)+rax],xmm14 + movaps XMMWORD[(-24)+rax],xmm15 +$L$ctr32_body: + lea rbp,[((-8))+rax] + + + + + movdqu xmm2,XMMWORD[r8] + movdqu xmm0,XMMWORD[rcx] + mov r8d,DWORD[12+r8] + pxor xmm2,xmm0 + mov r11d,DWORD[12+rcx] + movdqa XMMWORD[rsp],xmm2 + bswap r8d + movdqa xmm3,xmm2 + movdqa xmm4,xmm2 + movdqa xmm5,xmm2 + movdqa XMMWORD[64+rsp],xmm2 + movdqa XMMWORD[80+rsp],xmm2 + movdqa XMMWORD[96+rsp],xmm2 + mov r10,rdx + movdqa XMMWORD[112+rsp],xmm2 + + lea rax,[1+r8] + lea rdx,[2+r8] + bswap eax + bswap edx + xor eax,r11d + xor edx,r11d +DB 102,15,58,34,216,3 + lea rax,[3+r8] + movdqa XMMWORD[16+rsp],xmm3 +DB 102,15,58,34,226,3 + bswap eax + mov rdx,r10 + lea r10,[4+r8] + movdqa XMMWORD[32+rsp],xmm4 + xor eax,r11d + bswap r10d +DB 102,15,58,34,232,3 + xor r10d,r11d + movdqa XMMWORD[48+rsp],xmm5 + lea r9,[5+r8] + mov DWORD[((64+12))+rsp],r10d + bswap r9d + lea r10,[6+r8] + mov eax,DWORD[240+rcx] + xor r9d,r11d + bswap r10d + mov DWORD[((80+12))+rsp],r9d + xor r10d,r11d + lea r9,[7+r8] + mov DWORD[((96+12))+rsp],r10d + bswap r9d + mov r10d,DWORD[((OPENSSL_ia32cap_P+4))] + xor r9d,r11d + and r10d,71303168 + mov DWORD[((112+12))+rsp],r9d + + movups xmm1,XMMWORD[16+rcx] + + movdqa xmm6,XMMWORD[64+rsp] + movdqa xmm7,XMMWORD[80+rsp] + + cmp rdx,8 + jb NEAR $L$ctr32_tail + + sub rdx,6 + cmp r10d,4194304 + je NEAR $L$ctr32_6x + + lea rcx,[128+rcx] + sub rdx,2 + jmp NEAR $L$ctr32_loop8 + +ALIGN 16 +$L$ctr32_6x: + shl eax,4 + mov r10d,48 + bswap r11d + lea rcx,[32+rax*1+rcx] + sub r10,rax + jmp NEAR $L$ctr32_loop6 + +ALIGN 16 +$L$ctr32_loop6: + add r8d,6 + movups xmm0,XMMWORD[((-48))+r10*1+rcx] +DB 102,15,56,220,209 + mov eax,r8d + xor eax,r11d +DB 102,15,56,220,217 +DB 0x0f,0x38,0xf1,0x44,0x24,12 + lea eax,[1+r8] +DB 102,15,56,220,225 + xor eax,r11d +DB 0x0f,0x38,0xf1,0x44,0x24,28 +DB 102,15,56,220,233 + lea eax,[2+r8] + xor eax,r11d +DB 102,15,56,220,241 +DB 0x0f,0x38,0xf1,0x44,0x24,44 + lea eax,[3+r8] +DB 102,15,56,220,249 + movups xmm1,XMMWORD[((-32))+r10*1+rcx] + xor eax,r11d + +DB 102,15,56,220,208 +DB 0x0f,0x38,0xf1,0x44,0x24,60 + lea eax,[4+r8] +DB 102,15,56,220,216 + xor eax,r11d +DB 0x0f,0x38,0xf1,0x44,0x24,76 +DB 102,15,56,220,224 + lea eax,[5+r8] + xor eax,r11d +DB 102,15,56,220,232 +DB 0x0f,0x38,0xf1,0x44,0x24,92 + mov rax,r10 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD[((-16))+r10*1+rcx] + + call $L$enc_loop6 + + movdqu xmm8,XMMWORD[rdi] + movdqu xmm9,XMMWORD[16+rdi] + movdqu xmm10,XMMWORD[32+rdi] + movdqu xmm11,XMMWORD[48+rdi] + movdqu xmm12,XMMWORD[64+rdi] + movdqu xmm13,XMMWORD[80+rdi] + lea rdi,[96+rdi] + movups xmm1,XMMWORD[((-64))+r10*1+rcx] + pxor xmm8,xmm2 + movaps xmm2,XMMWORD[rsp] + pxor xmm9,xmm3 + movaps xmm3,XMMWORD[16+rsp] + pxor xmm10,xmm4 + movaps xmm4,XMMWORD[32+rsp] + pxor xmm11,xmm5 + movaps xmm5,XMMWORD[48+rsp] + pxor xmm12,xmm6 + movaps xmm6,XMMWORD[64+rsp] + pxor xmm13,xmm7 + movaps xmm7,XMMWORD[80+rsp] + movdqu XMMWORD[rsi],xmm8 + movdqu XMMWORD[16+rsi],xmm9 + movdqu XMMWORD[32+rsi],xmm10 + movdqu XMMWORD[48+rsi],xmm11 + movdqu XMMWORD[64+rsi],xmm12 + movdqu XMMWORD[80+rsi],xmm13 + lea rsi,[96+rsi] + + sub rdx,6 + jnc NEAR $L$ctr32_loop6 + + add rdx,6 + jz NEAR $L$ctr32_done + + lea eax,[((-48))+r10] + lea rcx,[((-80))+r10*1+rcx] + neg eax + shr eax,4 + jmp NEAR $L$ctr32_tail + +ALIGN 32 +$L$ctr32_loop8: + add r8d,8 + movdqa xmm8,XMMWORD[96+rsp] +DB 102,15,56,220,209 + mov r9d,r8d + movdqa xmm9,XMMWORD[112+rsp] +DB 102,15,56,220,217 + bswap r9d + movups xmm0,XMMWORD[((32-128))+rcx] +DB 102,15,56,220,225 + xor r9d,r11d + nop +DB 102,15,56,220,233 + mov DWORD[((0+12))+rsp],r9d + lea r9,[1+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((48-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD[((16+12))+rsp],r9d + lea r9,[2+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((64-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD[((32+12))+rsp],r9d + lea r9,[3+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((80-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD[((48+12))+rsp],r9d + lea r9,[4+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((96-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD[((64+12))+rsp],r9d + lea r9,[5+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((112-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD[((80+12))+rsp],r9d + lea r9,[6+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((128-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 0x66,0x90 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD[((96+12))+rsp],r9d + lea r9,[7+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((144-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + xor r9d,r11d + movdqu xmm10,XMMWORD[rdi] +DB 102,15,56,220,232 + mov DWORD[((112+12))+rsp],r9d + cmp eax,11 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((160-128))+rcx] + + jb NEAR $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((176-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((192-128))+rcx] + je NEAR $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD[((208-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD[((224-128))+rcx] + jmp NEAR $L$ctr32_enc_done + +ALIGN 16 +$L$ctr32_enc_done: + movdqu xmm11,XMMWORD[16+rdi] + pxor xmm10,xmm0 + movdqu xmm12,XMMWORD[32+rdi] + pxor xmm11,xmm0 + movdqu xmm13,XMMWORD[48+rdi] + pxor xmm12,xmm0 + movdqu xmm14,XMMWORD[64+rdi] + pxor xmm13,xmm0 + movdqu xmm15,XMMWORD[80+rdi] + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movdqu xmm1,XMMWORD[96+rdi] + lea rdi,[128+rdi] + +DB 102,65,15,56,221,210 + pxor xmm1,xmm0 + movdqu xmm10,XMMWORD[((112-128))+rdi] +DB 102,65,15,56,221,219 + pxor xmm10,xmm0 + movdqa xmm11,XMMWORD[rsp] +DB 102,65,15,56,221,228 +DB 102,65,15,56,221,237 + movdqa xmm12,XMMWORD[16+rsp] + movdqa xmm13,XMMWORD[32+rsp] +DB 102,65,15,56,221,246 +DB 102,65,15,56,221,255 + movdqa xmm14,XMMWORD[48+rsp] + movdqa xmm15,XMMWORD[64+rsp] +DB 102,68,15,56,221,193 + movdqa xmm0,XMMWORD[80+rsp] + movups xmm1,XMMWORD[((16-128))+rcx] +DB 102,69,15,56,221,202 + + movups XMMWORD[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD[80+rsi],xmm7 + movdqa xmm7,xmm0 + movups XMMWORD[96+rsi],xmm8 + movups XMMWORD[112+rsi],xmm9 + lea rsi,[128+rsi] + + sub rdx,8 + jnc NEAR $L$ctr32_loop8 + + add rdx,8 + jz NEAR $L$ctr32_done + lea rcx,[((-128))+rcx] + +$L$ctr32_tail: + + + lea rcx,[16+rcx] + cmp rdx,4 + jb NEAR $L$ctr32_loop3 + je NEAR $L$ctr32_loop4 + + + shl eax,4 + movdqa xmm8,XMMWORD[96+rsp] + pxor xmm9,xmm9 + + movups xmm0,XMMWORD[16+rcx] +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea rcx,[((32-16))+rax*1+rcx] + neg rax +DB 102,15,56,220,225 + add rax,16 + movups xmm10,XMMWORD[rdi] +DB 102,15,56,220,233 +DB 102,15,56,220,241 + movups xmm11,XMMWORD[16+rdi] + movups xmm12,XMMWORD[32+rdi] +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 + + call $L$enc_loop8_enter + + movdqu xmm13,XMMWORD[48+rdi] + pxor xmm2,xmm10 + movdqu xmm10,XMMWORD[64+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm6,xmm10 + movdqu XMMWORD[48+rsi],xmm5 + movdqu XMMWORD[64+rsi],xmm6 + cmp rdx,6 + jb NEAR $L$ctr32_done + + movups xmm11,XMMWORD[80+rdi] + xorps xmm7,xmm11 + movups XMMWORD[80+rsi],xmm7 + je NEAR $L$ctr32_done + + movups xmm12,XMMWORD[96+rdi] + xorps xmm8,xmm12 + movups XMMWORD[96+rsi],xmm8 + jmp NEAR $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop4: +DB 102,15,56,220,209 + lea rcx,[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD[rcx] + jnz NEAR $L$ctr32_loop4 +DB 102,15,56,221,209 +DB 102,15,56,221,217 + movups xmm10,XMMWORD[rdi] + movups xmm11,XMMWORD[16+rdi] +DB 102,15,56,221,225 +DB 102,15,56,221,233 + movups xmm12,XMMWORD[32+rdi] + movups xmm13,XMMWORD[48+rdi] + + xorps xmm2,xmm10 + movups XMMWORD[rsi],xmm2 + xorps xmm3,xmm11 + movups XMMWORD[16+rsi],xmm3 + pxor xmm4,xmm12 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm5,xmm13 + movdqu XMMWORD[48+rsi],xmm5 + jmp NEAR $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop3: +DB 102,15,56,220,209 + lea rcx,[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD[rcx] + jnz NEAR $L$ctr32_loop3 +DB 102,15,56,221,209 +DB 102,15,56,221,217 +DB 102,15,56,221,225 + + movups xmm10,XMMWORD[rdi] + xorps xmm2,xmm10 + movups XMMWORD[rsi],xmm2 + cmp rdx,2 + jb NEAR $L$ctr32_done + + movups xmm11,XMMWORD[16+rdi] + xorps xmm3,xmm11 + movups XMMWORD[16+rsi],xmm3 + je NEAR $L$ctr32_done + + movups xmm12,XMMWORD[32+rdi] + xorps xmm4,xmm12 + movups XMMWORD[32+rsi],xmm4 + +$L$ctr32_done: + xorps xmm0,xmm0 + xor r11d,r11d + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movaps xmm6,XMMWORD[((-160))+rbp] + movaps XMMWORD[(-160)+rbp],xmm0 + movaps xmm7,XMMWORD[((-144))+rbp] + movaps XMMWORD[(-144)+rbp],xmm0 + movaps xmm8,XMMWORD[((-128))+rbp] + movaps XMMWORD[(-128)+rbp],xmm0 + movaps xmm9,XMMWORD[((-112))+rbp] + movaps XMMWORD[(-112)+rbp],xmm0 + movaps xmm10,XMMWORD[((-96))+rbp] + movaps XMMWORD[(-96)+rbp],xmm0 + movaps xmm11,XMMWORD[((-80))+rbp] + movaps XMMWORD[(-80)+rbp],xmm0 + movaps xmm12,XMMWORD[((-64))+rbp] + movaps XMMWORD[(-64)+rbp],xmm0 + movaps xmm13,XMMWORD[((-48))+rbp] + movaps XMMWORD[(-48)+rbp],xmm0 + movaps xmm14,XMMWORD[((-32))+rbp] + movaps XMMWORD[(-32)+rbp],xmm0 + movaps xmm15,XMMWORD[((-16))+rbp] + movaps XMMWORD[(-16)+rbp],xmm0 + movaps XMMWORD[rsp],xmm0 + movaps XMMWORD[16+rsp],xmm0 + movaps XMMWORD[32+rsp],xmm0 + movaps XMMWORD[48+rsp],xmm0 + movaps XMMWORD[64+rsp],xmm0 + movaps XMMWORD[80+rsp],xmm0 + movaps XMMWORD[96+rsp],xmm0 + movaps XMMWORD[112+rsp],xmm0 + lea rsp,[rbp] + pop rbp +$L$ctr32_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ctr32_encrypt_blocks: +global aesni_xts_encrypt + +ALIGN 16 +aesni_xts_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + lea rax,[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD[(-168)+rax],xmm6 + movaps XMMWORD[(-152)+rax],xmm7 + movaps XMMWORD[(-136)+rax],xmm8 + movaps XMMWORD[(-120)+rax],xmm9 + movaps XMMWORD[(-104)+rax],xmm10 + movaps XMMWORD[(-88)+rax],xmm11 + movaps XMMWORD[(-72)+rax],xmm12 + movaps XMMWORD[(-56)+rax],xmm13 + movaps XMMWORD[(-40)+rax],xmm14 + movaps XMMWORD[(-24)+rax],xmm15 +$L$xts_enc_body: + lea rbp,[((-8))+rax] + movups xmm2,XMMWORD[r9] + mov eax,DWORD[240+r8] + mov r10d,DWORD[240+rcx] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_8: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_enc1_8 +DB 102,15,56,221,209 + movups xmm0,XMMWORD[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD[16+r10*1+rcx] + + movdqa xmm8,XMMWORD[$L$xts_magic] + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,0x5f + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD[96+rsp],xmm1 + + sub rdx,16*6 + jc NEAR $L$xts_enc_short + + mov eax,16+96 + lea rcx,[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD[16+r11] + mov r10,rax + lea r8,[$L$xts_magic] + jmp NEAR $L$xts_enc_grandloop + +ALIGN 32 +$L$xts_enc_grandloop: + movdqu xmm2,XMMWORD[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,220,209 + movdqu xmm5,XMMWORD[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,220,217 + movdqu xmm6,XMMWORD[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,220,225 + movdqu xmm7,XMMWORD[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,220,233 + movups xmm0,XMMWORD[32+r11] + lea rdi,[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,220,241 + pxor xmm11,xmm9 + movdqa XMMWORD[rsp],xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,220,208 + pxor xmm13,xmm9 + movdqa XMMWORD[16+rsp],xmm11 +DB 102,15,56,220,216 + pxor xmm14,xmm9 + movdqa XMMWORD[32+rsp],xmm12 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pxor xmm8,xmm9 + movdqa XMMWORD[64+rsp],xmm14 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD[64+r11] + movdqa XMMWORD[80+rsp],xmm8 + pshufd xmm9,xmm15,0x5f + jmp NEAR $L$xts_enc_loop6 +ALIGN 32 +$L$xts_enc_loop6: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD[((-80))+rax*1+rcx] + jnz NEAR $L$xts_enc_loop6 + + movdqa xmm8,XMMWORD[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD[r11] +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,220,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,220,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,220,248 + movups xmm0,XMMWORD[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movdqa XMMWORD[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,220,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,220,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,220,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD[r11] +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,221,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,221,92,36,16 +DB 102,15,56,221,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,221,108,36,48 +DB 102,15,56,221,116,36,64 +DB 102,15,56,221,124,36,80 + pxor xmm15,xmm9 + + lea rsi,[96+rsi] + movups XMMWORD[(-96)+rsi],xmm2 + movups XMMWORD[(-80)+rsi],xmm3 + movups XMMWORD[(-64)+rsi],xmm4 + movups XMMWORD[(-48)+rsi],xmm5 + movups XMMWORD[(-32)+rsi],xmm6 + movups XMMWORD[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc NEAR $L$xts_enc_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_enc_short: + + mov r10d,eax + pxor xmm10,xmm0 + add rdx,16*6 + jz NEAR $L$xts_enc_done + + pxor xmm11,xmm0 + cmp rdx,0x20 + jb NEAR $L$xts_enc_one + pxor xmm12,xmm0 + je NEAR $L$xts_enc_two + + pxor xmm13,xmm0 + cmp rdx,0x40 + jb NEAR $L$xts_enc_three + pxor xmm14,xmm0 + je NEAR $L$xts_enc_four + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD[64+rdi] + lea rdi,[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + pxor xmm7,xmm7 + + call _aesni_encrypt6 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD[32+rsi],xmm4 + movdqu XMMWORD[48+rsi],xmm5 + movdqu XMMWORD[64+rsi],xmm6 + lea rsi,[80+rsi] + jmp NEAR $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_one: + movups xmm2,XMMWORD[rdi] + lea rdi,[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_9: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_9 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD[rsi],xmm2 + lea rsi,[16+rsi] + jmp NEAR $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_two: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + lea rdi,[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_encrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + lea rsi,[32+rsi] + jmp NEAR $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_three: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + movups xmm4,XMMWORD[32+rdi] + lea rdi,[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + lea rsi,[48+rsi] + jmp NEAR $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_four: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + movups xmm4,XMMWORD[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD[48+rdi] + lea rdi,[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_encrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + pxor xmm4,xmm12 + movdqu XMMWORD[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD[16+rsi],xmm3 + movdqu XMMWORD[32+rsi],xmm4 + movdqu XMMWORD[48+rsi],xmm5 + lea rsi,[64+rsi] + jmp NEAR $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_done: + and r9,15 + jz NEAR $L$xts_enc_ret + mov rdx,r9 + +$L$xts_enc_steal: + movzx eax,BYTE[rdi] + movzx ecx,BYTE[((-16))+rsi] + lea rdi,[1+rdi] + mov BYTE[((-16))+rsi],al + mov BYTE[rsi],cl + lea rsi,[1+rsi] + sub rdx,1 + jnz NEAR $L$xts_enc_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD[((-16))+rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_10: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_10 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD[(-16)+rsi],xmm2 + +$L$xts_enc_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movaps xmm6,XMMWORD[((-160))+rbp] + movaps XMMWORD[(-160)+rbp],xmm0 + movaps xmm7,XMMWORD[((-144))+rbp] + movaps XMMWORD[(-144)+rbp],xmm0 + movaps xmm8,XMMWORD[((-128))+rbp] + movaps XMMWORD[(-128)+rbp],xmm0 + movaps xmm9,XMMWORD[((-112))+rbp] + movaps XMMWORD[(-112)+rbp],xmm0 + movaps xmm10,XMMWORD[((-96))+rbp] + movaps XMMWORD[(-96)+rbp],xmm0 + movaps xmm11,XMMWORD[((-80))+rbp] + movaps XMMWORD[(-80)+rbp],xmm0 + movaps xmm12,XMMWORD[((-64))+rbp] + movaps XMMWORD[(-64)+rbp],xmm0 + movaps xmm13,XMMWORD[((-48))+rbp] + movaps XMMWORD[(-48)+rbp],xmm0 + movaps xmm14,XMMWORD[((-32))+rbp] + movaps XMMWORD[(-32)+rbp],xmm0 + movaps xmm15,XMMWORD[((-16))+rbp] + movaps XMMWORD[(-16)+rbp],xmm0 + movaps XMMWORD[rsp],xmm0 + movaps XMMWORD[16+rsp],xmm0 + movaps XMMWORD[32+rsp],xmm0 + movaps XMMWORD[48+rsp],xmm0 + movaps XMMWORD[64+rsp],xmm0 + movaps XMMWORD[80+rsp],xmm0 + movaps XMMWORD[96+rsp],xmm0 + lea rsp,[rbp] + pop rbp +$L$xts_enc_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_encrypt: +global aesni_xts_decrypt + +ALIGN 16 +aesni_xts_decrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_decrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + lea rax,[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD[(-168)+rax],xmm6 + movaps XMMWORD[(-152)+rax],xmm7 + movaps XMMWORD[(-136)+rax],xmm8 + movaps XMMWORD[(-120)+rax],xmm9 + movaps XMMWORD[(-104)+rax],xmm10 + movaps XMMWORD[(-88)+rax],xmm11 + movaps XMMWORD[(-72)+rax],xmm12 + movaps XMMWORD[(-56)+rax],xmm13 + movaps XMMWORD[(-40)+rax],xmm14 + movaps XMMWORD[(-24)+rax],xmm15 +$L$xts_dec_body: + lea rbp,[((-8))+rax] + movups xmm2,XMMWORD[r9] + mov eax,DWORD[240+r8] + mov r10d,DWORD[240+rcx] + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[16+r8] + lea r8,[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_11: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[r8] + lea r8,[16+r8] + jnz NEAR $L$oop_enc1_11 +DB 102,15,56,221,209 + xor eax,eax + test rdx,15 + setnz al + shl rax,4 + sub rdx,rax + + movups xmm0,XMMWORD[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD[16+r10*1+rcx] + + movdqa xmm8,XMMWORD[$L$xts_magic] + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,0x5f + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD[96+rsp],xmm1 + + sub rdx,16*6 + jc NEAR $L$xts_dec_short + + mov eax,16+96 + lea rcx,[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD[16+r11] + mov r10,rax + lea r8,[$L$xts_magic] + jmp NEAR $L$xts_dec_grandloop + +ALIGN 32 +$L$xts_dec_grandloop: + movdqu xmm2,XMMWORD[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,222,209 + movdqu xmm5,XMMWORD[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,222,217 + movdqu xmm6,XMMWORD[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,222,225 + movdqu xmm7,XMMWORD[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,222,233 + movups xmm0,XMMWORD[32+r11] + lea rdi,[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,222,241 + pxor xmm11,xmm9 + movdqa XMMWORD[rsp],xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,222,208 + pxor xmm13,xmm9 + movdqa XMMWORD[16+rsp],xmm11 +DB 102,15,56,222,216 + pxor xmm14,xmm9 + movdqa XMMWORD[32+rsp],xmm12 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pxor xmm8,xmm9 + movdqa XMMWORD[64+rsp],xmm14 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD[64+r11] + movdqa XMMWORD[80+rsp],xmm8 + pshufd xmm9,xmm15,0x5f + jmp NEAR $L$xts_dec_loop6 +ALIGN 32 +$L$xts_dec_loop6: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD[((-80))+rax*1+rcx] + jnz NEAR $L$xts_dec_loop6 + + movdqa xmm8,XMMWORD[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD[r11] +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,222,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,222,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,222,248 + movups xmm0,XMMWORD[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movdqa XMMWORD[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,222,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,222,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,222,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD[r11] +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,223,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,223,92,36,16 +DB 102,15,56,223,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,223,108,36,48 +DB 102,15,56,223,116,36,64 +DB 102,15,56,223,124,36,80 + pxor xmm15,xmm9 + + lea rsi,[96+rsi] + movups XMMWORD[(-96)+rsi],xmm2 + movups XMMWORD[(-80)+rsi],xmm3 + movups XMMWORD[(-64)+rsi],xmm4 + movups XMMWORD[(-48)+rsi],xmm5 + movups XMMWORD[(-32)+rsi],xmm6 + movups XMMWORD[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc NEAR $L$xts_dec_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_dec_short: + + mov r10d,eax + pxor xmm10,xmm0 + pxor xmm11,xmm0 + add rdx,16*6 + jz NEAR $L$xts_dec_done + + pxor xmm12,xmm0 + cmp rdx,0x20 + jb NEAR $L$xts_dec_one + pxor xmm13,xmm0 + je NEAR $L$xts_dec_two + + pxor xmm14,xmm0 + cmp rdx,0x40 + jb NEAR $L$xts_dec_three + je NEAR $L$xts_dec_four + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqu xmm4,XMMWORD[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD[64+rdi] + lea rdi,[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_decrypt6 + + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm14,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pcmpgtd xmm14,xmm15 + movdqu XMMWORD[64+rsi],xmm6 + lea rsi,[80+rsi] + pshufd xmm11,xmm14,0x13 + and r9,15 + jz NEAR $L$xts_dec_ret + + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm11,xmm8 + pxor xmm11,xmm15 + jmp NEAR $L$xts_dec_done2 + +ALIGN 16 +$L$xts_dec_one: + movups xmm2,XMMWORD[rdi] + lea rdi,[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_12: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_12 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD[rsi],xmm2 + movdqa xmm11,xmm12 + lea rsi,[16+rsi] + jmp NEAR $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_two: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + lea rdi,[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_decrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movdqa xmm11,xmm13 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + lea rsi,[32+rsi] + jmp NEAR $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_three: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + movups xmm4,XMMWORD[32+rdi] + lea rdi,[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + movdqa xmm11,xmm14 + xorps xmm4,xmm12 + movups XMMWORD[rsi],xmm2 + movups XMMWORD[16+rsi],xmm3 + movups XMMWORD[32+rsi],xmm4 + lea rsi,[48+rsi] + jmp NEAR $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_four: + movups xmm2,XMMWORD[rdi] + movups xmm3,XMMWORD[16+rdi] + movups xmm4,XMMWORD[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD[48+rdi] + lea rdi,[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_decrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + movdqa xmm11,xmm15 + pxor xmm4,xmm12 + movdqu XMMWORD[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD[16+rsi],xmm3 + movdqu XMMWORD[32+rsi],xmm4 + movdqu XMMWORD[48+rsi],xmm5 + lea rsi,[64+rsi] + jmp NEAR $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_done: + and r9,15 + jz NEAR $L$xts_dec_ret +$L$xts_dec_done2: + mov rdx,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD[rdi] + xorps xmm2,xmm11 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_13: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_13 +DB 102,15,56,223,209 + xorps xmm2,xmm11 + movups XMMWORD[rsi],xmm2 + +$L$xts_dec_steal: + movzx eax,BYTE[16+rdi] + movzx ecx,BYTE[rsi] + lea rdi,[1+rdi] + mov BYTE[rsi],al + mov BYTE[16+rsi],cl + lea rsi,[1+rsi] + sub rdx,1 + jnz NEAR $L$xts_dec_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD[rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_14: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_14 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movups XMMWORD[rsi],xmm2 + +$L$xts_dec_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + movaps xmm6,XMMWORD[((-160))+rbp] + movaps XMMWORD[(-160)+rbp],xmm0 + movaps xmm7,XMMWORD[((-144))+rbp] + movaps XMMWORD[(-144)+rbp],xmm0 + movaps xmm8,XMMWORD[((-128))+rbp] + movaps XMMWORD[(-128)+rbp],xmm0 + movaps xmm9,XMMWORD[((-112))+rbp] + movaps XMMWORD[(-112)+rbp],xmm0 + movaps xmm10,XMMWORD[((-96))+rbp] + movaps XMMWORD[(-96)+rbp],xmm0 + movaps xmm11,XMMWORD[((-80))+rbp] + movaps XMMWORD[(-80)+rbp],xmm0 + movaps xmm12,XMMWORD[((-64))+rbp] + movaps XMMWORD[(-64)+rbp],xmm0 + movaps xmm13,XMMWORD[((-48))+rbp] + movaps XMMWORD[(-48)+rbp],xmm0 + movaps xmm14,XMMWORD[((-32))+rbp] + movaps XMMWORD[(-32)+rbp],xmm0 + movaps xmm15,XMMWORD[((-16))+rbp] + movaps XMMWORD[(-16)+rbp],xmm0 + movaps XMMWORD[rsp],xmm0 + movaps XMMWORD[16+rsp],xmm0 + movaps XMMWORD[32+rsp],xmm0 + movaps XMMWORD[48+rsp],xmm0 + movaps XMMWORD[64+rsp],xmm0 + movaps XMMWORD[80+rsp],xmm0 + movaps XMMWORD[96+rsp],xmm0 + lea rsp,[rbp] + pop rbp +$L$xts_dec_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_decrypt: +global aesni_cbc_encrypt + +ALIGN 16 +aesni_cbc_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + test rdx,rdx + jz NEAR $L$cbc_ret + + mov r10d,DWORD[240+rcx] + mov r11,rcx + test r9d,r9d + jz NEAR $L$cbc_decrypt + + movups xmm2,XMMWORD[r8] + mov eax,r10d + cmp rdx,16 + jb NEAR $L$cbc_enc_tail + sub rdx,16 + jmp NEAR $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop: + movups xmm3,XMMWORD[rdi] + lea rdi,[16+rdi] + + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + xorps xmm3,xmm0 + lea rcx,[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_15: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_enc1_15 +DB 102,15,56,221,209 + mov eax,r10d + mov rcx,r11 + movups XMMWORD[rsi],xmm2 + lea rsi,[16+rsi] + sub rdx,16 + jnc NEAR $L$cbc_enc_loop + add rdx,16 + jnz NEAR $L$cbc_enc_tail + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movups XMMWORD[r8],xmm2 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + jmp NEAR $L$cbc_ret + +$L$cbc_enc_tail: + mov rcx,rdx + xchg rsi,rdi + DD 0x9066A4F3 + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 0x9066AAF3 + lea rdi,[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp NEAR $L$cbc_enc_loop + +ALIGN 16 +$L$cbc_decrypt: + cmp rdx,16 + jne NEAR $L$cbc_decrypt_bulk + + + + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[r8] + movdqa xmm4,xmm2 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16: +DB 102,15,56,222,209 + dec r10d + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_16 +DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqu XMMWORD[r8],xmm4 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$cbc_ret +ALIGN 16 +$L$cbc_decrypt_bulk: + lea rax,[rsp] + push rbp + sub rsp,176 + and rsp,-16 + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$cbc_decrypt_body: + lea rbp,[((-8))+rax] + movups xmm10,XMMWORD[r8] + mov eax,r10d + cmp rdx,0x50 + jbe NEAR $L$cbc_dec_tail + + movups xmm0,XMMWORD[rcx] + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD[80+rdi] + movdqa xmm15,xmm6 + mov r9d,DWORD[((OPENSSL_ia32cap_P+4))] + cmp rdx,0x70 + jbe NEAR $L$cbc_dec_six_or_seven + + and r9d,71303168 + sub rdx,0x50 + cmp r9d,4194304 + je NEAR $L$cbc_dec_loop6_enter + sub rdx,0x20 + lea rcx,[112+rcx] + jmp NEAR $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8: + movups XMMWORD[rsi],xmm9 + lea rsi,[16+rsi] +$L$cbc_dec_loop8_enter: + movdqu xmm8,XMMWORD[96+rdi] + pxor xmm2,xmm0 + movdqu xmm9,XMMWORD[112+rdi] + pxor xmm3,xmm0 + movups xmm1,XMMWORD[((16-112))+rcx] + pxor xmm4,xmm0 + xor r11,r11 + cmp rdx,0x70 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + +DB 102,15,56,222,209 + pxor xmm9,xmm0 + movups xmm0,XMMWORD[((32-112))+rcx] +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 + setnc r11b + shl r11,7 +DB 102,68,15,56,222,201 + add r11,rdi + movups xmm1,XMMWORD[((48-112))+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((64-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((80-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((96-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((112-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((128-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((144-112))+rcx] + cmp eax,11 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((160-112))+rcx] + jb NEAR $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((176-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((192-112))+rcx] + je NEAR $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD[((208-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD[((224-112))+rcx] + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_done: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + pxor xmm10,xmm0 + pxor xmm11,xmm0 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm12,xmm0 + pxor xmm13,xmm0 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movdqu xmm1,XMMWORD[80+rdi] + +DB 102,65,15,56,223,210 + movdqu xmm10,XMMWORD[96+rdi] + pxor xmm1,xmm0 +DB 102,65,15,56,223,219 + pxor xmm10,xmm0 + movdqu xmm0,XMMWORD[112+rdi] +DB 102,65,15,56,223,228 + lea rdi,[128+rdi] + movdqu xmm11,XMMWORD[r11] +DB 102,65,15,56,223,237 +DB 102,65,15,56,223,246 + movdqu xmm12,XMMWORD[16+r11] + movdqu xmm13,XMMWORD[32+r11] +DB 102,65,15,56,223,255 +DB 102,68,15,56,223,193 + movdqu xmm14,XMMWORD[48+r11] + movdqu xmm15,XMMWORD[64+r11] +DB 102,69,15,56,223,202 + movdqa xmm10,xmm0 + movdqu xmm1,XMMWORD[80+r11] + movups xmm0,XMMWORD[((-112))+rcx] + + movups XMMWORD[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD[80+rsi],xmm7 + movdqa xmm7,xmm1 + movups XMMWORD[96+rsi],xmm8 + lea rsi,[112+rsi] + + sub rdx,0x80 + ja NEAR $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + lea rcx,[((-112))+rcx] + add rdx,0x70 + jle NEAR $L$cbc_dec_clear_tail_collected + movups XMMWORD[rsi],xmm9 + lea rsi,[16+rsi] + cmp rdx,0x50 + jbe NEAR $L$cbc_dec_tail + + movaps xmm2,xmm11 +$L$cbc_dec_six_or_seven: + cmp rdx,0x60 + ja NEAR $L$cbc_dec_seven + + movaps xmm8,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + lea rsi,[80+rsi] + movdqa xmm2,xmm7 + pxor xmm7,xmm7 + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_seven: + movups xmm8,XMMWORD[96+rdi] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups xmm9,XMMWORD[80+rdi] + pxor xmm2,xmm10 + movups xmm10,XMMWORD[96+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm8,xmm9 + movdqu XMMWORD[80+rsi],xmm7 + pxor xmm7,xmm7 + lea rsi,[96+rsi] + movdqa xmm2,xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_loop6: + movups XMMWORD[rsi],xmm7 + lea rsi,[16+rsi] + movdqu xmm2,XMMWORD[rdi] + movdqu xmm3,XMMWORD[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD[80+rdi] + movdqa xmm15,xmm6 +$L$cbc_dec_loop6_enter: + lea rdi,[96+rdi] + movdqa xmm8,xmm7 + + call _aesni_decrypt6 + + pxor xmm2,xmm10 + movdqa xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm6,xmm14 + mov rcx,r11 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm7,xmm15 + mov eax,r10d + movdqu XMMWORD[64+rsi],xmm6 + lea rsi,[80+rsi] + sub rdx,0x60 + ja NEAR $L$cbc_dec_loop6 + + movdqa xmm2,xmm7 + add rdx,0x50 + jle NEAR $L$cbc_dec_clear_tail_collected + movups XMMWORD[rsi],xmm7 + lea rsi,[16+rsi] + +$L$cbc_dec_tail: + movups xmm2,XMMWORD[rdi] + sub rdx,0x10 + jbe NEAR $L$cbc_dec_one + + movups xmm3,XMMWORD[16+rdi] + movaps xmm11,xmm2 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_two + + movups xmm4,XMMWORD[32+rdi] + movaps xmm12,xmm3 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_three + + movups xmm5,XMMWORD[48+rdi] + movaps xmm13,xmm4 + sub rdx,0x10 + jbe NEAR $L$cbc_dec_four + + movups xmm6,XMMWORD[64+rdi] + movaps xmm14,xmm5 + movaps xmm15,xmm6 + xorps xmm7,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm15 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD[48+rsi],xmm5 + pxor xmm5,xmm5 + lea rsi,[64+rsi] + movdqa xmm2,xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + sub rdx,0x10 + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_one: + movaps xmm11,xmm2 + movups xmm0,XMMWORD[rcx] + movups xmm1,XMMWORD[16+rcx] + lea rcx,[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_17: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD[rcx] + lea rcx,[16+rcx] + jnz NEAR $L$oop_dec1_17 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movaps xmm10,xmm11 + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two: + movaps xmm12,xmm3 + call _aesni_decrypt2 + pxor xmm2,xmm10 + movaps xmm10,xmm12 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + movdqa xmm2,xmm3 + pxor xmm3,xmm3 + lea rsi,[16+rsi] + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three: + movaps xmm13,xmm4 + call _aesni_decrypt3 + pxor xmm2,xmm10 + movaps xmm10,xmm13 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + movdqa xmm2,xmm4 + pxor xmm4,xmm4 + lea rsi,[32+rsi] + jmp NEAR $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four: + movaps xmm14,xmm5 + call _aesni_decrypt4 + pxor xmm2,xmm10 + movaps xmm10,xmm14 + pxor xmm3,xmm11 + movdqu XMMWORD[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD[16+rsi],xmm3 + pxor xmm3,xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD[32+rsi],xmm4 + pxor xmm4,xmm4 + movdqa xmm2,xmm5 + pxor xmm5,xmm5 + lea rsi,[48+rsi] + jmp NEAR $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 +$L$cbc_dec_tail_collected: + movups XMMWORD[r8],xmm10 + and rdx,15 + jnz NEAR $L$cbc_dec_tail_partial + movups XMMWORD[rsi],xmm2 + pxor xmm2,xmm2 + jmp NEAR $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial: + movaps XMMWORD[rsp],xmm2 + pxor xmm2,xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,[rsp] + DD 0x9066A4F3 + movdqa XMMWORD[rsp],xmm2 + +$L$cbc_dec_ret: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + movaps xmm6,XMMWORD[16+rsp] + movaps XMMWORD[16+rsp],xmm0 + movaps xmm7,XMMWORD[32+rsp] + movaps XMMWORD[32+rsp],xmm0 + movaps xmm8,XMMWORD[48+rsp] + movaps XMMWORD[48+rsp],xmm0 + movaps xmm9,XMMWORD[64+rsp] + movaps XMMWORD[64+rsp],xmm0 + movaps xmm10,XMMWORD[80+rsp] + movaps XMMWORD[80+rsp],xmm0 + movaps xmm11,XMMWORD[96+rsp] + movaps XMMWORD[96+rsp],xmm0 + movaps xmm12,XMMWORD[112+rsp] + movaps XMMWORD[112+rsp],xmm0 + movaps xmm13,XMMWORD[128+rsp] + movaps XMMWORD[128+rsp],xmm0 + movaps xmm14,XMMWORD[144+rsp] + movaps XMMWORD[144+rsp],xmm0 + movaps xmm15,XMMWORD[160+rsp] + movaps XMMWORD[160+rsp],xmm0 + lea rsp,[rbp] + pop rbp +$L$cbc_ret: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_encrypt: +global aesni_set_decrypt_key + +ALIGN 16 +aesni_set_decrypt_key: +DB 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz NEAR $L$dec_key_ret + lea rcx,[16+rdx*1+r8] + + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[rcx] + movups XMMWORD[rcx],xmm0 + movups XMMWORD[r8],xmm1 + lea r8,[16+r8] + lea rcx,[((-16))+rcx] + +$L$dec_key_inverse: + movups xmm0,XMMWORD[r8] + movups xmm1,XMMWORD[rcx] +DB 102,15,56,219,192 +DB 102,15,56,219,201 + lea r8,[16+r8] + lea rcx,[((-16))+rcx] + movups XMMWORD[16+rcx],xmm0 + movups XMMWORD[(-16)+r8],xmm1 + cmp rcx,r8 + ja NEAR $L$dec_key_inverse + + movups xmm0,XMMWORD[r8] +DB 102,15,56,219,192 + pxor xmm1,xmm1 + movups XMMWORD[rcx],xmm0 + pxor xmm0,xmm0 +$L$dec_key_ret: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_decrypt_key: + +global aesni_set_encrypt_key + +ALIGN 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +DB 0x48,0x83,0xEC,0x08 + mov rax,-1 + test rcx,rcx + jz NEAR $L$enc_key_ret + test r8,r8 + jz NEAR $L$enc_key_ret + + mov r10d,268437504 + movups xmm0,XMMWORD[rcx] + xorps xmm4,xmm4 + and r10d,DWORD[((OPENSSL_ia32cap_P+4))] + lea rax,[16+r8] + cmp edx,256 + je NEAR $L$14rounds + cmp edx,192 + je NEAR $L$12rounds + cmp edx,128 + jne NEAR $L$bad_keybits + +$L$10rounds: + mov edx,9 + cmp r10d,268435456 + je NEAR $L$10rounds_alt + + movups XMMWORD[r8],xmm0 +DB 102,15,58,223,200,1 + call $L$key_expansion_128_cold +DB 102,15,58,223,200,2 + call $L$key_expansion_128 +DB 102,15,58,223,200,4 + call $L$key_expansion_128 +DB 102,15,58,223,200,8 + call $L$key_expansion_128 +DB 102,15,58,223,200,16 + call $L$key_expansion_128 +DB 102,15,58,223,200,32 + call $L$key_expansion_128 +DB 102,15,58,223,200,64 + call $L$key_expansion_128 +DB 102,15,58,223,200,128 + call $L$key_expansion_128 +DB 102,15,58,223,200,27 + call $L$key_expansion_128 +DB 102,15,58,223,200,54 + call $L$key_expansion_128 + movups XMMWORD[rax],xmm0 + mov DWORD[80+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$10rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate] + mov r10d,8 + movdqa xmm4,XMMWORD[$L$key_rcon1] + movdqa xmm2,xmm0 + movdqu XMMWORD[r8],xmm0 + jmp NEAR $L$oop_key128 + +ALIGN 16 +$L$oop_key128: +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + lea rax,[16+rax] + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[(-16)+rax],xmm0 + movdqa xmm2,xmm0 + + dec r10d + jnz NEAR $L$oop_key128 + + movdqa xmm4,XMMWORD[$L$key_rcon1b] + +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[rax],xmm0 + + movdqa xmm2,xmm0 +DB 102,15,56,0,197 +DB 102,15,56,221,196 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD[16+rax],xmm0 + + mov DWORD[96+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$12rounds: + movq xmm2,QWORD[16+rcx] + mov edx,11 + cmp r10d,268435456 + je NEAR $L$12rounds_alt + + movups XMMWORD[r8],xmm0 +DB 102,15,58,223,202,1 + call $L$key_expansion_192a_cold +DB 102,15,58,223,202,2 + call $L$key_expansion_192b +DB 102,15,58,223,202,4 + call $L$key_expansion_192a +DB 102,15,58,223,202,8 + call $L$key_expansion_192b +DB 102,15,58,223,202,16 + call $L$key_expansion_192a +DB 102,15,58,223,202,32 + call $L$key_expansion_192b +DB 102,15,58,223,202,64 + call $L$key_expansion_192a +DB 102,15,58,223,202,128 + call $L$key_expansion_192b + movups XMMWORD[rax],xmm0 + mov DWORD[48+rax],edx + xor rax,rax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$12rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate192] + movdqa xmm4,XMMWORD[$L$key_rcon1] + mov r10d,8 + movdqu XMMWORD[r8],xmm0 + jmp NEAR $L$oop_key192 + +ALIGN 16 +$L$oop_key192: + movq QWORD[rax],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 +DB 102,15,56,221,212 + pslld xmm4,1 + lea rax,[24+rax] + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + + pshufd xmm3,xmm0,0xff + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD[(-16)+rax],xmm0 + + dec r10d + jnz NEAR $L$oop_key192 + + mov DWORD[32+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$14rounds: + movups xmm2,XMMWORD[16+rcx] + mov edx,13 + lea rax,[16+rax] + cmp r10d,268435456 + je NEAR $L$14rounds_alt + + movups XMMWORD[r8],xmm0 + movups XMMWORD[16+r8],xmm2 +DB 102,15,58,223,202,1 + call $L$key_expansion_256a_cold +DB 102,15,58,223,200,1 + call $L$key_expansion_256b +DB 102,15,58,223,202,2 + call $L$key_expansion_256a +DB 102,15,58,223,200,2 + call $L$key_expansion_256b +DB 102,15,58,223,202,4 + call $L$key_expansion_256a +DB 102,15,58,223,200,4 + call $L$key_expansion_256b +DB 102,15,58,223,202,8 + call $L$key_expansion_256a +DB 102,15,58,223,200,8 + call $L$key_expansion_256b +DB 102,15,58,223,202,16 + call $L$key_expansion_256a +DB 102,15,58,223,200,16 + call $L$key_expansion_256b +DB 102,15,58,223,202,32 + call $L$key_expansion_256a +DB 102,15,58,223,200,32 + call $L$key_expansion_256b +DB 102,15,58,223,202,64 + call $L$key_expansion_256a + movups XMMWORD[rax],xmm0 + mov DWORD[16+rax],edx + xor rax,rax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$14rounds_alt: + movdqa xmm5,XMMWORD[$L$key_rotate] + movdqa xmm4,XMMWORD[$L$key_rcon1] + mov r10d,7 + movdqu XMMWORD[r8],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD[16+r8],xmm2 + jmp NEAR $L$oop_key256 + +ALIGN 16 +$L$oop_key256: +DB 102,15,56,0,213 +DB 102,15,56,221,212 + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + + pxor xmm0,xmm2 + movdqu XMMWORD[rax],xmm0 + + dec r10d + jz NEAR $L$done_key256 + + pshufd xmm2,xmm0,0xff + pxor xmm3,xmm3 +DB 102,15,56,221,211 + + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + + pxor xmm2,xmm1 + movdqu XMMWORD[16+rax],xmm2 + lea rax,[32+rax] + movdqa xmm1,xmm2 + + jmp NEAR $L$oop_key256 + +$L$done_key256: + mov DWORD[16+rax],edx + xor eax,eax + jmp NEAR $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits: + mov rax,-2 +$L$enc_key_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_encrypt_key: + +ALIGN 16 +$L$key_expansion_128: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] +$L$key_expansion_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192a: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] +$L$key_expansion_192a_cold: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD[16+rax],xmm3 + lea rax,[32+rax] + jmp NEAR $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a: + movups XMMWORD[rax],xmm2 + lea rax,[16+rax] +$L$key_expansion_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_256b: + movups XMMWORD[rax],xmm0 + lea rax,[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + DB 0F3h,0C3h ;repret + + +ALIGN 64 +$L$bswap_mask: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32: + DD 6,6,6,0 +$L$increment64: + DD 1,0,0,0 +$L$xts_magic: + DD 0x87,0,1,0 +$L$increment1: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$key_rotate: + DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +$L$key_rotate192: + DD 0x04070605,0x04070605,0x04070605,0x04070605 +$L$key_rcon1: + DD 1,1,1,1 +$L$key_rcon1b: + DD 0x1b,0x1b,0x1b,0x1b + +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +ecb_ccm64_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[rax] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + lea rax,[88+rax] + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +ctr_xts_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[160+r8] + lea rsi,[((-160))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + jmp NEAR $L$common_rbp_tail + + +ALIGN 16 +cbc_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[152+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$cbc_decrypt_bulk] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + lea r10,[$L$cbc_decrypt_body] + cmp rbx,r10 + jb NEAR $L$restore_cbc_rax + + lea r10,[$L$cbc_ret] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[16+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + +$L$common_rbp_tail: + mov rax,QWORD[160+r8] + mov rbp,QWORD[rax] + lea rax,[8+rax] + mov QWORD[160+r8],rbp + jmp NEAR $L$common_seh_tail + +$L$restore_cbc_rax: + mov rax,QWORD[120+r8] + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase + DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase + DD $L$SEH_info_ecb wrt ..imagebase + + DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase + DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase + DD $L$SEH_info_ccm64_enc wrt ..imagebase + + DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase + DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase + DD $L$SEH_info_ccm64_dec wrt ..imagebase + + DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase + DD $L$SEH_info_ctr32 wrt ..imagebase + + DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase + DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase + DD $L$SEH_info_xts_enc wrt ..imagebase + + DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase + DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase + DD $L$SEH_info_xts_dec wrt ..imagebase + DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase + DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase + DD $L$SEH_info_cbc wrt ..imagebase + + DD aesni_set_decrypt_key wrt ..imagebase + DD $L$SEH_end_set_decrypt_key wrt ..imagebase + DD $L$SEH_info_key wrt ..imagebase + + DD aesni_set_encrypt_key wrt ..imagebase + DD $L$SEH_end_set_encrypt_key wrt ..imagebase + DD $L$SEH_info_key wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_ecb: +DB 9,0,0,0 + DD ecb_ccm64_se_handler wrt ..imagebase + DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase +$L$SEH_info_ccm64_enc: +DB 9,0,0,0 + DD ecb_ccm64_se_handler wrt ..imagebase + DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase +$L$SEH_info_ccm64_dec: +DB 9,0,0,0 + DD ecb_ccm64_se_handler wrt ..imagebase + DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase +$L$SEH_info_ctr32: +DB 9,0,0,0 + DD ctr_xts_se_handler wrt ..imagebase + DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase +$L$SEH_info_xts_enc: +DB 9,0,0,0 + DD ctr_xts_se_handler wrt ..imagebase + DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase +$L$SEH_info_xts_dec: +DB 9,0,0,0 + DD ctr_xts_se_handler wrt ..imagebase + DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase +$L$SEH_info_cbc: +DB 9,0,0,0 + DD cbc_se_handler wrt ..imagebase +$L$SEH_info_key: +DB 0x01,0x04,0x01,0x00 +DB 0x04,0x02,0x00,0x00 diff --git a/third_party/boringssl/win-x86_64/crypto/aes/bsaes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/aes/bsaes-x86_64.asm new file mode 100644 index 00000000000..6d75248d1f2 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/aes/bsaes-x86_64.asm @@ -0,0 +1,2733 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN asm_AES_encrypt +EXTERN asm_AES_decrypt + + +ALIGN 64 +_bsaes_encrypt8: + lea r11,[$L$BS0] + + movdqa xmm8,XMMWORD[rax] + lea rax,[16+rax] + movdqa xmm7,XMMWORD[80+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa xmm7,XMMWORD[r11] + movdqa xmm8,XMMWORD[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp NEAR $L$enc_sbox +ALIGN 16 +$L$enc_loop: + pxor xmm15,XMMWORD[rax] + pxor xmm0,XMMWORD[16+rax] + pxor xmm1,XMMWORD[32+rax] + pxor xmm2,XMMWORD[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD[64+rax] + pxor xmm4,XMMWORD[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD[96+rax] + pxor xmm6,XMMWORD[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,[128+rax] +$L$enc_sbox: + pxor xmm4,xmm5 + pxor xmm1,xmm0 + pxor xmm2,xmm15 + pxor xmm5,xmm1 + pxor xmm4,xmm15 + + pxor xmm5,xmm2 + pxor xmm2,xmm6 + pxor xmm6,xmm4 + pxor xmm2,xmm3 + pxor xmm3,xmm4 + pxor xmm2,xmm0 + + pxor xmm1,xmm6 + pxor xmm0,xmm4 + movdqa xmm10,xmm6 + movdqa xmm9,xmm0 + movdqa xmm8,xmm4 + movdqa xmm12,xmm1 + movdqa xmm11,xmm5 + + pxor xmm10,xmm3 + pxor xmm9,xmm1 + pxor xmm8,xmm2 + movdqa xmm13,xmm10 + pxor xmm12,xmm3 + movdqa xmm7,xmm9 + pxor xmm11,xmm15 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm2 + pxor xmm11,xmm15 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm6 + movdqa xmm11,xmm4 + pxor xmm12,xmm0 + pxor xmm11,xmm5 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm1 + pxor xmm7,xmm13 + movdqa xmm12,xmm3 + pxor xmm8,xmm13 + movdqa xmm13,xmm0 + pand xmm11,xmm2 + movdqa xmm14,xmm6 + pand xmm12,xmm15 + pand xmm13,xmm4 + por xmm14,xmm5 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm5 + movdqa xmm7,xmm4 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm5 + pxor xmm5,xmm4 + pand xmm4,xmm14 + pand xmm5,xmm13 + pxor xmm5,xmm4 + pxor xmm4,xmm9 + pxor xmm11,xmm15 + pxor xmm7,xmm2 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm2 + pand xmm7,xmm14 + pand xmm2,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm2 + pxor xmm11,xmm10 + pxor xmm2,xmm9 + pxor xmm5,xmm11 + pxor xmm15,xmm11 + pxor xmm4,xmm7 + pxor xmm2,xmm7 + + movdqa xmm11,xmm6 + movdqa xmm7,xmm0 + pxor xmm11,xmm3 + pxor xmm7,xmm1 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm3 + pxor xmm11,xmm7 + pxor xmm3,xmm1 + pand xmm7,xmm14 + pand xmm1,xmm12 + pand xmm11,xmm13 + pand xmm3,xmm8 + pxor xmm7,xmm11 + pxor xmm3,xmm1 + pxor xmm11,xmm10 + pxor xmm1,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm6 + pxor xmm6,xmm0 + pand xmm0,xmm14 + pand xmm6,xmm13 + pxor xmm6,xmm0 + pxor xmm0,xmm10 + pxor xmm6,xmm11 + pxor xmm3,xmm11 + pxor xmm0,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm15 + pxor xmm0,xmm5 + pxor xmm3,xmm6 + pxor xmm5,xmm15 + pxor xmm15,xmm0 + + pxor xmm0,xmm4 + pxor xmm4,xmm1 + pxor xmm1,xmm2 + pxor xmm2,xmm4 + pxor xmm3,xmm4 + + pxor xmm5,xmm2 + dec r10d + jl NEAR $L$enc_done + pshufd xmm7,xmm15,0x93 + pshufd xmm8,xmm0,0x93 + pxor xmm15,xmm7 + pshufd xmm9,xmm3,0x93 + pxor xmm0,xmm8 + pshufd xmm10,xmm5,0x93 + pxor xmm3,xmm9 + pshufd xmm11,xmm2,0x93 + pxor xmm5,xmm10 + pshufd xmm12,xmm6,0x93 + pxor xmm2,xmm11 + pshufd xmm13,xmm1,0x93 + pxor xmm6,xmm12 + pshufd xmm14,xmm4,0x93 + pxor xmm1,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,0x4E + pxor xmm9,xmm0 + pshufd xmm0,xmm0,0x4E + pxor xmm12,xmm2 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm5 + pshufd xmm7,xmm2,0x4E + pxor xmm14,xmm1 + pshufd xmm8,xmm6,0x4E + pxor xmm10,xmm3 + pshufd xmm2,xmm5,0x4E + pxor xmm10,xmm4 + pshufd xmm6,xmm4,0x4E + pxor xmm11,xmm4 + pshufd xmm5,xmm1,0x4E + pxor xmm7,xmm11 + pshufd xmm1,xmm3,0x4E + pxor xmm8,xmm12 + pxor xmm2,xmm10 + pxor xmm6,xmm14 + pxor xmm5,xmm13 + movdqa xmm3,xmm7 + pxor xmm1,xmm9 + movdqa xmm4,xmm8 + movdqa xmm7,XMMWORD[48+r11] + jnz NEAR $L$enc_loop + movdqa xmm7,XMMWORD[64+r11] + jmp NEAR $L$enc_loop +ALIGN 16 +$L$enc_done: + movdqa xmm7,XMMWORD[r11] + movdqa xmm8,XMMWORD[16+r11] + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm2 + psrlq xmm2,1 + pxor xmm1,xmm4 + pxor xmm2,xmm6 + pand xmm1,xmm7 + pand xmm2,xmm7 + pxor xmm4,xmm1 + psllq xmm1,1 + pxor xmm6,xmm2 + psllq xmm2,1 + pxor xmm1,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm3,xmm5 + pxor xmm15,xmm0 + pand xmm3,xmm7 + pand xmm15,xmm7 + pxor xmm5,xmm3 + psllq xmm3,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm3,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm2 + psrlq xmm2,2 + pxor xmm6,xmm4 + pxor xmm2,xmm1 + pand xmm6,xmm8 + pand xmm2,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm1,xmm2 + psllq xmm2,2 + pxor xmm6,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm5 + pxor xmm15,xmm3 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm5,xmm0 + psllq xmm0,2 + pxor xmm3,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,4 + movdqa xmm10,xmm3 + psrlq xmm3,4 + pxor xmm5,xmm4 + pxor xmm3,xmm1 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm4,xmm5 + psllq xmm5,4 + pxor xmm1,xmm3 + psllq xmm3,4 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm2 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm2,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[rax] + pxor xmm3,xmm7 + pxor xmm5,xmm7 + pxor xmm2,xmm7 + pxor xmm6,xmm7 + pxor xmm1,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret + + + +ALIGN 64 +_bsaes_decrypt8: + lea r11,[$L$BS0] + + movdqa xmm8,XMMWORD[rax] + lea rax,[16+rax] + movdqa xmm7,XMMWORD[((-48))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + movdqa xmm7,XMMWORD[r11] + movdqa xmm8,XMMWORD[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp NEAR $L$dec_sbox +ALIGN 16 +$L$dec_loop: + pxor xmm15,XMMWORD[rax] + pxor xmm0,XMMWORD[16+rax] + pxor xmm1,XMMWORD[32+rax] + pxor xmm2,XMMWORD[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD[64+rax] + pxor xmm4,XMMWORD[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD[96+rax] + pxor xmm6,XMMWORD[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,[128+rax] +$L$dec_sbox: + pxor xmm2,xmm3 + + pxor xmm3,xmm6 + pxor xmm1,xmm6 + pxor xmm5,xmm3 + pxor xmm6,xmm5 + pxor xmm0,xmm6 + + pxor xmm15,xmm0 + pxor xmm1,xmm4 + pxor xmm2,xmm15 + pxor xmm4,xmm15 + pxor xmm0,xmm2 + movdqa xmm10,xmm2 + movdqa xmm9,xmm6 + movdqa xmm8,xmm0 + movdqa xmm12,xmm3 + movdqa xmm11,xmm4 + + pxor xmm10,xmm15 + pxor xmm9,xmm3 + pxor xmm8,xmm5 + movdqa xmm13,xmm10 + pxor xmm12,xmm15 + movdqa xmm7,xmm9 + pxor xmm11,xmm1 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm5 + pxor xmm11,xmm1 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm2 + movdqa xmm11,xmm0 + pxor xmm12,xmm6 + pxor xmm11,xmm4 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm3 + pxor xmm7,xmm13 + movdqa xmm12,xmm15 + pxor xmm8,xmm13 + movdqa xmm13,xmm6 + pand xmm11,xmm5 + movdqa xmm14,xmm2 + pand xmm12,xmm1 + pand xmm13,xmm0 + por xmm14,xmm4 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm4 + movdqa xmm7,xmm0 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm4 + pxor xmm4,xmm0 + pand xmm0,xmm14 + pand xmm4,xmm13 + pxor xmm4,xmm0 + pxor xmm0,xmm9 + pxor xmm11,xmm1 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm1 + pxor xmm11,xmm7 + pxor xmm1,xmm5 + pand xmm7,xmm14 + pand xmm5,xmm12 + pand xmm11,xmm13 + pand xmm1,xmm8 + pxor xmm7,xmm11 + pxor xmm1,xmm5 + pxor xmm11,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm11 + pxor xmm1,xmm11 + pxor xmm0,xmm7 + pxor xmm5,xmm7 + + movdqa xmm11,xmm2 + movdqa xmm7,xmm6 + pxor xmm11,xmm15 + pxor xmm7,xmm3 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm3 + pand xmm7,xmm14 + pand xmm3,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm3 + pxor xmm11,xmm10 + pxor xmm3,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm2 + pxor xmm2,xmm6 + pand xmm6,xmm14 + pand xmm2,xmm13 + pxor xmm2,xmm6 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm15,xmm11 + pxor xmm6,xmm7 + pxor xmm3,xmm7 + pxor xmm0,xmm6 + pxor xmm5,xmm4 + + pxor xmm3,xmm0 + pxor xmm1,xmm6 + pxor xmm4,xmm6 + pxor xmm3,xmm1 + pxor xmm6,xmm15 + pxor xmm3,xmm4 + pxor xmm2,xmm5 + pxor xmm5,xmm0 + pxor xmm2,xmm3 + + pxor xmm3,xmm15 + pxor xmm6,xmm2 + dec r10d + jl NEAR $L$dec_done + + pshufd xmm7,xmm15,0x4E + pshufd xmm13,xmm2,0x4E + pxor xmm7,xmm15 + pshufd xmm14,xmm4,0x4E + pxor xmm13,xmm2 + pshufd xmm8,xmm0,0x4E + pxor xmm14,xmm4 + pshufd xmm9,xmm5,0x4E + pxor xmm8,xmm0 + pshufd xmm10,xmm3,0x4E + pxor xmm9,xmm5 + pxor xmm15,xmm13 + pxor xmm0,xmm13 + pshufd xmm11,xmm1,0x4E + pxor xmm10,xmm3 + pxor xmm5,xmm7 + pxor xmm3,xmm8 + pshufd xmm12,xmm6,0x4E + pxor xmm11,xmm1 + pxor xmm0,xmm14 + pxor xmm1,xmm9 + pxor xmm12,xmm6 + + pxor xmm5,xmm14 + pxor xmm3,xmm13 + pxor xmm1,xmm13 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm1,xmm14 + pxor xmm6,xmm14 + pxor xmm4,xmm12 + pshufd xmm7,xmm15,0x93 + pshufd xmm8,xmm0,0x93 + pxor xmm15,xmm7 + pshufd xmm9,xmm5,0x93 + pxor xmm0,xmm8 + pshufd xmm10,xmm3,0x93 + pxor xmm5,xmm9 + pshufd xmm11,xmm1,0x93 + pxor xmm3,xmm10 + pshufd xmm12,xmm6,0x93 + pxor xmm1,xmm11 + pshufd xmm13,xmm2,0x93 + pxor xmm6,xmm12 + pshufd xmm14,xmm4,0x93 + pxor xmm2,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,0x4E + pxor xmm9,xmm0 + pshufd xmm0,xmm0,0x4E + pxor xmm12,xmm1 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm3 + pshufd xmm7,xmm1,0x4E + pxor xmm14,xmm2 + pshufd xmm8,xmm6,0x4E + pxor xmm10,xmm5 + pshufd xmm1,xmm3,0x4E + pxor xmm10,xmm4 + pshufd xmm6,xmm4,0x4E + pxor xmm11,xmm4 + pshufd xmm3,xmm2,0x4E + pxor xmm7,xmm11 + pshufd xmm2,xmm5,0x4E + pxor xmm8,xmm12 + pxor xmm10,xmm1 + pxor xmm6,xmm14 + pxor xmm13,xmm3 + movdqa xmm3,xmm7 + pxor xmm2,xmm9 + movdqa xmm5,xmm13 + movdqa xmm4,xmm8 + movdqa xmm1,xmm2 + movdqa xmm2,xmm10 + movdqa xmm7,XMMWORD[((-16))+r11] + jnz NEAR $L$dec_loop + movdqa xmm7,XMMWORD[((-32))+r11] + jmp NEAR $L$dec_loop +ALIGN 16 +$L$dec_done: + movdqa xmm7,XMMWORD[r11] + movdqa xmm8,XMMWORD[16+r11] + movdqa xmm9,xmm2 + psrlq xmm2,1 + movdqa xmm10,xmm1 + psrlq xmm1,1 + pxor xmm2,xmm4 + pxor xmm1,xmm6 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm4,xmm2 + psllq xmm2,1 + pxor xmm6,xmm1 + psllq xmm1,1 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm5,xmm3 + pxor xmm15,xmm0 + pand xmm5,xmm7 + pand xmm15,xmm7 + pxor xmm3,xmm5 + psllq xmm5,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm5,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm1 + psrlq xmm1,2 + pxor xmm6,xmm4 + pxor xmm1,xmm2 + pand xmm6,xmm8 + pand xmm1,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm2,xmm1 + psllq xmm1,2 + pxor xmm6,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm3 + pxor xmm15,xmm5 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm3,xmm0 + psllq xmm0,2 + pxor xmm5,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,4 + movdqa xmm10,xmm5 + psrlq xmm5,4 + pxor xmm3,xmm4 + pxor xmm5,xmm2 + pand xmm3,xmm7 + pand xmm5,xmm7 + pxor xmm4,xmm3 + psllq xmm3,4 + pxor xmm2,xmm5 + psllq xmm5,4 + pxor xmm3,xmm9 + pxor xmm5,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm1 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm1,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD[rax] + pxor xmm5,xmm7 + pxor xmm3,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm7 + pxor xmm2,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret + + +ALIGN 16 +_bsaes_key_convert: + lea r11,[$L$masks] + movdqu xmm7,XMMWORD[rcx] + lea rcx,[16+rcx] + movdqa xmm0,XMMWORD[r11] + movdqa xmm1,XMMWORD[16+r11] + movdqa xmm2,XMMWORD[32+r11] + movdqa xmm3,XMMWORD[48+r11] + movdqa xmm4,XMMWORD[64+r11] + pcmpeqd xmm5,xmm5 + + movdqu xmm6,XMMWORD[rcx] + movdqa XMMWORD[rax],xmm7 + lea rax,[16+rax] + dec r10d + jmp NEAR $L$key_loop +ALIGN 16 +$L$key_loop: +DB 102,15,56,0,244 + + movdqa xmm8,xmm0 + movdqa xmm9,xmm1 + + pand xmm8,xmm6 + pand xmm9,xmm6 + movdqa xmm10,xmm2 + pcmpeqb xmm8,xmm0 + psllq xmm0,4 + movdqa xmm11,xmm3 + pcmpeqb xmm9,xmm1 + psllq xmm1,4 + + pand xmm10,xmm6 + pand xmm11,xmm6 + movdqa xmm12,xmm0 + pcmpeqb xmm10,xmm2 + psllq xmm2,4 + movdqa xmm13,xmm1 + pcmpeqb xmm11,xmm3 + psllq xmm3,4 + + movdqa xmm14,xmm2 + movdqa xmm15,xmm3 + pxor xmm8,xmm5 + pxor xmm9,xmm5 + + pand xmm12,xmm6 + pand xmm13,xmm6 + movdqa XMMWORD[rax],xmm8 + pcmpeqb xmm12,xmm0 + psrlq xmm0,4 + movdqa XMMWORD[16+rax],xmm9 + pcmpeqb xmm13,xmm1 + psrlq xmm1,4 + lea rcx,[16+rcx] + + pand xmm14,xmm6 + pand xmm15,xmm6 + movdqa XMMWORD[32+rax],xmm10 + pcmpeqb xmm14,xmm2 + psrlq xmm2,4 + movdqa XMMWORD[48+rax],xmm11 + pcmpeqb xmm15,xmm3 + psrlq xmm3,4 + movdqu xmm6,XMMWORD[rcx] + + pxor xmm13,xmm5 + pxor xmm14,xmm5 + movdqa XMMWORD[64+rax],xmm12 + movdqa XMMWORD[80+rax],xmm13 + movdqa XMMWORD[96+rax],xmm14 + movdqa XMMWORD[112+rax],xmm15 + lea rax,[128+rax] + dec r10d + jnz NEAR $L$key_loop + + movdqa xmm7,XMMWORD[80+r11] + + DB 0F3h,0C3h ;repret + +EXTERN asm_AES_cbc_encrypt +global bsaes_cbc_encrypt + +ALIGN 16 +bsaes_cbc_encrypt: + mov r11d,DWORD[48+rsp] + cmp r11d,0 + jne NEAR asm_AES_cbc_encrypt + cmp r8,128 + jb NEAR asm_AES_cbc_encrypt + + mov rax,rsp +$L$cbc_dec_prologue: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-72))+rsp] + mov r10,QWORD[160+rsp] + lea rsp,[((-160))+rsp] + movaps XMMWORD[64+rsp],xmm6 + movaps XMMWORD[80+rsp],xmm7 + movaps XMMWORD[96+rsp],xmm8 + movaps XMMWORD[112+rsp],xmm9 + movaps XMMWORD[128+rsp],xmm10 + movaps XMMWORD[144+rsp],xmm11 + movaps XMMWORD[160+rsp],xmm12 + movaps XMMWORD[176+rsp],xmm13 + movaps XMMWORD[192+rsp],xmm14 + movaps XMMWORD[208+rsp],xmm15 +$L$cbc_dec_body: + mov rbp,rsp + mov eax,DWORD[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + mov rbx,r10 + shr r14,4 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD[rsp] + movdqa XMMWORD[rax],xmm6 + movdqa XMMWORD[rsp],xmm7 + + movdqu xmm14,XMMWORD[rbx] + sub r14,8 +$L$cbc_dec_loop: + movdqu xmm15,XMMWORD[r12] + movdqu xmm0,XMMWORD[16+r12] + movdqu xmm1,XMMWORD[32+r12] + movdqu xmm2,XMMWORD[48+r12] + movdqu xmm3,XMMWORD[64+r12] + movdqu xmm4,XMMWORD[80+r12] + mov rax,rsp + movdqu xmm5,XMMWORD[96+r12] + mov r10d,edx + movdqu xmm6,XMMWORD[112+r12] + movdqa XMMWORD[32+rbp],xmm14 + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD[80+r12] + pxor xmm6,xmm11 + movdqu xmm13,XMMWORD[96+r12] + pxor xmm2,xmm12 + movdqu xmm14,XMMWORD[112+r12] + pxor xmm4,xmm13 + movdqu XMMWORD[r13],xmm15 + lea r12,[128+r12] + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + movdqu XMMWORD[64+r13],xmm1 + movdqu XMMWORD[80+r13],xmm6 + movdqu XMMWORD[96+r13],xmm2 + movdqu XMMWORD[112+r13],xmm4 + lea r13,[128+r13] + sub r14,8 + jnc NEAR $L$cbc_dec_loop + + add r14,8 + jz NEAR $L$cbc_dec_done + + movdqu xmm15,XMMWORD[r12] + mov rax,rsp + mov r10d,edx + cmp r14,2 + jb NEAR $L$cbc_dec_one + movdqu xmm0,XMMWORD[16+r12] + je NEAR $L$cbc_dec_two + movdqu xmm1,XMMWORD[32+r12] + cmp r14,4 + jb NEAR $L$cbc_dec_three + movdqu xmm2,XMMWORD[48+r12] + je NEAR $L$cbc_dec_four + movdqu xmm3,XMMWORD[64+r12] + cmp r14,6 + jb NEAR $L$cbc_dec_five + movdqu xmm4,XMMWORD[80+r12] + je NEAR $L$cbc_dec_six + movdqu xmm5,XMMWORD[96+r12] + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD[80+r12] + pxor xmm6,xmm11 + movdqu xmm14,XMMWORD[96+r12] + pxor xmm2,xmm12 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + movdqu XMMWORD[64+r13],xmm1 + movdqu XMMWORD[80+r13],xmm6 + movdqu XMMWORD[96+r13],xmm2 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_six: + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD[64+r12] + pxor xmm1,xmm10 + movdqu xmm14,XMMWORD[80+r12] + pxor xmm6,xmm11 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + movdqu XMMWORD[64+r13],xmm1 + movdqu XMMWORD[80+r13],xmm6 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_five: + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm3,xmm9 + movdqu xmm14,XMMWORD[64+r12] + pxor xmm1,xmm10 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + movdqu XMMWORD[64+r13],xmm1 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_four: + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu xmm14,XMMWORD[48+r12] + pxor xmm3,xmm9 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_three: + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu xmm14,XMMWORD[32+r12] + pxor xmm5,xmm8 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_two: + movdqa XMMWORD[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD[32+rbp] + movdqu xmm7,XMMWORD[r12] + movdqu xmm14,XMMWORD[16+r12] + pxor xmm0,xmm7 + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + jmp NEAR $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_one: + lea rcx,[r12] + lea rdx,[32+rbp] + lea r8,[r15] + call asm_AES_decrypt + pxor xmm14,XMMWORD[32+rbp] + movdqu XMMWORD[r13],xmm14 + movdqa xmm14,xmm15 + +$L$cbc_dec_done: + movdqu XMMWORD[rbx],xmm14 + lea rax,[rsp] + pxor xmm0,xmm0 +$L$cbc_dec_bzero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + lea rax,[32+rax] + cmp rbp,rax + ja NEAR $L$cbc_dec_bzero + + lea rsp,[rbp] + movaps xmm6,XMMWORD[64+rbp] + movaps xmm7,XMMWORD[80+rbp] + movaps xmm8,XMMWORD[96+rbp] + movaps xmm9,XMMWORD[112+rbp] + movaps xmm10,XMMWORD[128+rbp] + movaps xmm11,XMMWORD[144+rbp] + movaps xmm12,XMMWORD[160+rbp] + movaps xmm13,XMMWORD[176+rbp] + movaps xmm14,XMMWORD[192+rbp] + movaps xmm15,XMMWORD[208+rbp] + lea rsp,[160+rbp] + mov r15,QWORD[72+rsp] + mov r14,QWORD[80+rsp] + mov r13,QWORD[88+rsp] + mov r12,QWORD[96+rsp] + mov rbx,QWORD[104+rsp] + mov rax,QWORD[112+rsp] + lea rsp,[120+rsp] + mov rbp,rax +$L$cbc_dec_epilogue: + DB 0F3h,0C3h ;repret + + +global bsaes_ctr32_encrypt_blocks + +ALIGN 16 +bsaes_ctr32_encrypt_blocks: + mov rax,rsp +$L$ctr_enc_prologue: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-72))+rsp] + mov r10,QWORD[160+rsp] + lea rsp,[((-160))+rsp] + movaps XMMWORD[64+rsp],xmm6 + movaps XMMWORD[80+rsp],xmm7 + movaps XMMWORD[96+rsp],xmm8 + movaps XMMWORD[112+rsp],xmm9 + movaps XMMWORD[128+rsp],xmm10 + movaps XMMWORD[144+rsp],xmm11 + movaps XMMWORD[160+rsp],xmm12 + movaps XMMWORD[176+rsp],xmm13 + movaps XMMWORD[192+rsp],xmm14 + movaps XMMWORD[208+rsp],xmm15 +$L$ctr_enc_body: + mov rbp,rsp + movdqu xmm0,XMMWORD[r10] + mov eax,DWORD[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + movdqa XMMWORD[32+rbp],xmm0 + cmp r8,8 + jb NEAR $L$ctr_enc_short + + mov ebx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,ebx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD[rax],xmm7 + + movdqa xmm8,XMMWORD[rsp] + lea r11,[$L$ADD1] + movdqa xmm15,XMMWORD[32+rbp] + movdqa xmm7,XMMWORD[((-32))+r11] +DB 102,68,15,56,0,199 +DB 102,68,15,56,0,255 + movdqa XMMWORD[rsp],xmm8 + jmp NEAR $L$ctr_enc_loop +ALIGN 16 +$L$ctr_enc_loop: + movdqa XMMWORD[32+rbp],xmm15 + movdqa xmm0,xmm15 + movdqa xmm1,xmm15 + paddd xmm0,XMMWORD[r11] + movdqa xmm2,xmm15 + paddd xmm1,XMMWORD[16+r11] + movdqa xmm3,xmm15 + paddd xmm2,XMMWORD[32+r11] + movdqa xmm4,xmm15 + paddd xmm3,XMMWORD[48+r11] + movdqa xmm5,xmm15 + paddd xmm4,XMMWORD[64+r11] + movdqa xmm6,xmm15 + paddd xmm5,XMMWORD[80+r11] + paddd xmm6,XMMWORD[96+r11] + + + + movdqa xmm8,XMMWORD[rsp] + lea rax,[16+rsp] + movdqa xmm7,XMMWORD[((-16))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea r11,[$L$BS0] + mov r10d,ebx + + call _bsaes_encrypt8_bitslice + + sub r14,8 + jc NEAR $L$ctr_enc_loop_done + + movdqu xmm7,XMMWORD[r12] + movdqu xmm8,XMMWORD[16+r12] + movdqu xmm9,XMMWORD[32+r12] + movdqu xmm10,XMMWORD[48+r12] + movdqu xmm11,XMMWORD[64+r12] + movdqu xmm12,XMMWORD[80+r12] + movdqu xmm13,XMMWORD[96+r12] + movdqu xmm14,XMMWORD[112+r12] + lea r12,[128+r12] + pxor xmm7,xmm15 + movdqa xmm15,XMMWORD[32+rbp] + pxor xmm0,xmm8 + movdqu XMMWORD[r13],xmm7 + pxor xmm3,xmm9 + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,xmm10 + movdqu XMMWORD[32+r13],xmm3 + pxor xmm2,xmm11 + movdqu XMMWORD[48+r13],xmm5 + pxor xmm6,xmm12 + movdqu XMMWORD[64+r13],xmm2 + pxor xmm1,xmm13 + movdqu XMMWORD[80+r13],xmm6 + pxor xmm4,xmm14 + movdqu XMMWORD[96+r13],xmm1 + lea r11,[$L$ADD1] + movdqu XMMWORD[112+r13],xmm4 + lea r13,[128+r13] + paddd xmm15,XMMWORD[112+r11] + jnz NEAR $L$ctr_enc_loop + + jmp NEAR $L$ctr_enc_done +ALIGN 16 +$L$ctr_enc_loop_done: + add r14,8 + movdqu xmm7,XMMWORD[r12] + pxor xmm15,xmm7 + movdqu XMMWORD[r13],xmm15 + cmp r14,2 + jb NEAR $L$ctr_enc_done + movdqu xmm8,XMMWORD[16+r12] + pxor xmm0,xmm8 + movdqu XMMWORD[16+r13],xmm0 + je NEAR $L$ctr_enc_done + movdqu xmm9,XMMWORD[32+r12] + pxor xmm3,xmm9 + movdqu XMMWORD[32+r13],xmm3 + cmp r14,4 + jb NEAR $L$ctr_enc_done + movdqu xmm10,XMMWORD[48+r12] + pxor xmm5,xmm10 + movdqu XMMWORD[48+r13],xmm5 + je NEAR $L$ctr_enc_done + movdqu xmm11,XMMWORD[64+r12] + pxor xmm2,xmm11 + movdqu XMMWORD[64+r13],xmm2 + cmp r14,6 + jb NEAR $L$ctr_enc_done + movdqu xmm12,XMMWORD[80+r12] + pxor xmm6,xmm12 + movdqu XMMWORD[80+r13],xmm6 + je NEAR $L$ctr_enc_done + movdqu xmm13,XMMWORD[96+r12] + pxor xmm1,xmm13 + movdqu XMMWORD[96+r13],xmm1 + jmp NEAR $L$ctr_enc_done + +ALIGN 16 +$L$ctr_enc_short: + lea rcx,[32+rbp] + lea rdx,[48+rbp] + lea r8,[r15] + call asm_AES_encrypt + movdqu xmm0,XMMWORD[r12] + lea r12,[16+r12] + mov eax,DWORD[44+rbp] + bswap eax + pxor xmm0,XMMWORD[48+rbp] + inc eax + movdqu XMMWORD[r13],xmm0 + bswap eax + lea r13,[16+r13] + mov DWORD[44+rsp],eax + dec r14 + jnz NEAR $L$ctr_enc_short + +$L$ctr_enc_done: + lea rax,[rsp] + pxor xmm0,xmm0 +$L$ctr_enc_bzero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + lea rax,[32+rax] + cmp rbp,rax + ja NEAR $L$ctr_enc_bzero + + lea rsp,[rbp] + movaps xmm6,XMMWORD[64+rbp] + movaps xmm7,XMMWORD[80+rbp] + movaps xmm8,XMMWORD[96+rbp] + movaps xmm9,XMMWORD[112+rbp] + movaps xmm10,XMMWORD[128+rbp] + movaps xmm11,XMMWORD[144+rbp] + movaps xmm12,XMMWORD[160+rbp] + movaps xmm13,XMMWORD[176+rbp] + movaps xmm14,XMMWORD[192+rbp] + movaps xmm15,XMMWORD[208+rbp] + lea rsp,[160+rbp] + mov r15,QWORD[72+rsp] + mov r14,QWORD[80+rsp] + mov r13,QWORD[88+rsp] + mov r12,QWORD[96+rsp] + mov rbx,QWORD[104+rsp] + mov rax,QWORD[112+rsp] + lea rsp,[120+rsp] + mov rbp,rax +$L$ctr_enc_epilogue: + DB 0F3h,0C3h ;repret + +global bsaes_xts_encrypt + +ALIGN 16 +bsaes_xts_encrypt: + mov rax,rsp +$L$xts_enc_prologue: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-72))+rsp] + mov r10,QWORD[160+rsp] + mov r11,QWORD[168+rsp] + lea rsp,[((-160))+rsp] + movaps XMMWORD[64+rsp],xmm6 + movaps XMMWORD[80+rsp],xmm7 + movaps XMMWORD[96+rsp],xmm8 + movaps XMMWORD[112+rsp],xmm9 + movaps XMMWORD[128+rsp],xmm10 + movaps XMMWORD[144+rsp],xmm11 + movaps XMMWORD[160+rsp],xmm12 + movaps XMMWORD[176+rsp],xmm13 + movaps XMMWORD[192+rsp],xmm14 + movaps XMMWORD[208+rsp],xmm15 +$L$xts_enc_body: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,[r11] + lea rdx,[32+rbp] + lea r8,[r10] + call asm_AES_encrypt + + mov eax,DWORD[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD[rax],xmm7 + + and r14,-16 + sub rsp,0x80 + movdqa xmm6,XMMWORD[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,0x80 + jc NEAR $L$xts_enc_short + jmp NEAR $L$xts_enc_loop + +ALIGN 16 +$L$xts_enc_loop: + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD[r12] + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD[112+r12] + lea r12,[128+r12] + movdqa XMMWORD[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm3 + pxor xmm2,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm5 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm2 + pxor xmm1,XMMWORD[96+rsp] + movdqu XMMWORD[80+r13],xmm6 + pxor xmm4,XMMWORD[112+rsp] + movdqu XMMWORD[96+r13],xmm1 + movdqu XMMWORD[112+r13],xmm4 + lea r13,[128+r13] + + movdqa xmm6,XMMWORD[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,0x80 + jnc NEAR $L$xts_enc_loop + +$L$xts_enc_short: + add r14,0x80 + jz NEAR $L$xts_enc_done + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD[r12] + cmp r14,16 + je NEAR $L$xts_enc_1 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD[16+r12] + cmp r14,32 + je NEAR $L$xts_enc_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD[32+r12] + cmp r14,48 + je NEAR $L$xts_enc_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD[48+r12] + cmp r14,64 + je NEAR $L$xts_enc_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD[64+r12] + cmp r14,80 + je NEAR $L$xts_enc_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD[80+r12] + cmp r14,96 + je NEAR $L$xts_enc_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD[112+rsp],xmm6 + lea r12,[112+r12] + pxor xmm5,xmm13 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm3 + pxor xmm2,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm5 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm2 + pxor xmm1,XMMWORD[96+rsp] + movdqu XMMWORD[80+r13],xmm6 + movdqu XMMWORD[96+r13],xmm1 + lea r13,[112+r13] + + movdqa xmm6,XMMWORD[112+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_6: + pxor xmm3,xmm11 + lea r12,[96+r12] + pxor xmm4,xmm12 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm3 + pxor xmm2,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm5 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm2 + movdqu XMMWORD[80+r13],xmm6 + lea r13,[96+r13] + + movdqa xmm6,XMMWORD[96+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_5: + pxor xmm2,xmm10 + lea r12,[80+r12] + pxor xmm3,xmm11 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm3 + pxor xmm2,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm5 + movdqu XMMWORD[64+r13],xmm2 + lea r13,[80+r13] + + movdqa xmm6,XMMWORD[80+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_4: + pxor xmm1,xmm9 + lea r12,[64+r12] + pxor xmm2,xmm10 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm5,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm3 + movdqu XMMWORD[48+r13],xmm5 + lea r13,[64+r13] + + movdqa xmm6,XMMWORD[64+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_3: + pxor xmm0,xmm8 + lea r12,[48+r12] + pxor xmm1,xmm9 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm3,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm3 + lea r13,[48+r13] + + movdqa xmm6,XMMWORD[48+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_2: + pxor xmm15,xmm7 + lea r12,[32+r12] + pxor xmm0,xmm8 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + lea r13,[32+r13] + + movdqa xmm6,XMMWORD[32+rsp] + jmp NEAR $L$xts_enc_done +ALIGN 16 +$L$xts_enc_1: + pxor xmm7,xmm15 + lea r12,[16+r12] + movdqa XMMWORD[32+rbp],xmm7 + lea rcx,[32+rbp] + lea rdx,[32+rbp] + lea r8,[r15] + call asm_AES_encrypt + pxor xmm15,XMMWORD[32+rbp] + + + + + + movdqu XMMWORD[r13],xmm15 + lea r13,[16+r13] + + movdqa xmm6,XMMWORD[16+rsp] + +$L$xts_enc_done: + and ebx,15 + jz NEAR $L$xts_enc_ret + mov rdx,r13 + +$L$xts_enc_steal: + movzx eax,BYTE[r12] + movzx ecx,BYTE[((-16))+rdx] + lea r12,[1+r12] + mov BYTE[((-16))+rdx],al + mov BYTE[rdx],cl + lea rdx,[1+rdx] + sub ebx,1 + jnz NEAR $L$xts_enc_steal + + movdqu xmm15,XMMWORD[((-16))+r13] + lea rcx,[32+rbp] + pxor xmm15,xmm6 + lea rdx,[32+rbp] + movdqa XMMWORD[32+rbp],xmm15 + lea r8,[r15] + call asm_AES_encrypt + pxor xmm6,XMMWORD[32+rbp] + movdqu XMMWORD[(-16)+r13],xmm6 + +$L$xts_enc_ret: + lea rax,[rsp] + pxor xmm0,xmm0 +$L$xts_enc_bzero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + lea rax,[32+rax] + cmp rbp,rax + ja NEAR $L$xts_enc_bzero + + lea rsp,[rbp] + movaps xmm6,XMMWORD[64+rbp] + movaps xmm7,XMMWORD[80+rbp] + movaps xmm8,XMMWORD[96+rbp] + movaps xmm9,XMMWORD[112+rbp] + movaps xmm10,XMMWORD[128+rbp] + movaps xmm11,XMMWORD[144+rbp] + movaps xmm12,XMMWORD[160+rbp] + movaps xmm13,XMMWORD[176+rbp] + movaps xmm14,XMMWORD[192+rbp] + movaps xmm15,XMMWORD[208+rbp] + lea rsp,[160+rbp] + mov r15,QWORD[72+rsp] + mov r14,QWORD[80+rsp] + mov r13,QWORD[88+rsp] + mov r12,QWORD[96+rsp] + mov rbx,QWORD[104+rsp] + mov rax,QWORD[112+rsp] + lea rsp,[120+rsp] + mov rbp,rax +$L$xts_enc_epilogue: + DB 0F3h,0C3h ;repret + + +global bsaes_xts_decrypt + +ALIGN 16 +bsaes_xts_decrypt: + mov rax,rsp +$L$xts_dec_prologue: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-72))+rsp] + mov r10,QWORD[160+rsp] + mov r11,QWORD[168+rsp] + lea rsp,[((-160))+rsp] + movaps XMMWORD[64+rsp],xmm6 + movaps XMMWORD[80+rsp],xmm7 + movaps XMMWORD[96+rsp],xmm8 + movaps XMMWORD[112+rsp],xmm9 + movaps XMMWORD[128+rsp],xmm10 + movaps XMMWORD[144+rsp],xmm11 + movaps XMMWORD[160+rsp],xmm12 + movaps XMMWORD[176+rsp],xmm13 + movaps XMMWORD[192+rsp],xmm14 + movaps XMMWORD[208+rsp],xmm15 +$L$xts_dec_body: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,[r11] + lea rdx,[32+rbp] + lea r8,[r10] + call asm_AES_encrypt + + mov eax,DWORD[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD[rsp] + movdqa XMMWORD[rax],xmm6 + movdqa XMMWORD[rsp],xmm7 + + xor eax,eax + and r14,-16 + test ebx,15 + setnz al + shl rax,4 + sub r14,rax + + sub rsp,0x80 + movdqa xmm6,XMMWORD[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,0x80 + jc NEAR $L$xts_dec_short + jmp NEAR $L$xts_dec_loop + +ALIGN 16 +$L$xts_dec_loop: + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD[r12] + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD[112+r12] + lea r12,[128+r12] + movdqa XMMWORD[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm3,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm5 + pxor xmm1,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm3 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm1 + pxor xmm2,XMMWORD[96+rsp] + movdqu XMMWORD[80+r13],xmm6 + pxor xmm4,XMMWORD[112+rsp] + movdqu XMMWORD[96+r13],xmm2 + movdqu XMMWORD[112+r13],xmm4 + lea r13,[128+r13] + + movdqa xmm6,XMMWORD[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,0x80 + jnc NEAR $L$xts_dec_loop + +$L$xts_dec_short: + add r14,0x80 + jz NEAR $L$xts_dec_done + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD[r12] + cmp r14,16 + je NEAR $L$xts_dec_1 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD[16+r12] + cmp r14,32 + je NEAR $L$xts_dec_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD[32+r12] + cmp r14,48 + je NEAR $L$xts_dec_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD[48+r12] + cmp r14,64 + je NEAR $L$xts_dec_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD[64+r12] + cmp r14,80 + je NEAR $L$xts_dec_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,0x13 + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD[80+r12] + cmp r14,96 + je NEAR $L$xts_dec_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD[112+rsp],xmm6 + lea r12,[112+r12] + pxor xmm5,xmm13 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm3,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm5 + pxor xmm1,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm3 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm1 + pxor xmm2,XMMWORD[96+rsp] + movdqu XMMWORD[80+r13],xmm6 + movdqu XMMWORD[96+r13],xmm2 + lea r13,[112+r13] + + movdqa xmm6,XMMWORD[112+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_6: + pxor xmm3,xmm11 + lea r12,[96+r12] + pxor xmm4,xmm12 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm3,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm5 + pxor xmm1,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm3 + pxor xmm6,XMMWORD[80+rsp] + movdqu XMMWORD[64+r13],xmm1 + movdqu XMMWORD[80+r13],xmm6 + lea r13,[96+r13] + + movdqa xmm6,XMMWORD[96+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_5: + pxor xmm2,xmm10 + lea r12,[80+r12] + pxor xmm3,xmm11 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm3,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm5 + pxor xmm1,XMMWORD[64+rsp] + movdqu XMMWORD[48+r13],xmm3 + movdqu XMMWORD[64+r13],xmm1 + lea r13,[80+r13] + + movdqa xmm6,XMMWORD[80+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_4: + pxor xmm1,xmm9 + lea r12,[64+r12] + pxor xmm2,xmm10 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + pxor xmm3,XMMWORD[48+rsp] + movdqu XMMWORD[32+r13],xmm5 + movdqu XMMWORD[48+r13],xmm3 + lea r13,[64+r13] + + movdqa xmm6,XMMWORD[64+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_3: + pxor xmm0,xmm8 + lea r12,[48+r12] + pxor xmm1,xmm9 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + pxor xmm5,XMMWORD[32+rsp] + movdqu XMMWORD[16+r13],xmm0 + movdqu XMMWORD[32+r13],xmm5 + lea r13,[48+r13] + + movdqa xmm6,XMMWORD[48+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_2: + pxor xmm15,xmm7 + lea r12,[32+r12] + pxor xmm0,xmm8 + lea rax,[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD[rsp] + pxor xmm0,XMMWORD[16+rsp] + movdqu XMMWORD[r13],xmm15 + movdqu XMMWORD[16+r13],xmm0 + lea r13,[32+r13] + + movdqa xmm6,XMMWORD[32+rsp] + jmp NEAR $L$xts_dec_done +ALIGN 16 +$L$xts_dec_1: + pxor xmm7,xmm15 + lea r12,[16+r12] + movdqa XMMWORD[32+rbp],xmm7 + lea rcx,[32+rbp] + lea rdx,[32+rbp] + lea r8,[r15] + call asm_AES_decrypt + pxor xmm15,XMMWORD[32+rbp] + + + + + + movdqu XMMWORD[r13],xmm15 + lea r13,[16+r13] + + movdqa xmm6,XMMWORD[16+rsp] + +$L$xts_dec_done: + and ebx,15 + jz NEAR $L$xts_dec_ret + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,0x13 + movdqa xmm5,xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + movdqu xmm15,XMMWORD[r12] + pxor xmm6,xmm13 + + lea rcx,[32+rbp] + pxor xmm15,xmm6 + lea rdx,[32+rbp] + movdqa XMMWORD[32+rbp],xmm15 + lea r8,[r15] + call asm_AES_decrypt + pxor xmm6,XMMWORD[32+rbp] + mov rdx,r13 + movdqu XMMWORD[r13],xmm6 + +$L$xts_dec_steal: + movzx eax,BYTE[16+r12] + movzx ecx,BYTE[rdx] + lea r12,[1+r12] + mov BYTE[rdx],al + mov BYTE[16+rdx],cl + lea rdx,[1+rdx] + sub ebx,1 + jnz NEAR $L$xts_dec_steal + + movdqu xmm15,XMMWORD[r13] + lea rcx,[32+rbp] + pxor xmm15,xmm5 + lea rdx,[32+rbp] + movdqa XMMWORD[32+rbp],xmm15 + lea r8,[r15] + call asm_AES_decrypt + pxor xmm5,XMMWORD[32+rbp] + movdqu XMMWORD[r13],xmm5 + +$L$xts_dec_ret: + lea rax,[rsp] + pxor xmm0,xmm0 +$L$xts_dec_bzero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + lea rax,[32+rax] + cmp rbp,rax + ja NEAR $L$xts_dec_bzero + + lea rsp,[rbp] + movaps xmm6,XMMWORD[64+rbp] + movaps xmm7,XMMWORD[80+rbp] + movaps xmm8,XMMWORD[96+rbp] + movaps xmm9,XMMWORD[112+rbp] + movaps xmm10,XMMWORD[128+rbp] + movaps xmm11,XMMWORD[144+rbp] + movaps xmm12,XMMWORD[160+rbp] + movaps xmm13,XMMWORD[176+rbp] + movaps xmm14,XMMWORD[192+rbp] + movaps xmm15,XMMWORD[208+rbp] + lea rsp,[160+rbp] + mov r15,QWORD[72+rsp] + mov r14,QWORD[80+rsp] + mov r13,QWORD[88+rsp] + mov r12,QWORD[96+rsp] + mov rbx,QWORD[104+rsp] + mov rax,QWORD[112+rsp] + lea rsp,[120+rsp] + mov rbp,rax +$L$xts_dec_epilogue: + DB 0F3h,0C3h ;repret + + +ALIGN 64 +_bsaes_const: +$L$M0ISR: + DQ 0x0a0e0206070b0f03,0x0004080c0d010509 +$L$ISRM0: + DQ 0x01040b0e0205080f,0x0306090c00070a0d +$L$ISR: + DQ 0x0504070602010003,0x0f0e0d0c080b0a09 +$L$BS0: + DQ 0x5555555555555555,0x5555555555555555 +$L$BS1: + DQ 0x3333333333333333,0x3333333333333333 +$L$BS2: + DQ 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f +$L$SR: + DQ 0x0504070600030201,0x0f0e0d0c0a09080b +$L$SRM0: + DQ 0x0304090e00050a0f,0x01060b0c0207080d +$L$M0SR: + DQ 0x0a0e02060f03070b,0x0004080c05090d01 +$L$SWPUP: + DQ 0x0706050403020100,0x0c0d0e0f0b0a0908 +$L$SWPUPM0SR: + DQ 0x0a0d02060c03070b,0x0004080f05090e01 +$L$ADD1: + DQ 0x0000000000000000,0x0000000100000000 +$L$ADD2: + DQ 0x0000000000000000,0x0000000200000000 +$L$ADD3: + DQ 0x0000000000000000,0x0000000300000000 +$L$ADD4: + DQ 0x0000000000000000,0x0000000400000000 +$L$ADD5: + DQ 0x0000000000000000,0x0000000500000000 +$L$ADD6: + DQ 0x0000000000000000,0x0000000600000000 +$L$ADD7: + DQ 0x0000000000000000,0x0000000700000000 +$L$ADD8: + DQ 0x0000000000000000,0x0000000800000000 +$L$xts_magic: + DD 0x87,0,1,0 +$L$masks: + DQ 0x0101010101010101,0x0101010101010101 + DQ 0x0202020202020202,0x0202020202020202 + DQ 0x0404040404040404,0x0404040404040404 + DQ 0x0808080808080808,0x0808080808080808 +$L$M0: + DQ 0x02060a0e03070b0f,0x0004080c0105090d +$L$63: + DQ 0x6363636363636363,0x6363636363636363 +DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 +DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44 +DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44 +DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32 +DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov rax,QWORD[160+r8] + + lea rsi,[64+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + lea rax,[160+rax] + + mov rbp,QWORD[112+rax] + mov rbx,QWORD[104+rax] + mov r12,QWORD[96+rax] + mov r13,QWORD[88+rax] + mov r14,QWORD[80+rax] + mov r15,QWORD[72+rax] + lea rax,[120+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_prologue: + mov QWORD[152+r8],rax + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$cbc_dec_prologue wrt ..imagebase + DD $L$cbc_dec_epilogue wrt ..imagebase + DD $L$cbc_dec_info wrt ..imagebase + + DD $L$ctr_enc_prologue wrt ..imagebase + DD $L$ctr_enc_epilogue wrt ..imagebase + DD $L$ctr_enc_info wrt ..imagebase + + DD $L$xts_enc_prologue wrt ..imagebase + DD $L$xts_enc_epilogue wrt ..imagebase + DD $L$xts_enc_info wrt ..imagebase + + DD $L$xts_dec_prologue wrt ..imagebase + DD $L$xts_dec_epilogue wrt ..imagebase + DD $L$xts_dec_info wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$cbc_dec_info: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase +$L$ctr_enc_info: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase +$L$xts_enc_info: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase +$L$xts_dec_info: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/aes/vpaes-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/aes/vpaes-x86_64.asm new file mode 100644 index 00000000000..3edde9fdbc3 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/aes/vpaes-x86_64.asm @@ -0,0 +1,1137 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core: + mov r9,rdx + mov r11,16 + mov eax,DWORD[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD[$L$k_ipt] + pandn xmm1,xmm0 + movdqu xmm5,XMMWORD[r9] + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[(($L$k_ipt+16))] +DB 102,15,56,0,193 + pxor xmm2,xmm5 + add r9,16 + pxor xmm0,xmm2 + lea r10,[$L$k_mc_backward] + jmp NEAR $L$enc_entry + +ALIGN 16 +$L$enc_loop: + + movdqa xmm4,xmm13 + movdqa xmm0,xmm12 +DB 102,15,56,0,226 +DB 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,xmm15 + pxor xmm0,xmm4 + movdqa xmm1,XMMWORD[((-64))+r10*1+r11] +DB 102,15,56,0,234 + movdqa xmm4,XMMWORD[r10*1+r11] + movdqa xmm2,xmm14 +DB 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +DB 102,15,56,0,193 + add r9,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and r11,0x30 + sub rax,1 + pxor xmm0,xmm3 + +$L$enc_entry: + + movdqa xmm1,xmm9 + movdqa xmm5,xmm11 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,232 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm5 +DB 102,15,56,0,224 + movdqa xmm2,xmm10 + pxor xmm4,xmm5 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm5,XMMWORD[r9] + pxor xmm3,xmm1 + jnz NEAR $L$enc_loop + + + movdqa xmm4,XMMWORD[((-96))+r10] + movdqa xmm0,XMMWORD[((-80))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD[64+r10*1+r11] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + DB 0F3h,0C3h ;repret + + + + + + + + +ALIGN 16 +_vpaes_decrypt_core: + mov r9,rdx + mov eax,DWORD[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD[$L$k_dipt] + pandn xmm1,xmm0 + mov r11,rax + psrld xmm1,4 + movdqu xmm5,XMMWORD[r9] + shl r11,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[(($L$k_dipt+16))] + xor r11,0x30 + lea r10,[$L$k_dsbd] +DB 102,15,56,0,193 + and r11,0x30 + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD[(($L$k_mc_forward+48))] + pxor xmm0,xmm2 + add r9,16 + add r11,r10 + jmp NEAR $L$dec_entry + +ALIGN 16 +$L$dec_loop: + + + + movdqa xmm4,XMMWORD[((-32))+r10] + movdqa xmm1,XMMWORD[((-16))+r10] +DB 102,15,56,0,226 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[16+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[32+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[48+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD[64+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD[80+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + add r9,16 +DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub rax,1 + +$L$dec_entry: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + movdqa xmm2,xmm11 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm2 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm0,XMMWORD[r9] + pxor xmm3,xmm1 + jnz NEAR $L$dec_loop + + + movdqa xmm4,XMMWORD[96+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD[112+r10] + movdqa xmm2,XMMWORD[((-352))+r11] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + DB 0F3h,0C3h ;repret + + + + + + + + +ALIGN 16 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa xmm8,XMMWORD[$L$k_rcon] + movdqu xmm0,XMMWORD[rdi] + + + movdqa xmm3,xmm0 + lea r11,[$L$k_ipt] + call _vpaes_schedule_transform + movdqa xmm7,xmm0 + + lea r10,[$L$k_sr] + test rcx,rcx + jnz NEAR $L$schedule_am_decrypting + + + movdqu XMMWORD[rdx],xmm0 + jmp NEAR $L$schedule_go + +$L$schedule_am_decrypting: + + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,217 + movdqu XMMWORD[rdx],xmm3 + xor r8,0x30 + +$L$schedule_go: + cmp esi,192 + ja NEAR $L$schedule_256 + je NEAR $L$schedule_192 + + + + + + + + + + +$L$schedule_128: + mov esi,10 + +$L$oop_schedule_128: + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp NEAR $L$oop_schedule_128 + + + + + + + + + + + + + + + + +ALIGN 16 +$L$schedule_192: + movdqu xmm0,XMMWORD[8+rdi] + call _vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov esi,4 + +$L$oop_schedule_192: + call _vpaes_schedule_round +DB 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp NEAR $L$oop_schedule_192 + + + + + + + + + + + +ALIGN 16 +$L$schedule_256: + movdqu xmm0,XMMWORD[16+rdi] + call _vpaes_schedule_transform + mov esi,7 + +$L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa xmm6,xmm0 + + + call _vpaes_schedule_round + dec rsi + jz NEAR $L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd xmm0,xmm0,0xFF + movdqa xmm5,xmm7 + movdqa xmm7,xmm6 + call _vpaes_schedule_low_round + movdqa xmm7,xmm5 + + jmp NEAR $L$oop_schedule_256 + + + + + + + + + + + + +ALIGN 16 +$L$schedule_mangle_last: + + lea r11,[$L$k_deskew] + test rcx,rcx + jnz NEAR $L$schedule_mangle_last_dec + + + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,193 + lea r11,[$L$k_opt] + add rdx,32 + +$L$schedule_mangle_last_dec: + add rdx,-16 + pxor xmm0,XMMWORD[$L$k_s63] + call _vpaes_schedule_transform + movdqu XMMWORD[rdx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + DB 0F3h,0C3h ;repret + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_192_smear: + pshufd xmm1,xmm6,0x80 + pshufd xmm0,xmm7,0xFE + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + DB 0F3h,0C3h ;repret + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_round: + + pxor xmm1,xmm1 +DB 102,65,15,58,15,200,15 +DB 102,69,15,58,15,192,15 + pxor xmm7,xmm1 + + + pshufd xmm0,xmm0,0xFF +DB 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD[$L$k_s63] + + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + + + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + DB 0F3h,0C3h ;repret + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_transform: + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,XMMWORD[r11] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD[16+r11] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + DB 0F3h,0C3h ;repret + + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_mangle: + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD[$L$k_mc_forward] + test rcx,rcx + jnz NEAR $L$schedule_mangle_dec + + + add rdx,16 + pxor xmm4,XMMWORD[$L$k_s63] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + + jmp NEAR $L$schedule_mangle_both +ALIGN 16 +$L$schedule_mangle_dec: + + lea r11,[$L$k_dksd] + movdqa xmm1,xmm9 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm9 + + movdqa xmm2,XMMWORD[r11] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD[16+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[32+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[48+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[64+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[80+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD[96+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD[112+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + + add rdx,-16 + +$L$schedule_mangle_both: + movdqa xmm1,XMMWORD[r10*1+r8] +DB 102,15,56,0,217 + add r8,-16 + and r8,0x30 + movdqu XMMWORD[rdx],xmm3 + DB 0F3h,0C3h ;repret + + + + + +global vpaes_set_encrypt_key + +ALIGN 16 +vpaes_set_encrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_encrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$enc_key_body: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD[240+rdx],eax + + mov ecx,0 + mov r8d,0x30 + call _vpaes_schedule_core + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$enc_key_epilogue: + xor eax,eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_encrypt_key: + +global vpaes_set_decrypt_key + +ALIGN 16 +vpaes_set_decrypt_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_decrypt_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$dec_key_body: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD[240+rdx],eax + shl eax,4 + lea rdx,[16+rax*1+rdx] + + mov ecx,1 + mov r8d,esi + shr r8d,1 + and r8d,32 + xor r8d,32 + call _vpaes_schedule_core + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$dec_key_epilogue: + xor eax,eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_decrypt_key: + +global vpaes_encrypt + +ALIGN 16 +vpaes_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$enc_body: + movdqu xmm0,XMMWORD[rdi] + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu XMMWORD[rsi],xmm0 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$enc_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_encrypt: + +global vpaes_decrypt + +ALIGN 16 +vpaes_decrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_decrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$dec_body: + movdqu xmm0,XMMWORD[rdi] + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu XMMWORD[rsi],xmm0 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$dec_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_decrypt: +global vpaes_cbc_encrypt + +ALIGN 16 +vpaes_cbc_encrypt: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_cbc_encrypt: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + xchg rdx,rcx + sub rcx,16 + jc NEAR $L$cbc_abort + lea rsp,[((-184))+rsp] + movaps XMMWORD[16+rsp],xmm6 + movaps XMMWORD[32+rsp],xmm7 + movaps XMMWORD[48+rsp],xmm8 + movaps XMMWORD[64+rsp],xmm9 + movaps XMMWORD[80+rsp],xmm10 + movaps XMMWORD[96+rsp],xmm11 + movaps XMMWORD[112+rsp],xmm12 + movaps XMMWORD[128+rsp],xmm13 + movaps XMMWORD[144+rsp],xmm14 + movaps XMMWORD[160+rsp],xmm15 +$L$cbc_body: + movdqu xmm6,XMMWORD[r8] + sub rsi,rdi + call _vpaes_preheat + cmp r9d,0 + je NEAR $L$cbc_dec_loop + jmp NEAR $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop: + movdqu xmm0,XMMWORD[rdi] + pxor xmm0,xmm6 + call _vpaes_encrypt_core + movdqa xmm6,xmm0 + movdqu XMMWORD[rdi*1+rsi],xmm0 + lea rdi,[16+rdi] + sub rcx,16 + jnc NEAR $L$cbc_enc_loop + jmp NEAR $L$cbc_done +ALIGN 16 +$L$cbc_dec_loop: + movdqu xmm0,XMMWORD[rdi] + movdqa xmm7,xmm0 + call _vpaes_decrypt_core + pxor xmm0,xmm6 + movdqa xmm6,xmm7 + movdqu XMMWORD[rdi*1+rsi],xmm0 + lea rdi,[16+rdi] + sub rcx,16 + jnc NEAR $L$cbc_dec_loop +$L$cbc_done: + movdqu XMMWORD[r8],xmm6 + movaps xmm6,XMMWORD[16+rsp] + movaps xmm7,XMMWORD[32+rsp] + movaps xmm8,XMMWORD[48+rsp] + movaps xmm9,XMMWORD[64+rsp] + movaps xmm10,XMMWORD[80+rsp] + movaps xmm11,XMMWORD[96+rsp] + movaps xmm12,XMMWORD[112+rsp] + movaps xmm13,XMMWORD[128+rsp] + movaps xmm14,XMMWORD[144+rsp] + movaps xmm15,XMMWORD[160+rsp] + lea rsp,[184+rsp] +$L$cbc_epilogue: +$L$cbc_abort: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_cbc_encrypt: + + + + + + + +ALIGN 16 +_vpaes_preheat: + lea r10,[$L$k_s0F] + movdqa xmm10,XMMWORD[((-32))+r10] + movdqa xmm11,XMMWORD[((-16))+r10] + movdqa xmm9,XMMWORD[r10] + movdqa xmm13,XMMWORD[48+r10] + movdqa xmm12,XMMWORD[64+r10] + movdqa xmm15,XMMWORD[80+r10] + movdqa xmm14,XMMWORD[96+r10] + DB 0F3h,0C3h ;repret + + + + + + + +ALIGN 64 +_vpaes_consts: +$L$k_inv: + DQ 0x0E05060F0D080180,0x040703090A0B0C02 + DQ 0x01040A060F0B0780,0x030D0E0C02050809 + +$L$k_s0F: + DQ 0x0F0F0F0F0F0F0F0F,0x0F0F0F0F0F0F0F0F + +$L$k_ipt: + DQ 0xC2B2E8985A2A7000,0xCABAE09052227808 + DQ 0x4C01307D317C4D00,0xCD80B1FCB0FDCC81 + +$L$k_sb1: + DQ 0xB19BE18FCB503E00,0xA5DF7A6E142AF544 + DQ 0x3618D415FAE22300,0x3BF7CCC10D2ED9EF +$L$k_sb2: + DQ 0xE27A93C60B712400,0x5EB7E955BC982FCD + DQ 0x69EB88400AE12900,0xC2A163C8AB82234A +$L$k_sbo: + DQ 0xD0D26D176FBDC700,0x15AABF7AC502A878 + DQ 0xCFE474A55FBB6A00,0x8E1E90D1412B35FA + +$L$k_mc_forward: + DQ 0x0407060500030201,0x0C0F0E0D080B0A09 + DQ 0x080B0A0904070605,0x000302010C0F0E0D + DQ 0x0C0F0E0D080B0A09,0x0407060500030201 + DQ 0x000302010C0F0E0D,0x080B0A0904070605 + +$L$k_mc_backward: + DQ 0x0605040702010003,0x0E0D0C0F0A09080B + DQ 0x020100030E0D0C0F,0x0A09080B06050407 + DQ 0x0E0D0C0F0A09080B,0x0605040702010003 + DQ 0x0A09080B06050407,0x020100030E0D0C0F + +$L$k_sr: + DQ 0x0706050403020100,0x0F0E0D0C0B0A0908 + DQ 0x030E09040F0A0500,0x0B06010C07020D08 + DQ 0x0F060D040B020900,0x070E050C030A0108 + DQ 0x0B0E0104070A0D00,0x0306090C0F020508 + +$L$k_rcon: + DQ 0x1F8391B9AF9DEEB6,0x702A98084D7C7D81 + +$L$k_s63: + DQ 0x5B5B5B5B5B5B5B5B,0x5B5B5B5B5B5B5B5B + +$L$k_opt: + DQ 0xFF9F4929D6B66000,0xF7974121DEBE6808 + DQ 0x01EDBD5150BCEC00,0xE10D5DB1B05C0CE0 + +$L$k_deskew: + DQ 0x07E4A34047A4E300,0x1DFEB95A5DBEF91A + DQ 0x5F36B5DC83EA6900,0x2841C2ABF49D1E77 + + + + + +$L$k_dksd: + DQ 0xFEB91A5DA3E44700,0x0740E3A45A1DBEF9 + DQ 0x41C277F4B5368300,0x5FDC69EAAB289D1E +$L$k_dksb: + DQ 0x9A4FCA1F8550D500,0x03D653861CC94C99 + DQ 0x115BEDA7B6FC4A00,0xD993256F7E3482C8 +$L$k_dkse: + DQ 0xD5031CCA1FC9D600,0x53859A4C994F5086 + DQ 0xA23196054FDC7BE8,0xCD5EF96A20B31487 +$L$k_dks9: + DQ 0xB6116FC87ED9A700,0x4AED933482255BFC + DQ 0x4576516227143300,0x8BB89FACE9DAFDCE + + + + + +$L$k_dipt: + DQ 0x0F505B040B545F00,0x154A411E114E451A + DQ 0x86E383E660056500,0x12771772F491F194 + +$L$k_dsb9: + DQ 0x851C03539A86D600,0xCAD51F504F994CC9 + DQ 0xC03B1789ECD74900,0x725E2C9EB2FBA565 +$L$k_dsbd: + DQ 0x7D57CCDFE6B1A200,0xF56E9B13882A4439 + DQ 0x3CE2FAF724C6CB00,0x2931180D15DEEFD3 +$L$k_dsbb: + DQ 0xD022649296B44200,0x602646F6B0F2D404 + DQ 0xC19498A6CD596700,0xF3FF0C3E3255AA6B +$L$k_dsbe: + DQ 0x46F2929626D4D000,0x2242600464B4F6B0 + DQ 0x0C55A6CDFFAAC100,0x9467F36B98593E32 +$L$k_dsbo: + DQ 0x1387EA537EF94000,0xC7AA6DB9D4943E2D + DQ 0x12D7560F93441D00,0xCA4B8159D8C58E9C +DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54 +DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97 +DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32 +DB 85,110,105,118,101,114,115,105,116,121,41,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rsi,[16+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + lea rax,[184+rax] + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_vpaes_set_encrypt_key wrt ..imagebase + DD $L$SEH_end_vpaes_set_encrypt_key wrt ..imagebase + DD $L$SEH_info_vpaes_set_encrypt_key wrt ..imagebase + + DD $L$SEH_begin_vpaes_set_decrypt_key wrt ..imagebase + DD $L$SEH_end_vpaes_set_decrypt_key wrt ..imagebase + DD $L$SEH_info_vpaes_set_decrypt_key wrt ..imagebase + + DD $L$SEH_begin_vpaes_encrypt wrt ..imagebase + DD $L$SEH_end_vpaes_encrypt wrt ..imagebase + DD $L$SEH_info_vpaes_encrypt wrt ..imagebase + + DD $L$SEH_begin_vpaes_decrypt wrt ..imagebase + DD $L$SEH_end_vpaes_decrypt wrt ..imagebase + DD $L$SEH_info_vpaes_decrypt wrt ..imagebase + + DD $L$SEH_begin_vpaes_cbc_encrypt wrt ..imagebase + DD $L$SEH_end_vpaes_cbc_encrypt wrt ..imagebase + DD $L$SEH_info_vpaes_cbc_encrypt wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_vpaes_set_encrypt_key: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$enc_key_body wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase +$L$SEH_info_vpaes_set_decrypt_key: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$dec_key_body wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase +$L$SEH_info_vpaes_encrypt: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$enc_body wrt ..imagebase,$L$enc_epilogue wrt ..imagebase +$L$SEH_info_vpaes_decrypt: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$dec_body wrt ..imagebase,$L$dec_epilogue wrt ..imagebase +$L$SEH_info_vpaes_cbc_encrypt: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$cbc_body wrt ..imagebase,$L$cbc_epilogue wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/bn/rsaz-avx2.asm b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-avx2.asm new file mode 100644 index 00000000000..45d0fd46320 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-avx2.asm @@ -0,0 +1,30 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global rsaz_avx2_eligible + +rsaz_avx2_eligible: + xor eax,eax + DB 0F3h,0C3h ;repret + + +global rsaz_1024_sqr_avx2 +global rsaz_1024_mul_avx2 +global rsaz_1024_norm2red_avx2 +global rsaz_1024_red2norm_avx2 +global rsaz_1024_scatter5_avx2 +global rsaz_1024_gather5_avx2 + +rsaz_1024_sqr_avx2: +rsaz_1024_mul_avx2: +rsaz_1024_norm2red_avx2: +rsaz_1024_red2norm_avx2: +rsaz_1024_scatter5_avx2: +rsaz_1024_gather5_avx2: +DB 0x0f,0x0b + DB 0F3h,0C3h ;repret + diff --git a/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm new file mode 100644 index 00000000000..04d5e3915af --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/bn/rsaz-x86_64.asm @@ -0,0 +1,1320 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global rsaz_512_sqr + +ALIGN 32 +rsaz_512_sqr: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_sqr: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$sqr_body: + mov rbp,rdx + mov rdx,QWORD[rsi] + mov rax,QWORD[8+rsi] + mov QWORD[128+rsp],rcx + jmp NEAR $L$oop_sqr + +ALIGN 32 +$L$oop_sqr: + mov DWORD[((128+8))+rsp],r8d + + mov rbx,rdx + mul rdx + mov r8,rax + mov rax,QWORD[16+rsi] + mov r9,rdx + + mul rbx + add r9,rax + mov rax,QWORD[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,rbx + mov r15,rdx + adc r15,0 + + add r8,r8 + mov rcx,r9 + adc r9,r9 + + mul rax + mov QWORD[rsp],rax + add r8,rdx + adc r9,0 + + mov QWORD[8+rsp],r8 + shr rcx,63 + + + mov r8,QWORD[8+rsi] + mov rax,QWORD[16+rsi] + mul r8 + add r10,rax + mov rax,QWORD[24+rsi] + mov rbx,rdx + adc rbx,0 + + mul r8 + add r11,rax + mov rax,QWORD[32+rsi] + adc rdx,0 + add r11,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r12,rax + mov rax,QWORD[40+rsi] + adc rdx,0 + add r12,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r13,rax + mov rax,QWORD[48+rsi] + adc rdx,0 + add r13,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r14,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + add r14,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r15,rax + mov rax,r8 + adc rdx,0 + add r15,rbx + mov r8,rdx + mov rdx,r10 + adc r8,0 + + add rdx,rdx + lea r10,[r10*2+rcx] + mov rbx,r11 + adc r11,r11 + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD[16+rsp],r9 + mov QWORD[24+rsp],r10 + shr rbx,63 + + + mov r9,QWORD[16+rsi] + mov rax,QWORD[24+rsi] + mul r9 + add r12,rax + mov rax,QWORD[32+rsi] + mov rcx,rdx + adc rcx,0 + + mul r9 + add r13,rax + mov rax,QWORD[40+rsi] + adc rdx,0 + add r13,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + add r14,rax + mov rax,QWORD[48+rsi] + adc rdx,0 + add r14,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + mov r10,r12 + lea r12,[r12*2+rbx] + add r15,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + add r15,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + shr r10,63 + add r8,rax + mov rax,r9 + adc rdx,0 + add r8,rcx + mov r9,rdx + adc r9,0 + + mov rcx,r13 + lea r13,[r13*2+r10] + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD[32+rsp],r11 + mov QWORD[40+rsp],r12 + shr rcx,63 + + + mov r10,QWORD[24+rsi] + mov rax,QWORD[32+rsi] + mul r10 + add r14,rax + mov rax,QWORD[40+rsi] + mov rbx,rdx + adc rbx,0 + + mul r10 + add r15,rax + mov rax,QWORD[48+rsi] + adc rdx,0 + add r15,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + mov r12,r14 + lea r14,[r14*2+rcx] + add r8,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + add r8,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + shr r12,63 + add r9,rax + mov rax,r10 + adc rdx,0 + add r9,rbx + mov r10,rdx + adc r10,0 + + mov rbx,r15 + lea r15,[r15*2+r12] + + mul rax + add r13,rax + adc r14,rdx + adc r15,0 + + mov QWORD[48+rsp],r13 + mov QWORD[56+rsp],r14 + shr rbx,63 + + + mov r11,QWORD[32+rsi] + mov rax,QWORD[40+rsi] + mul r11 + add r8,rax + mov rax,QWORD[48+rsi] + mov rcx,rdx + adc rcx,0 + + mul r11 + add r9,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + mov r12,r8 + lea r8,[r8*2+rbx] + add r9,rcx + mov rcx,rdx + adc rcx,0 + + mul r11 + shr r12,63 + add r10,rax + mov rax,r11 + adc rdx,0 + add r10,rcx + mov r11,rdx + adc r11,0 + + mov rcx,r9 + lea r9,[r9*2+r12] + + mul rax + add r15,rax + adc r8,rdx + adc r9,0 + + mov QWORD[64+rsp],r15 + mov QWORD[72+rsp],r8 + shr rcx,63 + + + mov r12,QWORD[40+rsi] + mov rax,QWORD[48+rsi] + mul r12 + add r10,rax + mov rax,QWORD[56+rsi] + mov rbx,rdx + adc rbx,0 + + mul r12 + add r11,rax + mov rax,r12 + mov r15,r10 + lea r10,[r10*2+rcx] + adc rdx,0 + shr r15,63 + add r11,rbx + mov r12,rdx + adc r12,0 + + mov rbx,r11 + lea r11,[r11*2+r15] + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD[80+rsp],r9 + mov QWORD[88+rsp],r10 + + + mov r13,QWORD[48+rsi] + mov rax,QWORD[56+rsi] + mul r13 + add r12,rax + mov rax,r13 + mov r13,rdx + adc r13,0 + + xor r14,r14 + shl rbx,1 + adc r12,r12 + adc r13,r13 + adc r14,r14 + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD[96+rsp],r11 + mov QWORD[104+rsp],r12 + + + mov rax,QWORD[56+rsi] + mul rax + add r13,rax + adc rdx,0 + + add r14,rdx + + mov QWORD[112+rsp],r13 + mov QWORD[120+rsp],r14 + + mov r8,QWORD[rsp] + mov r9,QWORD[8+rsp] + mov r10,QWORD[16+rsp] + mov r11,QWORD[24+rsp] + mov r12,QWORD[32+rsp] + mov r13,QWORD[40+rsp] + mov r14,QWORD[48+rsp] + mov r15,QWORD[56+rsp] + + call __rsaz_512_reduce + + add r8,QWORD[64+rsp] + adc r9,QWORD[72+rsp] + adc r10,QWORD[80+rsp] + adc r11,QWORD[88+rsp] + adc r12,QWORD[96+rsp] + adc r13,QWORD[104+rsp] + adc r14,QWORD[112+rsp] + adc r15,QWORD[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz NEAR $L$oop_sqr + + lea rax,[((128+24+48))+rsp] + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + lea rsp,[rax] +$L$sqr_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_sqr: +global rsaz_512_mul + +ALIGN 32 +rsaz_512_mul: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_body: +DB 102,72,15,110,199 +DB 102,72,15,110,201 + mov QWORD[128+rsp],r8 + mov rbx,QWORD[rdx] + mov rbp,rdx + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD[rsp] + mov r9,QWORD[8+rsp] + mov r10,QWORD[16+rsp] + mov r11,QWORD[24+rsp] + mov r12,QWORD[32+rsp] + mov r13,QWORD[40+rsp] + mov r14,QWORD[48+rsp] + mov r15,QWORD[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD[64+rsp] + adc r9,QWORD[72+rsp] + adc r10,QWORD[80+rsp] + adc r11,QWORD[88+rsp] + adc r12,QWORD[96+rsp] + adc r13,QWORD[104+rsp] + adc r14,QWORD[112+rsp] + adc r15,QWORD[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,[((128+24+48))+rsp] + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + lea rsp,[rax] +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul: +global rsaz_512_mul_gather4 + +ALIGN 32 +rsaz_512_mul_gather4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_gather4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_gather4_body: + mov eax,DWORD[64+r9*4+rdx] +DB 102,72,15,110,199 + mov ebx,DWORD[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD[128+rsp],r8 + + shl rax,32 + or rbx,rax + mov rax,QWORD[rsi] + mov rcx,QWORD[8+rsi] + lea rbp,[128+r9*4+rdx] + mul rbx + mov QWORD[rsp],rax + mov rax,rcx + mov r8,rdx + + mul rbx + movd xmm4,DWORD[rbp] + add r8,rax + mov rax,QWORD[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD[64+rbp] + add r9,rax + mov rax,QWORD[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r10,rax + mov rax,QWORD[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r11,rax + mov rax,QWORD[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + lea rbp,[128+rbp] + add r13,rax + mov rax,QWORD[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r14,rax + mov rax,QWORD[rsi] + mov r15,rdx + adc r15,0 + + lea rdi,[8+rsp] + mov ecx,7 + jmp NEAR $L$oop_mul_gather + +ALIGN 32 +$L$oop_mul_gather: + mul rbx + add r8,rax + mov rax,QWORD[8+rsi] + mov QWORD[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + movd xmm4,DWORD[rbp] + add r9,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD[64+rbp] + add r10,rax + mov rax,QWORD[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r11,rax + mov rax,QWORD[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r12,rax + mov rax,QWORD[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r15,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rbp,[128+rbp] + lea rdi,[8+rdi] + + dec ecx + jnz NEAR $L$oop_mul_gather + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD[rsp] + mov r9,QWORD[8+rsp] + mov r10,QWORD[16+rsp] + mov r11,QWORD[24+rsp] + mov r12,QWORD[32+rsp] + mov r13,QWORD[40+rsp] + mov r14,QWORD[48+rsp] + mov r15,QWORD[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD[64+rsp] + adc r9,QWORD[72+rsp] + adc r10,QWORD[80+rsp] + adc r11,QWORD[88+rsp] + adc r12,QWORD[96+rsp] + adc r13,QWORD[104+rsp] + adc r14,QWORD[112+rsp] + adc r15,QWORD[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,[((128+24+48))+rsp] + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + lea rsp,[rax] +$L$mul_gather4_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_gather4: +global rsaz_512_mul_scatter4 + +ALIGN 32 +rsaz_512_mul_scatter4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_scatter4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_scatter4_body: + lea r8,[r9*4+r8] +DB 102,72,15,110,199 +DB 102,72,15,110,202 +DB 102,73,15,110,208 + mov QWORD[128+rsp],rcx + + mov rbp,rdi + mov rbx,QWORD[rdi] + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD[rsp] + mov r9,QWORD[8+rsp] + mov r10,QWORD[16+rsp] + mov r11,QWORD[24+rsp] + mov r12,QWORD[32+rsp] + mov r13,QWORD[40+rsp] + mov r14,QWORD[48+rsp] + mov r15,QWORD[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD[64+rsp] + adc r9,QWORD[72+rsp] + adc r10,QWORD[80+rsp] + adc r11,QWORD[88+rsp] + adc r12,QWORD[96+rsp] + adc r13,QWORD[104+rsp] + adc r14,QWORD[112+rsp] + adc r15,QWORD[120+rsp] +DB 102,72,15,126,214 + sbb rcx,rcx + + call __rsaz_512_subtract + + mov DWORD[rsi],r8d + shr r8,32 + mov DWORD[128+rsi],r9d + shr r9,32 + mov DWORD[256+rsi],r10d + shr r10,32 + mov DWORD[384+rsi],r11d + shr r11,32 + mov DWORD[512+rsi],r12d + shr r12,32 + mov DWORD[640+rsi],r13d + shr r13,32 + mov DWORD[768+rsi],r14d + shr r14,32 + mov DWORD[896+rsi],r15d + shr r15,32 + mov DWORD[64+rsi],r8d + mov DWORD[192+rsi],r9d + mov DWORD[320+rsi],r10d + mov DWORD[448+rsi],r11d + mov DWORD[576+rsi],r12d + mov DWORD[704+rsi],r13d + mov DWORD[832+rsi],r14d + mov DWORD[960+rsi],r15d + + lea rax,[((128+24+48))+rsp] + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + lea rsp,[rax] +$L$mul_scatter4_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_scatter4: +global rsaz_512_mul_by_one + +ALIGN 32 +rsaz_512_mul_by_one: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_by_one: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_by_one_body: + mov rbp,rdx + mov QWORD[128+rsp],rcx + + mov r8,QWORD[rsi] + pxor xmm0,xmm0 + mov r9,QWORD[8+rsi] + mov r10,QWORD[16+rsi] + mov r11,QWORD[24+rsi] + mov r12,QWORD[32+rsi] + mov r13,QWORD[40+rsi] + mov r14,QWORD[48+rsi] + mov r15,QWORD[56+rsi] + + movdqa XMMWORD[rsp],xmm0 + movdqa XMMWORD[16+rsp],xmm0 + movdqa XMMWORD[32+rsp],xmm0 + movdqa XMMWORD[48+rsp],xmm0 + movdqa XMMWORD[64+rsp],xmm0 + movdqa XMMWORD[80+rsp],xmm0 + movdqa XMMWORD[96+rsp],xmm0 + call __rsaz_512_reduce + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + + lea rax,[((128+24+48))+rsp] + mov r15,QWORD[((-48))+rax] + mov r14,QWORD[((-40))+rax] + mov r13,QWORD[((-32))+rax] + mov r12,QWORD[((-24))+rax] + mov rbp,QWORD[((-16))+rax] + mov rbx,QWORD[((-8))+rax] + lea rsp,[rax] +$L$mul_by_one_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_by_one: + +ALIGN 32 +__rsaz_512_reduce: + mov rbx,r8 + imul rbx,QWORD[((128+8))+rsp] + mov rax,QWORD[rbp] + mov ecx,8 + jmp NEAR $L$reduction_loop + +ALIGN 32 +$L$reduction_loop: + mul rbx + mov rax,QWORD[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[16+rbp] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[32+rbp] + adc rdx,0 + add r10,r11 + mov rsi,QWORD[((128+8))+rsp] + + + adc rdx,0 + mov r11,rdx + + mul rbx + add r12,rax + mov rax,QWORD[40+rbp] + adc rdx,0 + imul rsi,r8 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jne NEAR $L$reduction_loop + + DB 0F3h,0C3h ;repret + + +ALIGN 32 +__rsaz_512_subtract: + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + + mov r8,QWORD[rbp] + mov r9,QWORD[8+rbp] + neg r8 + not r9 + and r8,rcx + mov r10,QWORD[16+rbp] + and r9,rcx + not r10 + mov r11,QWORD[24+rbp] + and r10,rcx + not r11 + mov r12,QWORD[32+rbp] + and r11,rcx + not r12 + mov r13,QWORD[40+rbp] + and r12,rcx + not r13 + mov r14,QWORD[48+rbp] + and r13,rcx + not r14 + mov r15,QWORD[56+rbp] + and r14,rcx + not r15 + and r15,rcx + + add r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + + DB 0F3h,0C3h ;repret + + +ALIGN 32 +__rsaz_512_mul: + lea rdi,[8+rsp] + + mov rax,QWORD[rsi] + mul rbx + mov QWORD[rdi],rax + mov rax,QWORD[8+rsi] + mov r8,rdx + + mul rbx + add r8,rax + mov rax,QWORD[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + add r9,rax + mov rax,QWORD[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,QWORD[rsi] + mov r15,rdx + adc r15,0 + + lea rbp,[8+rbp] + lea rdi,[8+rdi] + + mov ecx,7 + jmp NEAR $L$oop_mul + +ALIGN 32 +$L$oop_mul: + mov rbx,QWORD[rbp] + mul rbx + add r8,rax + mov rax,QWORD[8+rsi] + mov QWORD[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + lea rbp,[8+rbp] + adc r14,0 + + mul rbx + add r15,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rdi,[8+rdi] + + dec ecx + jnz NEAR $L$oop_mul + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + + DB 0F3h,0C3h ;repret + +global rsaz_512_scatter4 + +ALIGN 16 +rsaz_512_scatter4: + lea rcx,[r8*4+rcx] + mov r9d,8 + jmp NEAR $L$oop_scatter +ALIGN 16 +$L$oop_scatter: + mov rax,QWORD[rdx] + lea rdx,[8+rdx] + mov DWORD[rcx],eax + shr rax,32 + mov DWORD[64+rcx],eax + lea rcx,[128+rcx] + dec r9d + jnz NEAR $L$oop_scatter + DB 0F3h,0C3h ;repret + + +global rsaz_512_gather4 + +ALIGN 16 +rsaz_512_gather4: + lea rdx,[r8*4+rdx] + mov r9d,8 + jmp NEAR $L$oop_gather +ALIGN 16 +$L$oop_gather: + mov eax,DWORD[rdx] + mov r8d,DWORD[64+rdx] + lea rdx,[128+rdx] + shl r8,32 + or rax,r8 + mov QWORD[rcx],rax + lea rcx,[8+rcx] + dec r9d + jnz NEAR $L$oop_gather + DB 0F3h,0C3h ;repret + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rax,[((128+24+48))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase + DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase + DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase + + DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase + DD $L$SEH_end_rsaz_512_mul wrt ..imagebase + DD $L$SEH_info_rsaz_512_mul wrt ..imagebase + + DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase + DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase + DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase + + DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase + DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase + DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase + + DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase + DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase + DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_rsaz_512_sqr: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase +$L$SEH_info_rsaz_512_mul: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase +$L$SEH_info_rsaz_512_mul_gather4: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase +$L$SEH_info_rsaz_512_mul_scatter4: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase +$L$SEH_info_rsaz_512_mul_by_one: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm new file mode 100644 index 00000000000..db0d1b976fc --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont.asm @@ -0,0 +1,941 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global bn_mul_mont + +ALIGN 16 +bn_mul_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + test r9d,3 + jnz NEAR $L$mul_enter + cmp r9d,8 + jb NEAR $L$mul_enter + cmp rdx,rsi + jne NEAR $L$mul4x_enter + test r9d,7 + jz NEAR $L$sqr8x_enter + jmp NEAR $L$mul4x_enter + +ALIGN 16 +$L$mul_enter: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,[2+r9] + mov r11,rsp + neg r10 + lea rsp,[r10*8+rsp] + and rsp,-1024 + + mov QWORD[8+r9*8+rsp],r11 +$L$mul_body: + mov r12,rdx + mov r8,QWORD[r8] + mov rbx,QWORD[r12] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$1st_enter + +ALIGN 16 +$L$1st: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + lea r15,[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne NEAR $L$1st + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + jmp NEAR $L$outer +ALIGN 16 +$L$outer: + mov rbx,QWORD[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD[rsp] + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r10,QWORD[8+rsp] + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$inner_enter + +ALIGN 16 +$L$inner: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,[1+r15] + + mul rbp + cmp r15,r9 + jne NEAR $L$inner + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + cmp r14,r9 + jb NEAR $L$outer + + xor r14,r14 + mov rax,QWORD[rsp] + lea rsi,[rsp] + mov r15,r9 + jmp NEAR $L$sub +ALIGN 16 +$L$sub: sbb rax,QWORD[r14*8+rcx] + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[8+r14*8+rsi] + lea r14,[1+r14] + dec r15 + jnz NEAR $L$sub + + sbb rax,0 + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy: + mov rsi,QWORD[r14*8+rsp] + mov rcx,QWORD[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD[r14*8+rsp],r14 + mov QWORD[r14*8+rdi],rsi + lea r14,[1+r14] + sub r15,1 + jnz NEAR $L$copy + + mov rsi,QWORD[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont: + +ALIGN 16 +bn_mul4x_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + +$L$mul4x_enter: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,[4+r9] + mov r11,rsp + neg r10 + lea rsp,[r10*8+rsp] + and rsp,-1024 + + mov QWORD[8+r9*8+rsp],r11 +$L$mul4x_body: + mov QWORD[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD[r8] + mov rbx,QWORD[r12] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[4+r15] + adc rdx,0 + mov QWORD[rsp],rdi + mov r13,rdx + jmp NEAR $L$1st4x +ALIGN 16 +$L$1st4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+r15*8+rcx] + adc rdx,0 + lea r15,[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb NEAR $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov QWORD[r15*8+rsp],rdi + + lea r14,[1+r14] +ALIGN 4 +$L$outer4x: + mov rbx,QWORD[r14*8+r12] + xor r15,r15 + mov r10,QWORD[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+rcx] + adc rdx,0 + add r11,QWORD[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[4+r15] + adc rdx,0 + mov QWORD[rsp],rdi + mov r13,rdx + jmp NEAR $L$inner4x +ALIGN 16 +$L$inner4x: + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r15*8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,QWORD[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD[8+r15*8+rsp] + adc rdx,0 + lea r15,[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb NEAR $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD[r9*8+rsp] + adc rdi,0 + mov QWORD[((-8))+r15*8+rsp],r13 + mov QWORD[r15*8+rsp],rdi + + cmp r14,r9 + jb NEAR $L$outer4x + mov rdi,QWORD[16+r9*8+rsp] + mov rax,QWORD[rsp] + mov rdx,QWORD[8+rsp] + shr r9,2 + lea rsi,[rsp] + xor r14,r14 + + sub rax,QWORD[rcx] + mov rbx,QWORD[16+rsi] + mov rbp,QWORD[24+rsi] + sbb rdx,QWORD[8+rcx] + lea r15,[((-1))+r9] + jmp NEAR $L$sub4x +ALIGN 16 +$L$sub4x: + mov QWORD[r14*8+rdi],rax + mov QWORD[8+r14*8+rdi],rdx + sbb rbx,QWORD[16+r14*8+rcx] + mov rax,QWORD[32+r14*8+rsi] + mov rdx,QWORD[40+r14*8+rsi] + sbb rbp,QWORD[24+r14*8+rcx] + mov QWORD[16+r14*8+rdi],rbx + mov QWORD[24+r14*8+rdi],rbp + sbb rax,QWORD[32+r14*8+rcx] + mov rbx,QWORD[48+r14*8+rsi] + mov rbp,QWORD[56+r14*8+rsi] + sbb rdx,QWORD[40+r14*8+rcx] + lea r14,[4+r14] + dec r15 + jnz NEAR $L$sub4x + + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[32+r14*8+rsi] + sbb rbx,QWORD[16+r14*8+rcx] + mov QWORD[8+r14*8+rdi],rdx + sbb rbp,QWORD[24+r14*8+rcx] + mov QWORD[16+r14*8+rdi],rbx + + sbb rax,0 +DB 66h, 48h, 0fh, 6eh, 0c0h + punpcklqdq xmm0,xmm0 + mov QWORD[24+r14*8+rdi],rbp + xor r14,r14 + + mov r15,r9 + pxor xmm5,xmm5 + jmp NEAR $L$copy4x +ALIGN 16 +$L$copy4x: + movdqu xmm2,XMMWORD[r14*1+rsp] + movdqu xmm4,XMMWORD[16+r14*1+rsp] + movdqu xmm1,XMMWORD[r14*1+rdi] + movdqu xmm3,XMMWORD[16+r14*1+rdi] + pxor xmm2,xmm1 + pxor xmm4,xmm3 + pand xmm2,xmm0 + pand xmm4,xmm0 + pxor xmm2,xmm1 + pxor xmm4,xmm3 + movdqu XMMWORD[r14*1+rdi],xmm2 + movdqu XMMWORD[16+r14*1+rdi],xmm4 + movdqa XMMWORD[r14*1+rsp],xmm5 + movdqa XMMWORD[16+r14*1+rsp],xmm5 + + lea r14,[32+r14] + dec r15 + jnz NEAR $L$copy4x + + shl r9,2 + mov rsi,QWORD[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$mul4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont: +EXTERN bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_mont: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr8x_mont: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + +$L$sqr8x_enter: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r10d,r9d + shl r9d,3 + shl r10,3+2 + neg r9 + + + + + + + lea r11,[((-64))+r9*4+rsp] + mov r8,QWORD[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb NEAR $L$sqr8x_sp_alt + sub rsp,r11 + lea rsp,[((-64))+r9*4+rsp] + jmp NEAR $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt: + lea r10,[((4096-64))+r9*4] + lea rsp,[((-64))+r9*4+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$sqr8x_sp_done: + and rsp,-64 + mov r10,r9 + neg r9 + + lea r11,[64+r9*2+rsp] + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax +$L$sqr8x_body: + + mov rbp,r9 +DB 102,73,15,110,211 + shr rbp,3+2 + mov eax,DWORD[((OPENSSL_ia32cap_P+8))] + jmp NEAR $L$sqr8x_copy_n + +ALIGN 32 +$L$sqr8x_copy_n: + movq xmm0,QWORD[rcx] + movq xmm1,QWORD[8+rcx] + movq xmm3,QWORD[16+rcx] + movq xmm4,QWORD[24+rcx] + lea rcx,[32+rcx] + movdqa XMMWORD[r11],xmm0 + movdqa XMMWORD[16+r11],xmm1 + movdqa XMMWORD[32+r11],xmm3 + movdqa XMMWORD[48+r11],xmm4 + lea r11,[64+r11] + dec rbp + jnz NEAR $L$sqr8x_copy_n + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + call bn_sqr8x_internal + + pxor xmm0,xmm0 + lea rax,[48+rsp] + lea rdx,[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD[40+rsp] + jmp NEAR $L$sqr8x_zero + +ALIGN 32 +$L$sqr8x_zero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + movdqa XMMWORD[32+rax],xmm0 + movdqa XMMWORD[48+rax],xmm0 + lea rax,[64+rax] + movdqa XMMWORD[rdx],xmm0 + movdqa XMMWORD[16+rdx],xmm0 + movdqa XMMWORD[32+rdx],xmm0 + movdqa XMMWORD[48+rdx],xmm0 + lea rdx,[64+rdx] + dec r9 + jnz NEAR $L$sqr8x_zero + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$sqr8x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_sqr8x_mont: +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +mul_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov r10,QWORD[192+r8] + mov rax,QWORD[8+r10*8+rax] + lea rax,[48+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + jmp NEAR $L$common_seh_tail + + + +ALIGN 16 +sqr_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[40+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_bn_mul_mont wrt ..imagebase + DD $L$SEH_end_bn_mul_mont wrt ..imagebase + DD $L$SEH_info_bn_mul_mont wrt ..imagebase + + DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase + DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase + DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase + + DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase + DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase + DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_bn_mul_mont: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase +$L$SEH_info_bn_mul4x_mont: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase +$L$SEH_info_bn_sqr8x_mont: +DB 9,0,0,0 + DD sqr_handler wrt ..imagebase + DD $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm new file mode 100644 index 00000000000..284318aae32 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/bn/x86_64-mont5.asm @@ -0,0 +1,2056 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global bn_mul_mont_gather5 + +ALIGN 64 +bn_mul_mont_gather5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_gather5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + test r9d,7 + jnz NEAR $L$mul_enter + jmp NEAR $L$mul4x_enter + +ALIGN 16 +$L$mul_enter: + mov r9d,r9d + mov rax,rsp + mov r10d,DWORD[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-40))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + lea r11,[2+r9] + neg r11 + lea rsp,[r11*8+rsp] + and rsp,-1024 + + mov QWORD[8+r9*8+rsp],rax +$L$mul_body: + mov r12,rdx + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,[$L$magic_masks] + and r10,3 + lea r12,[96+r11*8+r12] + movq xmm4,QWORD[r10*8+rax] + movq xmm5,QWORD[8+r10*8+rax] + movq xmm6,QWORD[16+r10*8+rax] + movq xmm7,QWORD[24+r10*8+rax] + + movq xmm0,QWORD[(((-96)))+r12] + movq xmm1,QWORD[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r12,[256+r12] + por xmm0,xmm3 + +DB 102,72,15,126,195 + + mov r8,QWORD[r8] + mov rax,QWORD[rsi] + + xor r14,r14 + xor r15,r15 + + movq xmm0,QWORD[(((-96)))+r12] + movq xmm1,QWORD[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD[32+r12] + pand xmm1,xmm5 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + movq xmm3,QWORD[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$1st_enter + +ALIGN 16 +$L$1st: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + lea r15,[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne NEAR $L$1st + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + jmp NEAR $L$outer +ALIGN 16 +$L$outer: + xor r15,r15 + mov rbp,r8 + mov r10,QWORD[rsp] + + movq xmm0,QWORD[(((-96)))+r12] + movq xmm1,QWORD[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD[32+r12] + pand xmm1,xmm5 + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + movq xmm3,QWORD[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD[8+rsi] + adc rdx,0 + mov r10,QWORD[8+rsp] + mov r13,rdx + + lea r15,[1+r15] + jmp NEAR $L$inner_enter + +ALIGN 16 +$L$inner: + add r13,rax + mov rax,QWORD[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter: + mul rbx + add r11,rax + mov rax,QWORD[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,[1+r15] + + mul rbp + cmp r15,r9 + jne NEAR $L$inner + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD[r15*8+rsp] + adc rdx,0 + mov QWORD[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r9*8+rsp],r13 + mov QWORD[r9*8+rsp],rdx + + lea r14,[1+r14] + cmp r14,r9 + jb NEAR $L$outer + + xor r14,r14 + mov rax,QWORD[rsp] + lea rsi,[rsp] + mov r15,r9 + jmp NEAR $L$sub +ALIGN 16 +$L$sub: sbb rax,QWORD[r14*8+rcx] + mov QWORD[r14*8+rdi],rax + mov rax,QWORD[8+r14*8+rsi] + lea r14,[1+r14] + dec r15 + jnz NEAR $L$sub + + sbb rax,0 + xor r14,r14 + mov r15,r9 +ALIGN 16 +$L$copy: + mov rsi,QWORD[r14*8+rsp] + mov rcx,QWORD[r14*8+rdi] + xor rsi,rcx + and rsi,rax + xor rsi,rcx + mov QWORD[r14*8+rsp],r14 + mov QWORD[r14*8+rdi],rsi + lea r14,[1+r14] + sub r15,1 + jnz NEAR $L$copy + + mov rsi,QWORD[8+r9*8+rsp] + mov rax,1 + movaps xmm6,XMMWORD[((-88))+rsi] + movaps xmm7,XMMWORD[((-72))+rsi] + mov r15,QWORD[((-48))+rsi] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$mul_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont_gather5: + +ALIGN 32 +bn_mul4x_mont_gather5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont_gather5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + +$L$mul4x_enter: +DB 0x67 + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-40))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 +DB 0x67 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + + + + + + + + + lea r11,[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb NEAR $L$mul4xsp_alt + sub rsp,r11 + lea rsp,[((-64))+r9*2+rsp] + jmp NEAR $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt: + lea r10,[((4096-64))+r9*2] + lea rsp,[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$mul4xsp_done: + and rsp,-64 + neg r9 + + mov QWORD[40+rsp],rax +$L$mul4x_body: + + call mul4x_internal + + mov rsi,QWORD[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD[((-88))+rsi] + movaps xmm7,XMMWORD[((-72))+rsi] + mov r15,QWORD[((-48))+rsi] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$mul4x_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont_gather5: + + +ALIGN 32 +mul4x_internal: + shl r9,5 + mov r10d,DWORD[56+rax] + lea r13,[256+r9*1+rdx] + shr r9,5 + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,[$L$magic_masks] + and r10,3 + lea r12,[96+r11*8+rdx] + movq xmm4,QWORD[r10*8+rax] + movq xmm5,QWORD[8+r10*8+rax] + add r11,7 + movq xmm6,QWORD[16+r10*8+rax] + movq xmm7,QWORD[24+r10*8+rax] + and r11,7 + + movq xmm0,QWORD[(((-96)))+r12] + lea r14,[256+r12] + movq xmm1,QWORD[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD[96+r12] + pand xmm2,xmm6 +DB 0x67 + por xmm0,xmm1 + movq xmm1,QWORD[((-96))+r14] +DB 0x67 + pand xmm3,xmm7 +DB 0x67 + por xmm0,xmm2 + movq xmm2,QWORD[((-32))+r14] +DB 0x67 + pand xmm1,xmm4 +DB 0x67 + por xmm0,xmm3 + movq xmm3,QWORD[32+r14] + +DB 102,72,15,126,195 + movq xmm0,QWORD[96+r14] + mov QWORD[((16+8))+rsp],r13 + mov QWORD[((56+8))+rsp],rdi + + mov r8,QWORD[r8] + mov rax,QWORD[rsi] + lea rsi,[r9*1+rsi] + neg r9 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD[rcx] + + pand xmm2,xmm5 + pand xmm3,xmm6 + por xmm1,xmm2 + + imul rbp,r10 + + + + + + + + lea r14,[((64+8))+r11*8+rsp] + mov r11,rdx + + pand xmm0,xmm7 + por xmm1,xmm3 + lea r12,[512+r12] + por xmm0,xmm1 + + mul rbp + add r10,rax + mov rax,QWORD[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[32+r9] + lea rcx,[64+rcx] + adc rdx,0 + mov QWORD[r14],rdi + mov r13,rdx + jmp NEAR $L$1st4x + +ALIGN 32 +$L$1st4x: + mul rbx + add r10,rax + mov rax,QWORD[((-32))+rcx] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-8))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,[64+rcx] + adc rdx,0 + mov QWORD[r14],rdi + mov r13,rdx + + add r15,32 + jnz NEAR $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD[((-32))+rcx] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov r13,rdx + +DB 102,72,15,126,195 + lea rcx,[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD[((-8))+r14],r13 + + jmp NEAR $L$outer4x + +ALIGN 32 +$L$outer4x: + mov r10,QWORD[r9*1+r14] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + + movq xmm0,QWORD[(((-96)))+r12] + movq xmm1,QWORD[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD[96+r12] + + imul rbp,r10 +DB 0x67 + mov r11,rdx + mov QWORD[r14],rdi + + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r14,[r9*1+r14] + lea r12,[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[16+rcx] + adc rdx,0 + add r11,QWORD[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,[32+r9] + lea rcx,[64+rcx] + adc rdx,0 + mov r13,rdx + jmp NEAR $L$inner4x + +ALIGN 32 +$L$inner4x: + mul rbx + add r10,rax + mov rax,QWORD[((-32))+rcx] + adc rdx,0 + add r10,QWORD[16+r14] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[((-16))+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD[rcx] + adc rdx,0 + add r10,QWORD[r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-16))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD[16+rcx] + adc rdx,0 + add r11,QWORD[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,[64+rcx] + adc rdx,0 + mov QWORD[((-8))+r14],r13 + mov r13,rdx + + add r15,32 + jnz NEAR $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD[((-32))+rcx] + adc rdx,0 + add r10,QWORD[16+r14] + lea r14,[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,rbp + mov rbp,QWORD[((-16))+rcx] + adc rdx,0 + add r11,QWORD[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD[((-24))+r14],r13 + mov r13,rdx + +DB 102,72,15,126,195 + mov QWORD[((-16))+r14],rdi + lea rcx,[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD[r14] + adc rdi,0 + mov QWORD[((-8))+r14],r13 + + cmp r12,QWORD[((16+8))+rsp] + jb NEAR $L$outer4x + sub rbp,r13 + adc r15,r15 + or rdi,r15 + xor rdi,1 + lea rbx,[r9*1+r14] + lea rbp,[rdi*8+rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD[((56+8))+rsp] + jmp NEAR $L$sqr4x_sub + +global bn_power5 + +ALIGN 32 +bn_power5: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-40))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD[r8] + + + + + + + + lea r11,[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb NEAR $L$pwr_sp_alt + sub rsp,r11 + lea rsp,[((-64))+r9*2+rsp] + jmp NEAR $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt: + lea r10,[((4096-64))+r9*2] + lea rsp,[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$pwr_sp_done: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax +$L$power5_body: +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rdi,rsi + mov rax,QWORD[40+rsp] + lea r8,[32+rsp] + + call mul4x_internal + + mov rsi,QWORD[40+rsp] + mov rax,1 + mov r15,QWORD[((-48))+rsi] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$power5_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_power5: + +global bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,[32+r10] + lea rsi,[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD[((-32))+rbp*1+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rbp*1+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + jmp NEAR $L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st: + mov rbx,QWORD[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne NEAR $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + jmp NEAR $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer: + mov r14,QWORD[((-32))+rbp*1+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rbp*1+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov rbx,QWORD[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD[((-8))+rbp*1+rdi],r10 + + lea rcx,[rbp] + jmp NEAR $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner: + mov rbx,QWORD[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD[rcx*1+rdi] + adc r12,0 + +DB 0x67 + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD[8+rcx*1+rdi] + lea rcx,[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne NEAR $L$sqr4x_inner + +DB 0x67 + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + + add rbp,16 + jnz NEAR $L$sqr4x_outer + + + mov r14,QWORD[((-32))+rsi] + lea rdi,[((48+8))+r9*2+rsp] + mov rax,QWORD[((-24))+rsi] + lea rdi,[((-32))+rbp*1+rdi] + mov rbx,QWORD[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD[rdi],r13 + mov r12,rdx + mov QWORD[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD[8+rdi],rax + mov QWORD[16+rdi],rdx + mov QWORD[24+rdi],r15 + + mov rax,QWORD[((-16))+rbp*1+rsi] + lea rdi,[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD[8+rdi] + + lea r12,[r10*2+r14] + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[24+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rbp*1+rsi] + mov QWORD[rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[40+rdi] + adc rbx,rax + mov rax,QWORD[rbp*1+rsi] + mov QWORD[16+rdi],rbx + adc r8,rdx + lea rbp,[16+rbp] + mov QWORD[24+rdi],r8 + sbb r15,r15 + lea rdi,[64+rdi] + jmp NEAR $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add: + lea r12,[r10*2+r14] + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[((-8))+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rbp*1+rsi] + mov QWORD[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[8+rdi] + adc rbx,rax + mov rax,QWORD[rbp*1+rsi] + mov QWORD[((-16))+rdi],rbx + adc r8,rdx + + lea r12,[r10*2+r14] + mov QWORD[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[24+rdi] + adc r12,rax + mov rax,QWORD[8+rbp*1+rsi] + mov QWORD[rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[40+rdi] + adc rbx,rax + mov rax,QWORD[16+rbp*1+rsi] + mov QWORD[16+rdi],rbx + adc r8,rdx + mov QWORD[24+rdi],r8 + sbb r15,r15 + lea rdi,[64+rdi] + add rbp,32 + jnz NEAR $L$sqr4x_shift_n_add + + lea r12,[r10*2+r14] +DB 0x67 + shr r10,63 + lea r13,[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD[((-8))+rdi] + adc r12,rax + mov rax,QWORD[((-8))+rsi] + mov QWORD[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,[r10*2+r14] + mov QWORD[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD[((-16))+rdi],rbx + mov QWORD[((-8))+rdi],r8 +DB 102,72,15,126,213 +sqr8x_reduction: + xor rax,rax + lea rcx,[r9*2+rbp] + lea rdx,[((48+8))+r9*2+rsp] + mov QWORD[((0+8))+rsp],rcx + lea rdi,[((48+8))+r9*1+rsp] + mov QWORD[((8+8))+rsp],rdx + neg r9 + jmp NEAR $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop: + lea rdi,[r9*1+rdi] +DB 0x66 + mov rbx,QWORD[rdi] + mov r9,QWORD[8+rdi] + mov r10,QWORD[16+rdi] + mov r11,QWORD[24+rdi] + mov r12,QWORD[32+rdi] + mov r13,QWORD[40+rdi] + mov r14,QWORD[48+rdi] + mov r15,QWORD[56+rdi] + mov QWORD[rdx],rax + lea rdi,[64+rdi] + +DB 0x67 + mov r8,rbx + imul rbx,QWORD[((32+8))+rsp] + mov rax,QWORD[rbp] + mov ecx,8 + jmp NEAR $L$8x_reduce + +ALIGN 32 +$L$8x_reduce: + mul rbx + mov rax,QWORD[16+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[32+rbp] + adc rdx,0 + add r8,r9 + mov QWORD[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[48+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[64+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz NEAR $L$8x_reduce + + lea rbp,[128+rbp] + xor rax,rax + mov rdx,QWORD[((8+8))+rsp] + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$8x_no_tail + +DB 0x66 + add r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD[rbp] + jmp NEAR $L$8x_tail + +ALIGN 32 +$L$8x_tail: + mul rbx + add r8,rax + mov rax,QWORD[16+rbp] + mov QWORD[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD[32+rbp] + adc rdx,0 + add r8,r9 + lea rdi,[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD[48+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD[64+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz NEAR $L$8x_tail + + lea rbp,[128+rbp] + mov rdx,QWORD[((8+8))+rsp] + cmp rbp,QWORD[((0+8))+rsp] + jae NEAR $L$8x_tail_done + + mov rbx,QWORD[((48+56+8))+rsp] + neg rsi + mov rax,QWORD[rbp] + adc r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp NEAR $L$8x_tail + +ALIGN 32 +$L$8x_tail_done: + add r8,QWORD[rdx] + xor rax,rax + + neg rsi +$L$8x_no_tail: + adc r8,QWORD[rdi] + adc r9,QWORD[8+rdi] + adc r10,QWORD[16+rdi] + adc r11,QWORD[24+rdi] + adc r12,QWORD[32+rdi] + adc r13,QWORD[40+rdi] + adc r14,QWORD[48+rdi] + adc r15,QWORD[56+rdi] + adc rax,0 + mov rcx,QWORD[((-16))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 + + mov QWORD[rdi],r8 + mov QWORD[8+rdi],r9 +DB 102,73,15,126,217 + mov QWORD[16+rdi],r10 + mov QWORD[24+rdi],r11 + mov QWORD[32+rdi],r12 + mov QWORD[40+rdi],r13 + mov QWORD[48+rdi],r14 + mov QWORD[56+rdi],r15 + lea rdi,[64+rdi] + + cmp rdi,rdx + jb NEAR $L$8x_reduction_loop + + sub rcx,r15 + lea rbx,[r9*1+rdi] + adc rsi,rsi + mov rcx,r9 + or rax,rsi +DB 102,72,15,126,207 + xor rax,1 +DB 102,72,15,126,206 + lea rbp,[rax*8+rbp] + sar rcx,3+2 + jmp NEAR $L$sqr4x_sub + +ALIGN 32 +$L$sqr4x_sub: +DB 0x66 + mov r12,QWORD[rbx] + mov r13,QWORD[8+rbx] + sbb r12,QWORD[rbp] + mov r14,QWORD[16+rbx] + sbb r13,QWORD[16+rbp] + mov r15,QWORD[24+rbx] + lea rbx,[32+rbx] + sbb r14,QWORD[32+rbp] + mov QWORD[rdi],r12 + sbb r15,QWORD[48+rbp] + lea rbp,[64+rbp] + mov QWORD[8+rdi],r13 + mov QWORD[16+rdi],r14 + mov QWORD[24+rdi],r15 + lea rdi,[32+rdi] + + inc rcx + jnz NEAR $L$sqr4x_sub + mov r10,r9 + neg r9 + DB 0F3h,0C3h ;repret + +global bn_from_montgomery + +ALIGN 32 +bn_from_montgomery: + test DWORD[48+rsp],7 + jz NEAR bn_from_mont8x + xor eax,eax + DB 0F3h,0C3h ;repret + + + +ALIGN 32 +bn_from_mont8x: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_from_mont8x: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + +DB 0x67 + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,[((-40))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 +DB 0x67 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD[r8] + + + + + + + + lea r11,[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb NEAR $L$from_sp_alt + sub rsp,r11 + lea rsp,[((-64))+r9*2+rsp] + jmp NEAR $L$from_sp_done + +ALIGN 32 +$L$from_sp_alt: + lea r10,[((4096-64))+r9*2] + lea rsp,[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$from_sp_done: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD[32+rsp],r8 + mov QWORD[40+rsp],rax +$L$from_body: + mov r11,r9 + lea rax,[48+rsp] + pxor xmm0,xmm0 + jmp NEAR $L$mul_by_1 + +ALIGN 32 +$L$mul_by_1: + movdqu xmm1,XMMWORD[rsi] + movdqu xmm2,XMMWORD[16+rsi] + movdqu xmm3,XMMWORD[32+rsi] + movdqa XMMWORD[r9*1+rax],xmm0 + movdqu xmm4,XMMWORD[48+rsi] + movdqa XMMWORD[16+r9*1+rax],xmm0 +DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa XMMWORD[rax],xmm1 + movdqa XMMWORD[32+r9*1+rax],xmm0 + movdqa XMMWORD[16+rax],xmm2 + movdqa XMMWORD[48+r9*1+rax],xmm0 + movdqa XMMWORD[32+rax],xmm3 + movdqa XMMWORD[48+rax],xmm4 + lea rax,[64+rax] + sub r11,64 + jnz NEAR $L$mul_by_1 + +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 0x67 + mov rbp,rcx +DB 102,73,15,110,218 + call sqr8x_reduction + + pxor xmm0,xmm0 + lea rax,[48+rsp] + mov rsi,QWORD[40+rsp] + jmp NEAR $L$from_mont_zero + +ALIGN 32 +$L$from_mont_zero: + movdqa XMMWORD[rax],xmm0 + movdqa XMMWORD[16+rax],xmm0 + movdqa XMMWORD[32+rax],xmm0 + movdqa XMMWORD[48+rax],xmm0 + lea rax,[64+rax] + sub r9,32 + jnz NEAR $L$from_mont_zero + + mov rax,1 + mov r15,QWORD[((-48))+rsi] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$from_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_from_mont8x: +global bn_scatter5 + +ALIGN 16 +bn_scatter5: + cmp edx,0 + jz NEAR $L$scatter_epilogue + lea r8,[r9*8+r8] +$L$scatter: + mov rax,QWORD[rcx] + lea rcx,[8+rcx] + mov QWORD[r8],rax + lea r8,[256+r8] + sub edx,1 + jnz NEAR $L$scatter +$L$scatter_epilogue: + DB 0F3h,0C3h ;repret + + +global bn_gather5 + +ALIGN 16 +bn_gather5: +$L$SEH_begin_bn_gather5: + +DB 0x48,0x83,0xec,0x28 +DB 0x0f,0x29,0x34,0x24 +DB 0x0f,0x29,0x7c,0x24,0x10 + mov r11d,r9d + shr r9d,3 + and r11,7 + not r9d + lea rax,[$L$magic_masks] + and r9d,3 + lea r8,[128+r11*8+r8] + movq xmm4,QWORD[r9*8+rax] + movq xmm5,QWORD[8+r9*8+rax] + movq xmm6,QWORD[16+r9*8+rax] + movq xmm7,QWORD[24+r9*8+rax] + jmp NEAR $L$gather +ALIGN 16 +$L$gather: + movq xmm0,QWORD[(((-128)))+r8] + movq xmm1,QWORD[((-64))+r8] + pand xmm0,xmm4 + movq xmm2,QWORD[r8] + pand xmm1,xmm5 + movq xmm3,QWORD[64+r8] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 +DB 0x67,0x67 + por xmm0,xmm2 + lea r8,[256+r8] + por xmm0,xmm3 + + movq QWORD[rcx],xmm0 + lea rcx,[8+rcx] + sub edx,1 + jnz NEAR $L$gather + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + lea rsp,[40+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_gather5: + +ALIGN 64 +$L$magic_masks: + DD 0,0,0,0,0,0,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 +DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 +DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 +DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 +DB 112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +mul_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea r10,[$L$mul_epilogue] + cmp rbx,r10 + jb NEAR $L$body_40 + + mov r10,QWORD[192+r8] + mov rax,QWORD[8+r10*8+rax] + jmp NEAR $L$body_proceed + +$L$body_40: + mov rax,QWORD[40+rax] +$L$body_proceed: + + movaps xmm0,XMMWORD[((-88))+rax] + movaps xmm1,XMMWORD[((-72))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + movups XMMWORD[512+r8],xmm0 + movups XMMWORD[528+r8],xmm1 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase + DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase + + DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase + DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase + DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase + + DD $L$SEH_begin_bn_power5 wrt ..imagebase + DD $L$SEH_end_bn_power5 wrt ..imagebase + DD $L$SEH_info_bn_power5 wrt ..imagebase + + DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase + DD $L$SEH_end_bn_from_mont8x wrt ..imagebase + DD $L$SEH_info_bn_from_mont8x wrt ..imagebase + DD $L$SEH_begin_bn_gather5 wrt ..imagebase + DD $L$SEH_end_bn_gather5 wrt ..imagebase + DD $L$SEH_info_bn_gather5 wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_bn_mul_mont_gather5: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_mul4x_mont_gather5: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_power5: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_from_mont8x: +DB 9,0,0,0 + DD mul_handler wrt ..imagebase + DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase +ALIGN 8 +$L$SEH_info_bn_gather5: +DB 0x01,0x0d,0x05,0x00 +DB 0x0d,0x78,0x01,0x00 +DB 0x08,0x68,0x00,0x00 +DB 0x04,0x42,0x00,0x00 +ALIGN 8 diff --git a/third_party/boringssl/win-x86_64/crypto/cpu-x86_64-asm.asm b/third_party/boringssl/win-x86_64/crypto/cpu-x86_64-asm.asm new file mode 100644 index 00000000000..c92d7bbc1f7 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/cpu-x86_64-asm.asm @@ -0,0 +1,154 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global OPENSSL_ia32_cpuid + +ALIGN 16 +OPENSSL_ia32_cpuid: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_OPENSSL_ia32_cpuid: + mov rdi,rcx + + + + + mov rdi,rcx + mov r8,rbx + + xor eax,eax + mov DWORD[8+rdi],eax + cpuid + mov r11d,eax + + xor eax,eax + cmp ebx,0x756e6547 + setne al + mov r9d,eax + cmp edx,0x49656e69 + setne al + or r9d,eax + cmp ecx,0x6c65746e + setne al + or r9d,eax + jz NEAR $L$intel + + cmp ebx,0x68747541 + setne al + mov r10d,eax + cmp edx,0x69746E65 + setne al + or r10d,eax + cmp ecx,0x444D4163 + setne al + or r10d,eax + jnz NEAR $L$intel + + + + + mov eax,0x80000000 + cpuid + + + cmp eax,0x80000001 + jb NEAR $L$intel + mov r10d,eax + mov eax,0x80000001 + cpuid + + + or r9d,ecx + and r9d,0x00000801 + + cmp r10d,0x80000008 + jb NEAR $L$intel + + mov eax,0x80000008 + cpuid + + movzx r10,cl + inc r10 + + mov eax,1 + cpuid + + bt edx,28 + jnc NEAR $L$generic + shr ebx,16 + cmp bl,r10b + ja NEAR $L$generic + and edx,0xefffffff + jmp NEAR $L$generic + +$L$intel: + cmp r11d,4 + mov r10d,-1 + jb NEAR $L$nocacheinfo + + mov eax,4 + mov ecx,0 + cpuid + mov r10d,eax + shr r10d,14 + and r10d,0xfff + + cmp r11d,7 + jb NEAR $L$nocacheinfo + + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD[8+rdi],ebx + +$L$nocacheinfo: + mov eax,1 + cpuid + + and edx,0xbfefffff + cmp r9d,0 + jne NEAR $L$notintel + or edx,0x40000000 +$L$notintel: + bt edx,28 + jnc NEAR $L$generic + and edx,0xefffffff + cmp r10d,0 + je NEAR $L$generic + + or edx,0x10000000 + shr ebx,16 + cmp bl,1 + ja NEAR $L$generic + and edx,0xefffffff +$L$generic: + and r9d,0x00000800 + and ecx,0xfffff7ff + or r9d,ecx + + mov r10d,edx + bt r9d,27 + jnc NEAR $L$clear_avx + xor ecx,ecx +DB 0x0f,0x01,0xd0 + and eax,6 + cmp eax,6 + je NEAR $L$done +$L$clear_avx: + mov eax,0xefffe7ff + and r9d,eax + and DWORD[8+rdi],0xffffffdf +$L$done: + mov DWORD[4+rdi],r9d + mov DWORD[rdi],r10d + mov rbx,r8 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_OPENSSL_ia32_cpuid: + diff --git a/third_party/boringssl/win-x86_64/crypto/md5/md5-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/md5/md5-x86_64.asm new file mode 100644 index 00000000000..0e9d2c604eb --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/md5/md5-x86_64.asm @@ -0,0 +1,776 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +ALIGN 16 + +global md5_block_asm_data_order + +md5_block_asm_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_md5_block_asm_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbp + push rbx + push r12 + push r14 + push r15 +$L$prologue: + + + + + mov rbp,rdi + shl rdx,6 + lea rdi,[rdx*1+rsi] + mov eax,DWORD[rbp] + mov ebx,DWORD[4+rbp] + mov ecx,DWORD[8+rbp] + mov edx,DWORD[12+rbp] + + + + + + + + cmp rsi,rdi + je NEAR $L$end + + +$L$loop: + mov r8d,eax + mov r9d,ebx + mov r14d,ecx + mov r15d,edx + mov r10d,DWORD[rsi] + mov r11d,edx + xor r11d,ecx + lea eax,[((-680876936))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[4+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-389564586))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[8+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[606105819+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[12+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-1044525330))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[16+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[((-176418897))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[20+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[1200080426+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[24+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-1473231341))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[28+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-45705983))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[32+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[1770035416+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[36+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-1958414417))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[40+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-42063))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[44+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[((-1990404162))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[48+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,[1804603682+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD[52+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,[((-40341101))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD[56+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,[((-1502002290))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD[60+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,[1236535329+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD[rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + mov r10d,DWORD[4+rsi] + mov r11d,edx + mov r12d,edx + not r11d + lea eax,[((-165796510))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[24+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-1069501632))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[44+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[643717713+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-373897302))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[20+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[((-701558691))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[40+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[38016083+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[60+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[((-660478335))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[16+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-405537848))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[36+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[568446438+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[56+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-1019803690))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[12+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[((-187363961))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[32+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[1163531501+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[52+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,[((-1444681467))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD[8+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,[((-51403784))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD[28+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,[1735328473+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD[48+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,[((-1926607734))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD[rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + mov r10d,DWORD[20+rsi] + mov r11d,ecx + lea eax,[((-378558))+r10*1+rax] + mov r10d,DWORD[32+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-2022574463))+r10*1+rdx] + mov r10d,DWORD[44+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[1839030562+r10*1+rcx] + mov r10d,DWORD[56+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-35309556))+r10*1+rbx] + mov r10d,DWORD[4+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[((-1530992060))+r10*1+rax] + mov r10d,DWORD[16+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[1272893353+r10*1+rdx] + mov r10d,DWORD[28+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[((-155497632))+r10*1+rcx] + mov r10d,DWORD[40+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-1094730640))+r10*1+rbx] + mov r10d,DWORD[52+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[681279174+r10*1+rax] + mov r10d,DWORD[rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-358537222))+r10*1+rdx] + mov r10d,DWORD[12+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[((-722521979))+r10*1+rcx] + mov r10d,DWORD[24+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[76029189+r10*1+rbx] + mov r10d,DWORD[36+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,[((-640364487))+r10*1+rax] + mov r10d,DWORD[48+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,[((-421815835))+r10*1+rdx] + mov r10d,DWORD[60+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,[530742520+r10*1+rcx] + mov r10d,DWORD[8+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,[((-995338651))+r10*1+rbx] + mov r10d,DWORD[rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + mov r10d,DWORD[rsi] + mov r11d,0xffffffff + xor r11d,edx + lea eax,[((-198630844))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[28+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[1126891415+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[56+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1416354905))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[20+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-57434055))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[48+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[1700485571+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[12+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-1894986606))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[40+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1051523))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[4+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-2054922799))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[32+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[1873313359+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[60+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-30611744))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[24+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[((-1560198380))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[52+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[1309151649+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[16+rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,[((-145523070))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD[44+rsi] + mov r11d,0xffffffff + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,[((-1120210379))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD[8+rsi] + mov r11d,0xffffffff + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,[718787259+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD[36+rsi] + mov r11d,0xffffffff + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,[((-343485551))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD[rsi] + mov r11d,0xffffffff + rol ebx,21 + xor r11d,edx + add ebx,ecx + + add eax,r8d + add ebx,r9d + add ecx,r14d + add edx,r15d + + + add rsi,64 + cmp rsi,rdi + jb NEAR $L$loop + + +$L$end: + mov DWORD[rbp],eax + mov DWORD[4+rbp],ebx + mov DWORD[8+rbp],ecx + mov DWORD[12+rbp],edx + + mov r15,QWORD[rsp] + mov r14,QWORD[8+rsp] + mov r12,QWORD[16+rsp] + mov rbx,QWORD[24+rsp] + mov rbp,QWORD[32+rsp] + add rsp,40 +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_md5_block_asm_data_order: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rax,[40+rax] + + mov rbp,QWORD[((-8))+rax] + mov rbx,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r14,QWORD[((-32))+rax] + mov r15,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_md5_block_asm_data_order wrt ..imagebase + DD $L$SEH_end_md5_block_asm_data_order wrt ..imagebase + DD $L$SEH_info_md5_block_asm_data_order wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_md5_block_asm_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm new file mode 100644 index 00000000000..d7fff6a9746 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/modes/aesni-gcm-x86_64.asm @@ -0,0 +1,20 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +global aesni_gcm_encrypt + +aesni_gcm_encrypt: + xor eax,eax + DB 0F3h,0C3h ;repret + + +global aesni_gcm_decrypt + +aesni_gcm_decrypt: + xor eax,eax + DB 0F3h,0C3h ;repret + diff --git a/third_party/boringssl/win-x86_64/crypto/modes/ghash-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/modes/ghash-x86_64.asm new file mode 100644 index 00000000000..5d8fadc0332 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/modes/ghash-x86_64.asm @@ -0,0 +1,1507 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +global gcm_gmult_4bit + +ALIGN 16 +gcm_gmult_4bit: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_gmult_4bit: + mov rdi,rcx + mov rsi,rdx + + + push rbx + push rbp + push r12 +$L$gmult_prologue: + + movzx r8,BYTE[15+rdi] + lea r11,[$L$rem_4bit] + xor rax,rax + xor rbx,rbx + mov al,r8b + mov bl,r8b + shl al,4 + mov rcx,14 + mov r8,QWORD[8+rax*1+rsi] + mov r9,QWORD[rax*1+rsi] + and bl,0xf0 + mov rdx,r8 + jmp NEAR $L$oop1 + +ALIGN 16 +$L$oop1: + shr r8,4 + and rdx,0xf + mov r10,r9 + mov al,BYTE[rcx*1+rdi] + shr r9,4 + xor r8,QWORD[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD[rbx*1+rsi] + mov bl,al + xor r9,QWORD[rdx*8+r11] + mov rdx,r8 + shl al,4 + xor r8,r10 + dec rcx + js NEAR $L$break1 + + shr r8,4 + and rdx,0xf + mov r10,r9 + shr r9,4 + xor r8,QWORD[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD[rax*1+rsi] + and bl,0xf0 + xor r9,QWORD[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + jmp NEAR $L$oop1 + +ALIGN 16 +$L$break1: + shr r8,4 + and rdx,0xf + mov r10,r9 + shr r9,4 + xor r8,QWORD[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD[rax*1+rsi] + and bl,0xf0 + xor r9,QWORD[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + + shr r8,4 + and rdx,0xf + mov r10,r9 + shr r9,4 + xor r8,QWORD[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD[rbx*1+rsi] + xor r8,r10 + xor r9,QWORD[rdx*8+r11] + + bswap r8 + bswap r9 + mov QWORD[8+rdi],r8 + mov QWORD[rdi],r9 + + mov rbx,QWORD[16+rsp] + lea rsp,[24+rsp] +$L$gmult_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_gmult_4bit: +global gcm_ghash_4bit + +ALIGN 16 +gcm_ghash_4bit: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_ghash_4bit: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,280 +$L$ghash_prologue: + mov r14,rdx + mov r15,rcx + sub rsi,-128 + lea rbp,[((16+128))+rsp] + xor edx,edx + mov r8,QWORD[((0+0-128))+rsi] + mov rax,QWORD[((0+8-128))+rsi] + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov r9,QWORD[((16+0-128))+rsi] + shl dl,4 + mov rbx,QWORD[((16+8-128))+rsi] + shl r10,60 + mov BYTE[rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[rbp],r8 + mov r8,QWORD[((32+0-128))+rsi] + shl dl,4 + mov QWORD[((0-128))+rbp],rax + mov rax,QWORD[((32+8-128))+rsi] + shl r10,60 + mov BYTE[1+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[8+rbp],r9 + mov r9,QWORD[((48+0-128))+rsi] + shl dl,4 + mov QWORD[((8-128))+rbp],rbx + mov rbx,QWORD[((48+8-128))+rsi] + shl r10,60 + mov BYTE[2+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[16+rbp],r8 + mov r8,QWORD[((64+0-128))+rsi] + shl dl,4 + mov QWORD[((16-128))+rbp],rax + mov rax,QWORD[((64+8-128))+rsi] + shl r10,60 + mov BYTE[3+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[24+rbp],r9 + mov r9,QWORD[((80+0-128))+rsi] + shl dl,4 + mov QWORD[((24-128))+rbp],rbx + mov rbx,QWORD[((80+8-128))+rsi] + shl r10,60 + mov BYTE[4+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[32+rbp],r8 + mov r8,QWORD[((96+0-128))+rsi] + shl dl,4 + mov QWORD[((32-128))+rbp],rax + mov rax,QWORD[((96+8-128))+rsi] + shl r10,60 + mov BYTE[5+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[40+rbp],r9 + mov r9,QWORD[((112+0-128))+rsi] + shl dl,4 + mov QWORD[((40-128))+rbp],rbx + mov rbx,QWORD[((112+8-128))+rsi] + shl r10,60 + mov BYTE[6+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[48+rbp],r8 + mov r8,QWORD[((128+0-128))+rsi] + shl dl,4 + mov QWORD[((48-128))+rbp],rax + mov rax,QWORD[((128+8-128))+rsi] + shl r10,60 + mov BYTE[7+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[56+rbp],r9 + mov r9,QWORD[((144+0-128))+rsi] + shl dl,4 + mov QWORD[((56-128))+rbp],rbx + mov rbx,QWORD[((144+8-128))+rsi] + shl r10,60 + mov BYTE[8+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[64+rbp],r8 + mov r8,QWORD[((160+0-128))+rsi] + shl dl,4 + mov QWORD[((64-128))+rbp],rax + mov rax,QWORD[((160+8-128))+rsi] + shl r10,60 + mov BYTE[9+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[72+rbp],r9 + mov r9,QWORD[((176+0-128))+rsi] + shl dl,4 + mov QWORD[((72-128))+rbp],rbx + mov rbx,QWORD[((176+8-128))+rsi] + shl r10,60 + mov BYTE[10+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[80+rbp],r8 + mov r8,QWORD[((192+0-128))+rsi] + shl dl,4 + mov QWORD[((80-128))+rbp],rax + mov rax,QWORD[((192+8-128))+rsi] + shl r10,60 + mov BYTE[11+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[88+rbp],r9 + mov r9,QWORD[((208+0-128))+rsi] + shl dl,4 + mov QWORD[((88-128))+rbp],rbx + mov rbx,QWORD[((208+8-128))+rsi] + shl r10,60 + mov BYTE[12+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[96+rbp],r8 + mov r8,QWORD[((224+0-128))+rsi] + shl dl,4 + mov QWORD[((96-128))+rbp],rax + mov rax,QWORD[((224+8-128))+rsi] + shl r10,60 + mov BYTE[13+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD[104+rbp],r9 + mov r9,QWORD[((240+0-128))+rsi] + shl dl,4 + mov QWORD[((104-128))+rbp],rbx + mov rbx,QWORD[((240+8-128))+rsi] + shl r10,60 + mov BYTE[14+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD[112+rbp],r8 + shl dl,4 + mov QWORD[((112-128))+rbp],rax + shl r10,60 + mov BYTE[15+rsp],dl + or rbx,r10 + mov QWORD[120+rbp],r9 + mov QWORD[((120-128))+rbp],rbx + add rsi,-128 + mov r8,QWORD[8+rdi] + mov r9,QWORD[rdi] + add r15,r14 + lea r11,[$L$rem_8bit] + jmp NEAR $L$outer_loop +ALIGN 16 +$L$outer_loop: + xor r9,QWORD[r14] + mov rdx,QWORD[8+r14] + lea r14,[16+r14] + xor rdx,r8 + mov QWORD[rdi],r9 + mov QWORD[8+rdi],rdx + shr rdx,32 + xor rax,rax + rol edx,8 + mov al,dl + movzx ebx,dl + shl al,4 + shr ebx,4 + rol edx,8 + mov r8,QWORD[8+rax*1+rsi] + mov r9,QWORD[rax*1+rsi] + mov al,dl + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + xor r12,r8 + mov r10,r9 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD[8+rdi] + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD[4+rdi] + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD[rdi] + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD[rcx*8+rbp] + rol edx,8 + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE[rbx*1+rsp] + and ecx,240 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD[((-4))+rdi] + shr r9,8 + xor r8,QWORD[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD[rbx*8+rbp] + movzx r12,WORD[r12*2+r11] + xor r8,QWORD[8+rax*1+rsi] + xor r9,QWORD[rax*1+rsi] + shl r12,48 + xor r8,r10 + xor r9,r12 + movzx r13,r8b + shr r8,4 + mov r10,r9 + shl r13b,4 + shr r9,4 + xor r8,QWORD[8+rcx*1+rsi] + movzx r13,WORD[r13*2+r11] + shl r10,60 + xor r9,QWORD[rcx*1+rsi] + xor r8,r10 + shl r13,48 + bswap r8 + xor r9,r13 + bswap r9 + cmp r14,r15 + jb NEAR $L$outer_loop + mov QWORD[8+rdi],r8 + mov QWORD[rdi],r9 + + lea rsi,[280+rsp] + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$ghash_epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_ghash_4bit: +global gcm_init_clmul + +ALIGN 16 +gcm_init_clmul: +$L$_init_clmul: +$L$SEH_begin_gcm_init_clmul: + +DB 0x48,0x83,0xec,0x18 +DB 0x0f,0x29,0x34,0x24 + movdqu xmm2,XMMWORD[rdx] + pshufd xmm2,xmm2,78 + + + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + + + pand xmm5,XMMWORD[$L$0x1c2_polynomial] + pxor xmm2,xmm5 + + + pshufd xmm6,xmm2,78 + movdqa xmm0,xmm2 + pxor xmm6,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD[rcx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD[16+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD[32+rcx],xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm5,xmm0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm5,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm5 + movdqu XMMWORD[48+rcx],xmm5 + pxor xmm4,xmm0 + movdqu XMMWORD[64+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD[80+rcx],xmm4 + movaps xmm6,XMMWORD[rsp] + lea rsp,[24+rsp] +$L$SEH_end_gcm_init_clmul: + DB 0F3h,0C3h ;repret + +global gcm_gmult_clmul + +ALIGN 16 +gcm_gmult_clmul: +$L$_gmult_clmul: + movdqu xmm0,XMMWORD[rcx] + movdqa xmm5,XMMWORD[$L$bswap_mask] + movdqu xmm2,XMMWORD[rdx] + movdqu xmm4,XMMWORD[32+rdx] +DB 102,15,56,0,197 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD[rcx],xmm0 + DB 0F3h,0C3h ;repret + +global gcm_ghash_clmul + +ALIGN 32 +gcm_ghash_clmul: +$L$_ghash_clmul: + lea rax,[((-136))+rsp] +$L$SEH_begin_gcm_ghash_clmul: + +DB 0x48,0x8d,0x60,0xe0 +DB 0x0f,0x29,0x70,0xe0 +DB 0x0f,0x29,0x78,0xf0 +DB 0x44,0x0f,0x29,0x00 +DB 0x44,0x0f,0x29,0x48,0x10 +DB 0x44,0x0f,0x29,0x50,0x20 +DB 0x44,0x0f,0x29,0x58,0x30 +DB 0x44,0x0f,0x29,0x60,0x40 +DB 0x44,0x0f,0x29,0x68,0x50 +DB 0x44,0x0f,0x29,0x70,0x60 +DB 0x44,0x0f,0x29,0x78,0x70 + movdqa xmm10,XMMWORD[$L$bswap_mask] + + movdqu xmm0,XMMWORD[rcx] + movdqu xmm2,XMMWORD[rdx] + movdqu xmm7,XMMWORD[32+rdx] +DB 102,65,15,56,0,194 + + sub r9,0x10 + jz NEAR $L$odd_tail + + movdqu xmm6,XMMWORD[16+rdx] + mov eax,DWORD[((OPENSSL_ia32cap_P+4))] + cmp r9,0x30 + jb NEAR $L$skip4x + + and eax,71303168 + cmp eax,4194304 + je NEAR $L$skip4x + + sub r9,0x30 + mov rax,0xA040608020C0E000 + movdqu xmm14,XMMWORD[48+rdx] + movdqu xmm15,XMMWORD[64+rdx] + + + + + movdqu xmm3,XMMWORD[48+r8] + movdqu xmm11,XMMWORD[32+r8] +DB 102,65,15,56,0,218 +DB 102,69,15,56,0,218 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm12,xmm11 +DB 102,68,15,58,68,222,0 +DB 102,68,15,58,68,238,17 +DB 102,68,15,58,68,231,16 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + movups xmm7,XMMWORD[80+rdx] + xorps xmm4,xmm12 + + movdqu xmm11,XMMWORD[16+r8] + movdqu xmm8,XMMWORD[r8] +DB 102,69,15,56,0,218 +DB 102,69,15,56,0,194 + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + movdqa xmm1,xmm0 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 +DB 102,69,15,58,68,238,17 +DB 102,68,15,58,68,231,0 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + + lea r8,[64+r8] + sub r9,0x40 + jc NEAR $L$tail4x + + jmp NEAR $L$mod4_loop +ALIGN 32 +$L$mod4_loop: +DB 102,65,15,58,68,199,0 + xorps xmm4,xmm12 + movdqu xmm11,XMMWORD[48+r8] +DB 102,69,15,56,0,218 +DB 102,65,15,58,68,207,17 + xorps xmm0,xmm3 + movdqu xmm3,XMMWORD[32+r8] + movdqa xmm13,xmm11 +DB 102,68,15,58,68,199,16 + pshufd xmm12,xmm11,78 + xorps xmm1,xmm5 + pxor xmm12,xmm11 +DB 102,65,15,56,0,218 + movups xmm7,XMMWORD[32+rdx] + xorps xmm8,xmm4 +DB 102,68,15,58,68,218,0 + pshufd xmm4,xmm3,78 + + pxor xmm8,xmm0 + movdqa xmm5,xmm3 + pxor xmm8,xmm1 + pxor xmm4,xmm3 + movdqa xmm9,xmm8 +DB 102,68,15,58,68,234,17 + pslldq xmm8,8 + psrldq xmm9,8 + pxor xmm0,xmm8 + movdqa xmm8,XMMWORD[$L$7_mask] + pxor xmm1,xmm9 +DB 102,76,15,110,200 + + pand xmm8,xmm0 +DB 102,69,15,56,0,200 + pxor xmm9,xmm0 +DB 102,68,15,58,68,231,0 + psllq xmm9,57 + movdqa xmm8,xmm9 + pslldq xmm9,8 +DB 102,15,58,68,222,0 + psrldq xmm8,8 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + movdqu xmm8,XMMWORD[r8] + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,238,17 + xorps xmm3,xmm11 + movdqu xmm11,XMMWORD[16+r8] +DB 102,69,15,56,0,218 +DB 102,15,58,68,231,16 + xorps xmm5,xmm13 + movups xmm7,XMMWORD[80+rdx] +DB 102,69,15,56,0,194 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + + movdqa xmm13,xmm11 + pxor xmm4,xmm12 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm1,xmm0 +DB 102,69,15,58,68,238,17 + xorps xmm3,xmm11 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 + +DB 102,68,15,58,68,231,0 + xorps xmm5,xmm13 + + lea r8,[64+r8] + sub r9,0x40 + jnc NEAR $L$mod4_loop + +$L$tail4x: +DB 102,65,15,58,68,199,0 +DB 102,65,15,58,68,207,17 +DB 102,68,15,58,68,199,16 + xorps xmm4,xmm12 + xorps xmm0,xmm3 + xorps xmm1,xmm5 + pxor xmm1,xmm0 + pxor xmm8,xmm4 + + pxor xmm8,xmm1 + pxor xmm1,xmm0 + + movdqa xmm9,xmm8 + psrldq xmm8,8 + pslldq xmm9,8 + pxor xmm1,xmm8 + pxor xmm0,xmm9 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + add r9,0x40 + jz NEAR $L$done + movdqu xmm7,XMMWORD[32+rdx] + sub r9,0x10 + jz NEAR $L$odd_tail +$L$skip4x: + + + + + + movdqu xmm8,XMMWORD[r8] + movdqu xmm3,XMMWORD[16+r8] +DB 102,69,15,56,0,194 +DB 102,65,15,56,0,218 + pxor xmm0,xmm8 + + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + lea r8,[32+r8] + nop + sub r9,0x20 + jbe NEAR $L$even_tail + nop + jmp NEAR $L$mod_loop + +ALIGN 32 +$L$mod_loop: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + movdqu xmm9,XMMWORD[r8] + pxor xmm8,xmm0 +DB 102,69,15,56,0,202 + movdqu xmm3,XMMWORD[16+r8] + + pxor xmm8,xmm1 + pxor xmm1,xmm9 + pxor xmm4,xmm8 +DB 102,65,15,56,0,218 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm5,xmm3 + + movdqa xmm9,xmm0 + movdqa xmm8,xmm0 + psllq xmm0,5 + pxor xmm8,xmm0 +DB 102,15,58,68,218,0 + psllq xmm0,1 + pxor xmm0,xmm8 + psllq xmm0,57 + movdqa xmm8,xmm0 + pslldq xmm0,8 + psrldq xmm8,8 + pxor xmm0,xmm9 + pshufd xmm4,xmm5,78 + pxor xmm1,xmm8 + pxor xmm4,xmm5 + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,234,17 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm9 + lea r8,[32+r8] + psrlq xmm0,1 +DB 102,15,58,68,231,0 + pxor xmm0,xmm1 + + sub r9,0x20 + ja NEAR $L$mod_loop + +$L$even_tail: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + pxor xmm8,xmm0 + pxor xmm8,xmm1 + pxor xmm4,xmm8 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test r9,r9 + jnz NEAR $L$done + +$L$odd_tail: + movdqu xmm8,XMMWORD[r8] +DB 102,69,15,56,0,194 + pxor xmm0,xmm8 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,223,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L$done: +DB 102,65,15,56,0,194 + movdqu XMMWORD[rcx],xmm0 + movaps xmm6,XMMWORD[rsp] + movaps xmm7,XMMWORD[16+rsp] + movaps xmm8,XMMWORD[32+rsp] + movaps xmm9,XMMWORD[48+rsp] + movaps xmm10,XMMWORD[64+rsp] + movaps xmm11,XMMWORD[80+rsp] + movaps xmm12,XMMWORD[96+rsp] + movaps xmm13,XMMWORD[112+rsp] + movaps xmm14,XMMWORD[128+rsp] + movaps xmm15,XMMWORD[144+rsp] + lea rsp,[168+rsp] +$L$SEH_end_gcm_ghash_clmul: + DB 0F3h,0C3h ;repret + +global gcm_init_avx + +ALIGN 32 +gcm_init_avx: + jmp NEAR $L$_init_clmul + +global gcm_gmult_avx + +ALIGN 32 +gcm_gmult_avx: + jmp NEAR $L$_gmult_clmul + +global gcm_ghash_avx + +ALIGN 32 +gcm_ghash_avx: + jmp NEAR $L$_ghash_clmul + +ALIGN 64 +$L$bswap_mask: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$0x1c2_polynomial: +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +$L$7_mask: + DD 7,0,7,0 +$L$7_mask_poly: + DD 7,0,450,0 +ALIGN 64 + +$L$rem_4bit: + DD 0,0,0,471859200,0,943718400,0,610271232 + DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 + DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 + DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +$L$rem_8bit: + DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E + DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E + DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E + DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E + DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E + DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E + DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E + DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E + DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE + DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE + DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE + DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE + DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E + DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E + DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE + DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE + DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E + DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E + DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E + DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E + DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E + DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E + DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E + DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E + DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE + DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE + DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE + DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE + DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E + DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E + DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE + DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rax,[24+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase + DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase + DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase + + DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase + DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase + DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase + + DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase + DD $L$SEH_end_gcm_init_clmul wrt ..imagebase + DD $L$SEH_info_gcm_init_clmul wrt ..imagebase + + DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase + DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase + DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_gcm_gmult_4bit: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase +$L$SEH_info_gcm_ghash_4bit: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase +$L$SEH_info_gcm_init_clmul: +DB 0x01,0x08,0x03,0x00 +DB 0x08,0x68,0x00,0x00 +DB 0x04,0x22,0x00,0x00 +$L$SEH_info_gcm_ghash_clmul: +DB 0x01,0x33,0x16,0x00 +DB 0x33,0xf8,0x09,0x00 +DB 0x2e,0xe8,0x08,0x00 +DB 0x29,0xd8,0x07,0x00 +DB 0x24,0xc8,0x06,0x00 +DB 0x1f,0xb8,0x05,0x00 +DB 0x1a,0xa8,0x04,0x00 +DB 0x15,0x98,0x03,0x00 +DB 0x10,0x88,0x02,0x00 +DB 0x0c,0x78,0x01,0x00 +DB 0x08,0x68,0x00,0x00 +DB 0x04,0x01,0x15,0x00 diff --git a/third_party/boringssl/win-x86_64/crypto/rand/rdrand-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/rand/rdrand-x86_64.asm new file mode 100644 index 00000000000..4c03791b48e --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/rand/rdrand-x86_64.asm @@ -0,0 +1,70 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + + + + +global CRYPTO_rdrand + +ALIGN 16 +CRYPTO_rdrand: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_CRYPTO_rdrand: + mov rdi,rcx + + + xor rax,rax + + +DB 0x48,0x0f,0xc7,0xf1 + + adc rax,rax + mov QWORD[rdi],rcx + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + + + + + +global CRYPTO_rdrand_multiple8_buf + +ALIGN 16 +CRYPTO_rdrand_multiple8_buf: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_CRYPTO_rdrand_multiple8_buf: + mov rdi,rcx + mov rsi,rdx + + + test rsi,rsi + jz NEAR $L$out + mov rdx,8 +$L$loop: + + +DB 0x48,0x0f,0xc7,0xf1 + jnc NEAR $L$err + mov QWORD[rdi],rcx + add rdi,rdx + sub rsi,rdx + jnz NEAR $L$loop +$L$out: + mov rax,1 + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$err: + xor rax,rax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret diff --git a/third_party/boringssl/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm new file mode 100644 index 00000000000..f1ea9652d92 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/rc4/rc4-md5-x86_64.asm @@ -0,0 +1,1372 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +ALIGN 16 + +global rc4_md5_enc + +rc4_md5_enc: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rc4_md5_enc: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + cmp r9,0 + je NEAR $L$abort + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,40 +$L$body: + mov r11,rcx + mov r12,r9 + mov r13,rsi + mov r14,rdx + mov r15,r8 + xor rbp,rbp + xor rcx,rcx + + lea rdi,[8+rdi] + mov bpl,BYTE[((-8))+rdi] + mov cl,BYTE[((-4))+rdi] + + inc bpl + sub r14,r13 + mov eax,DWORD[rbp*4+rdi] + add cl,al + lea rsi,[rbp*4+rdi] + shl r12,6 + add r12,r15 + mov QWORD[16+rsp],r12 + + mov QWORD[24+rsp],r11 + mov r8d,DWORD[r11] + mov r9d,DWORD[4+r11] + mov r10d,DWORD[8+r11] + mov r11d,DWORD[12+r11] + jmp NEAR $L$oop + +ALIGN 16 +$L$oop: + mov DWORD[rsp],r8d + mov DWORD[4+rsp],r9d + mov DWORD[8+rsp],r10d + mov r12d,r11d + mov DWORD[12+rsp],r11d + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[r15] + add al,dl + mov ebx,DWORD[4+rsi] + add r8d,3614090360 + xor r12d,r11d + movzx eax,al + mov DWORD[rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[4+r15] + add bl,dl + mov eax,DWORD[8+rsi] + add r11d,3905402710 + xor r12d,r10d + movzx ebx,bl + mov DWORD[4+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[8+r15] + add al,dl + mov ebx,DWORD[12+rsi] + add r10d,606105819 + xor r12d,r9d + movzx eax,al + mov DWORD[8+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[12+r15] + add bl,dl + mov eax,DWORD[16+rsi] + add r9d,3250441966 + xor r12d,r8d + movzx ebx,bl + mov DWORD[12+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[16+r15] + add al,dl + mov ebx,DWORD[20+rsi] + add r8d,4118548399 + xor r12d,r11d + movzx eax,al + mov DWORD[16+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[20+r15] + add bl,dl + mov eax,DWORD[24+rsi] + add r11d,1200080426 + xor r12d,r10d + movzx ebx,bl + mov DWORD[20+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[24+r15] + add al,dl + mov ebx,DWORD[28+rsi] + add r10d,2821735955 + xor r12d,r9d + movzx eax,al + mov DWORD[24+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[28+r15] + add bl,dl + mov eax,DWORD[32+rsi] + add r9d,4249261313 + xor r12d,r8d + movzx ebx,bl + mov DWORD[28+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[32+r15] + add al,dl + mov ebx,DWORD[36+rsi] + add r8d,1770035416 + xor r12d,r11d + movzx eax,al + mov DWORD[32+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[36+r15] + add bl,dl + mov eax,DWORD[40+rsi] + add r11d,2336552879 + xor r12d,r10d + movzx ebx,bl + mov DWORD[36+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[40+r15] + add al,dl + mov ebx,DWORD[44+rsi] + add r10d,4294925233 + xor r12d,r9d + movzx eax,al + mov DWORD[40+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[44+r15] + add bl,dl + mov eax,DWORD[48+rsi] + add r9d,2304563134 + xor r12d,r8d + movzx ebx,bl + mov DWORD[44+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[48+r15] + add al,dl + mov ebx,DWORD[52+rsi] + add r8d,1804603682 + xor r12d,r11d + movzx eax,al + mov DWORD[48+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[52+r15] + add bl,dl + mov eax,DWORD[56+rsi] + add r11d,4254626195 + xor r12d,r10d + movzx ebx,bl + mov DWORD[52+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[56+r15] + add al,dl + mov ebx,DWORD[60+rsi] + add r10d,2792965006 + xor r12d,r9d + movzx eax,al + mov DWORD[56+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm2,XMMWORD[r13] + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[60+r15] + add bl,dl + mov eax,DWORD[64+rsi] + add r9d,1236535329 + xor r12d,r8d + movzx ebx,bl + mov DWORD[60+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[4+r15] + add al,dl + mov ebx,DWORD[68+rsi] + add r8d,4129170786 + xor r12d,r10d + movzx eax,al + mov DWORD[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[24+r15] + add bl,dl + mov eax,DWORD[72+rsi] + add r11d,3225465664 + xor r12d,r9d + movzx ebx,bl + mov DWORD[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[44+r15] + add al,dl + mov ebx,DWORD[76+rsi] + add r10d,643717713 + xor r12d,r8d + movzx eax,al + mov DWORD[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[r15] + add bl,dl + mov eax,DWORD[80+rsi] + add r9d,3921069994 + xor r12d,r11d + movzx ebx,bl + mov DWORD[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[20+r15] + add al,dl + mov ebx,DWORD[84+rsi] + add r8d,3593408605 + xor r12d,r10d + movzx eax,al + mov DWORD[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[40+r15] + add bl,dl + mov eax,DWORD[88+rsi] + add r11d,38016083 + xor r12d,r9d + movzx ebx,bl + mov DWORD[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[60+r15] + add al,dl + mov ebx,DWORD[92+rsi] + add r10d,3634488961 + xor r12d,r8d + movzx eax,al + mov DWORD[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[16+r15] + add bl,dl + mov eax,DWORD[96+rsi] + add r9d,3889429448 + xor r12d,r11d + movzx ebx,bl + mov DWORD[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[36+r15] + add al,dl + mov ebx,DWORD[100+rsi] + add r8d,568446438 + xor r12d,r10d + movzx eax,al + mov DWORD[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[56+r15] + add bl,dl + mov eax,DWORD[104+rsi] + add r11d,3275163606 + xor r12d,r9d + movzx ebx,bl + mov DWORD[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[12+r15] + add al,dl + mov ebx,DWORD[108+rsi] + add r10d,4107603335 + xor r12d,r8d + movzx eax,al + mov DWORD[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[32+r15] + add bl,dl + mov eax,DWORD[112+rsi] + add r9d,1163531501 + xor r12d,r11d + movzx ebx,bl + mov DWORD[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[52+r15] + add al,dl + mov ebx,DWORD[116+rsi] + add r8d,2850285829 + xor r12d,r10d + movzx eax,al + mov DWORD[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[8+r15] + add bl,dl + mov eax,DWORD[120+rsi] + add r11d,4243563512 + xor r12d,r9d + movzx ebx,bl + mov DWORD[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[28+r15] + add al,dl + mov ebx,DWORD[124+rsi] + add r10d,1735328473 + xor r12d,r8d + movzx eax,al + mov DWORD[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm3,XMMWORD[16+r13] + add bpl,32 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[48+r15] + add bl,dl + mov eax,DWORD[rbp*4+rdi] + add r9d,2368359562 + xor r12d,r11d + movzx ebx,bl + mov DWORD[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,[rbp*4+rdi] + psllq xmm1,8 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[20+r15] + add al,dl + mov ebx,DWORD[4+rsi] + add r8d,4294588738 + movzx eax,al + add r8d,r12d + mov DWORD[rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[32+r15] + add bl,dl + mov eax,DWORD[8+rsi] + add r11d,2272392833 + movzx ebx,bl + add r11d,r12d + mov DWORD[4+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[44+r15] + add al,dl + mov ebx,DWORD[12+rsi] + add r10d,1839030562 + movzx eax,al + add r10d,r12d + mov DWORD[8+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[56+r15] + add bl,dl + mov eax,DWORD[16+rsi] + add r9d,4259657740 + movzx ebx,bl + add r9d,r12d + mov DWORD[12+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[4+r15] + add al,dl + mov ebx,DWORD[20+rsi] + add r8d,2763975236 + movzx eax,al + add r8d,r12d + mov DWORD[16+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[16+r15] + add bl,dl + mov eax,DWORD[24+rsi] + add r11d,1272893353 + movzx ebx,bl + add r11d,r12d + mov DWORD[20+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[28+r15] + add al,dl + mov ebx,DWORD[28+rsi] + add r10d,4139469664 + movzx eax,al + add r10d,r12d + mov DWORD[24+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[40+r15] + add bl,dl + mov eax,DWORD[32+rsi] + add r9d,3200236656 + movzx ebx,bl + add r9d,r12d + mov DWORD[28+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[52+r15] + add al,dl + mov ebx,DWORD[36+rsi] + add r8d,681279174 + movzx eax,al + add r8d,r12d + mov DWORD[32+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[r15] + add bl,dl + mov eax,DWORD[40+rsi] + add r11d,3936430074 + movzx ebx,bl + add r11d,r12d + mov DWORD[36+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[12+r15] + add al,dl + mov ebx,DWORD[44+rsi] + add r10d,3572445317 + movzx eax,al + add r10d,r12d + mov DWORD[40+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[24+r15] + add bl,dl + mov eax,DWORD[48+rsi] + add r9d,76029189 + movzx ebx,bl + add r9d,r12d + mov DWORD[44+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[36+r15] + add al,dl + mov ebx,DWORD[52+rsi] + add r8d,3654602809 + movzx eax,al + add r8d,r12d + mov DWORD[48+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[48+r15] + add bl,dl + mov eax,DWORD[56+rsi] + add r11d,3873151461 + movzx ebx,bl + add r11d,r12d + mov DWORD[52+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[60+r15] + add al,dl + mov ebx,DWORD[60+rsi] + add r10d,530742520 + movzx eax,al + add r10d,r12d + mov DWORD[56+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm4,XMMWORD[32+r13] + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[8+r15] + add bl,dl + mov eax,DWORD[64+rsi] + add r9d,3299628645 + movzx ebx,bl + add r9d,r12d + mov DWORD[60+rsi],edx + add cl,al + rol r9d,23 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm4,xmm0 + pxor xmm4,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[r15] + add al,dl + mov ebx,DWORD[68+rsi] + add r8d,4096336452 + movzx eax,al + xor r12d,r10d + mov DWORD[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[28+r15] + add bl,dl + mov eax,DWORD[72+rsi] + add r11d,1126891415 + movzx ebx,bl + xor r12d,r9d + mov DWORD[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[56+r15] + add al,dl + mov ebx,DWORD[76+rsi] + add r10d,2878612391 + movzx eax,al + xor r12d,r8d + mov DWORD[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[20+r15] + add bl,dl + mov eax,DWORD[80+rsi] + add r9d,4237533241 + movzx ebx,bl + xor r12d,r11d + mov DWORD[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[48+r15] + add al,dl + mov ebx,DWORD[84+rsi] + add r8d,1700485571 + movzx eax,al + xor r12d,r10d + mov DWORD[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[12+r15] + add bl,dl + mov eax,DWORD[88+rsi] + add r11d,2399980690 + movzx ebx,bl + xor r12d,r9d + mov DWORD[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[40+r15] + add al,dl + mov ebx,DWORD[92+rsi] + add r10d,4293915773 + movzx eax,al + xor r12d,r8d + mov DWORD[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[4+r15] + add bl,dl + mov eax,DWORD[96+rsi] + add r9d,2240044497 + movzx ebx,bl + xor r12d,r11d + mov DWORD[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[32+r15] + add al,dl + mov ebx,DWORD[100+rsi] + add r8d,1873313359 + movzx eax,al + xor r12d,r10d + mov DWORD[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[60+r15] + add bl,dl + mov eax,DWORD[104+rsi] + add r11d,4264355552 + movzx ebx,bl + xor r12d,r9d + mov DWORD[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[24+r15] + add al,dl + mov ebx,DWORD[108+rsi] + add r10d,2734768916 + movzx eax,al + xor r12d,r8d + mov DWORD[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[52+r15] + add bl,dl + mov eax,DWORD[112+rsi] + add r9d,1309151649 + movzx ebx,bl + xor r12d,r11d + mov DWORD[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[16+r15] + add al,dl + mov ebx,DWORD[116+rsi] + add r8d,4149444226 + movzx eax,al + xor r12d,r10d + mov DWORD[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[44+r15] + add bl,dl + mov eax,DWORD[120+rsi] + add r11d,3174756917 + movzx ebx,bl + xor r12d,r9d + mov DWORD[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[8+r15] + add al,dl + mov ebx,DWORD[124+rsi] + add r10d,718787259 + movzx eax,al + xor r12d,r8d + mov DWORD[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm5,XMMWORD[48+r13] + add bpl,32 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[36+r15] + add bl,dl + mov eax,DWORD[rbp*4+rdi] + add r9d,3951481745 + movzx ebx,bl + xor r12d,r11d + mov DWORD[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rbp + xor rbp,rbp + mov bpl,sil + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,[rbp*4+rdi] + psllq xmm1,8 + pxor xmm5,xmm0 + pxor xmm5,xmm1 + add r8d,DWORD[rsp] + add r9d,DWORD[4+rsp] + add r10d,DWORD[8+rsp] + add r11d,DWORD[12+rsp] + + movdqu XMMWORD[r13*1+r14],xmm2 + movdqu XMMWORD[16+r13*1+r14],xmm3 + movdqu XMMWORD[32+r13*1+r14],xmm4 + movdqu XMMWORD[48+r13*1+r14],xmm5 + lea r15,[64+r15] + lea r13,[64+r13] + cmp r15,QWORD[16+rsp] + jb NEAR $L$oop + + mov r12,QWORD[24+rsp] + sub cl,al + mov DWORD[r12],r8d + mov DWORD[4+r12],r9d + mov DWORD[8+r12],r10d + mov DWORD[12+r12],r11d + sub bpl,1 + mov DWORD[((-8))+rdi],ebp + mov DWORD[((-4))+rdi],ecx + + mov r15,QWORD[40+rsp] + mov r14,QWORD[48+rsp] + mov r13,QWORD[56+rsp] + mov r12,QWORD[64+rsp] + mov rbp,QWORD[72+rsp] + mov rbx,QWORD[80+rsp] + lea rsp,[88+rsp] +$L$epilogue: +$L$abort: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rc4_md5_enc: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$body] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov r15,QWORD[40+rax] + mov r14,QWORD[48+rax] + mov r13,QWORD[56+rax] + mov r12,QWORD[64+rax] + mov rbp,QWORD[72+rax] + mov rbx,QWORD[80+rax] + lea rax,[88+rax] + + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_rc4_md5_enc wrt ..imagebase + DD $L$SEH_end_rc4_md5_enc wrt ..imagebase + DD $L$SEH_info_rc4_md5_enc wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_rc4_md5_enc: +DB 9,0,0,0 + DD se_handler wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/rc4/rc4-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/rc4/rc4-x86_64.asm new file mode 100644 index 00000000000..c7c3b7b6c0f --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/rc4/rc4-x86_64.asm @@ -0,0 +1,741 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +global asm_RC4 + +ALIGN 16 +asm_RC4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_RC4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + or rsi,rsi + jne NEAR $L$entry + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$entry: + push rbx + push r12 + push r13 +$L$prologue: + mov r11,rsi + mov r12,rdx + mov r13,rcx + xor r10,r10 + xor rcx,rcx + + lea rdi,[8+rdi] + mov r10b,BYTE[((-8))+rdi] + mov cl,BYTE[((-4))+rdi] + cmp DWORD[256+rdi],-1 + je NEAR $L$RC4_CHAR + mov r8d,DWORD[OPENSSL_ia32cap_P] + xor rbx,rbx + inc r10b + sub rbx,r10 + sub r13,r12 + mov eax,DWORD[r10*4+rdi] + test r11,-16 + jz NEAR $L$loop1 + bt r8d,30 + jc NEAR $L$intel + and rbx,7 + lea rsi,[1+r10] + jz NEAR $L$oop8 + sub r11,rbx +$L$oop8_warmup: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec rbx + jnz NEAR $L$oop8_warmup + + lea rsi,[1+r10] + jmp NEAR $L$oop8 +ALIGN 16 +$L$oop8: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[rsi*4+rdi] + ror r8,8 + mov DWORD[r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[4+rsi*4+rdi] + ror r8,8 + mov DWORD[4+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[8+rsi*4+rdi] + ror r8,8 + mov DWORD[8+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[12+rsi*4+rdi] + ror r8,8 + mov DWORD[12+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[16+rsi*4+rdi] + ror r8,8 + mov DWORD[16+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[20+rsi*4+rdi] + ror r8,8 + mov DWORD[20+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[24+rsi*4+rdi] + ror r8,8 + mov DWORD[24+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add sil,8 + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[((-4))+rsi*4+rdi] + ror r8,8 + mov DWORD[28+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add r10b,8 + ror r8,8 + sub r11,8 + + xor r8,QWORD[r12] + mov QWORD[r13*1+r12],r8 + lea r12,[8+r12] + + test r11,-8 + jnz NEAR $L$oop8 + cmp r11,0 + jne NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$intel: + test r11,-32 + jz NEAR $L$loop1 + and rbx,15 + jz NEAR $L$oop16_is_hot + sub r11,rbx +$L$oop16_warmup: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec rbx + jnz NEAR $L$oop16_warmup + + mov rbx,rcx + xor rcx,rcx + mov cl,bl + +$L$oop16_is_hot: + lea rsi,[r10*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + pxor xmm0,xmm0 + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[4+rsi] + movzx eax,al + mov DWORD[rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],0 + jmp NEAR $L$oop16_enter +ALIGN 16 +$L$oop16: + add cl,al + mov edx,DWORD[rcx*4+rdi] + pxor xmm2,xmm0 + psllq xmm1,8 + pxor xmm0,xmm0 + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[4+rsi] + movzx eax,al + mov DWORD[rsi],edx + pxor xmm2,xmm1 + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],0 + movdqu XMMWORD[r13*1+r12],xmm2 + lea r12,[16+r12] +$L$oop16_enter: + mov edx,DWORD[rcx*4+rdi] + pxor xmm1,xmm1 + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[8+rsi] + movzx ebx,bl + mov DWORD[4+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],0 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[12+rsi] + movzx eax,al + mov DWORD[8+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],1 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[16+rsi] + movzx ebx,bl + mov DWORD[12+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],1 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[20+rsi] + movzx eax,al + mov DWORD[16+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],2 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[24+rsi] + movzx ebx,bl + mov DWORD[20+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],2 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[28+rsi] + movzx eax,al + mov DWORD[24+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],3 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[32+rsi] + movzx ebx,bl + mov DWORD[28+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],3 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[36+rsi] + movzx eax,al + mov DWORD[32+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],4 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[40+rsi] + movzx ebx,bl + mov DWORD[36+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],4 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[44+rsi] + movzx eax,al + mov DWORD[40+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],5 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[48+rsi] + movzx ebx,bl + mov DWORD[44+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],5 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[52+rsi] + movzx eax,al + mov DWORD[48+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],6 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[56+rsi] + movzx ebx,bl + mov DWORD[52+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],6 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[60+rsi] + movzx eax,al + mov DWORD[56+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],7 + add r10b,16 + movdqu xmm2,XMMWORD[r12] + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + movzx ebx,bl + mov DWORD[60+rsi],edx + lea rsi,[r10*4+rdi] + pinsrw xmm1,WORD[rbx*4+rdi],7 + mov eax,DWORD[rsi] + mov rbx,rcx + xor rcx,rcx + sub r11,16 + mov cl,bl + test r11,-16 + jnz NEAR $L$oop16 + + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + movdqu XMMWORD[r13*1+r12],xmm2 + lea r12,[16+r12] + + cmp r11,0 + jne NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$loop1: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec r11 + jnz NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$RC4_CHAR: + add r10b,1 + movzx eax,BYTE[r10*1+rdi] + test r11,-8 + jz NEAR $L$cloop1 + jmp NEAR $L$cloop8 +ALIGN 16 +$L$cloop8: + mov r8d,DWORD[r12] + mov r9d,DWORD[4+r12] + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov0 + mov rbx,rax +$L$cmov0: + add dl,al + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov1 + mov rax,rbx +$L$cmov1: + add dl,bl + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov2 + mov rbx,rax +$L$cmov2: + add dl,al + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov3 + mov rax,rbx +$L$cmov3: + add dl,bl + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov4 + mov rbx,rax +$L$cmov4: + add dl,al + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov5 + mov rax,rbx +$L$cmov5: + add dl,bl + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov6 + mov rbx,rax +$L$cmov6: + add dl,al + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov7 + mov rax,rbx +$L$cmov7: + add dl,bl + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + lea r11,[((-8))+r11] + mov DWORD[r13],r8d + lea r12,[8+r12] + mov DWORD[4+r13],r9d + lea r13,[8+r13] + + test r11,-8 + jnz NEAR $L$cloop8 + cmp r11,0 + jne NEAR $L$cloop1 + jmp NEAR $L$exit +ALIGN 16 +$L$cloop1: + add cl,al + movzx ecx,cl + movzx edx,BYTE[rcx*1+rdi] + mov BYTE[rcx*1+rdi],al + mov BYTE[r10*1+rdi],dl + add dl,al + add r10b,1 + movzx edx,dl + movzx r10d,r10b + movzx edx,BYTE[rdx*1+rdi] + movzx eax,BYTE[r10*1+rdi] + xor dl,BYTE[r12] + lea r12,[1+r12] + mov BYTE[r13],dl + lea r13,[1+r13] + sub r11,1 + jnz NEAR $L$cloop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$exit: + sub r10b,1 + mov DWORD[((-8))+rdi],r10d + mov DWORD[((-4))+rdi],ecx + + mov r13,QWORD[rsp] + mov r12,QWORD[8+rsp] + mov rbx,QWORD[16+rsp] + add rsp,24 +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_RC4: +global asm_RC4_set_key + +ALIGN 16 +asm_RC4_set_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_asm_RC4_set_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rdi,[8+rdi] + lea rdx,[rsi*1+rdx] + neg rsi + mov rcx,rsi + xor eax,eax + xor r9,r9 + xor r10,r10 + xor r11,r11 + + mov r8d,DWORD[OPENSSL_ia32cap_P] + bt r8d,20 + jc NEAR $L$c1stloop + jmp NEAR $L$w1stloop + +ALIGN 16 +$L$w1stloop: + mov DWORD[rax*4+rdi],eax + add al,1 + jnc NEAR $L$w1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$w2ndloop: + mov r10d,DWORD[r9*4+rdi] + add r8b,BYTE[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11d,DWORD[r8*4+rdi] + cmovz rsi,rcx + mov DWORD[r8*4+rdi],r10d + mov DWORD[r9*4+rdi],r11d + add r9b,1 + jnc NEAR $L$w2ndloop + jmp NEAR $L$exit_key + +ALIGN 16 +$L$c1stloop: + mov BYTE[rax*1+rdi],al + add al,1 + jnc NEAR $L$c1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$c2ndloop: + mov r10b,BYTE[r9*1+rdi] + add r8b,BYTE[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11b,BYTE[r8*1+rdi] + jnz NEAR $L$cnowrap + mov rsi,rcx +$L$cnowrap: + mov BYTE[r8*1+rdi],r10b + mov BYTE[r9*1+rdi],r11b + add r9b,1 + jnc NEAR $L$c2ndloop + mov DWORD[256+rdi],-1 + +ALIGN 16 +$L$exit_key: + xor eax,eax + mov DWORD[((-8))+rdi],eax + mov DWORD[((-4))+rdi],eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_asm_RC4_set_key: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +stream_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rax,[24+rax] + + mov rbx,QWORD[((-8))+rax] + mov r12,QWORD[((-16))+rax] + mov r13,QWORD[((-24))+rax] + mov QWORD[144+r8],rbx + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + jmp NEAR $L$common_seh_exit + + + +ALIGN 16 +key_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[152+r8] + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + +$L$common_seh_exit: + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_asm_RC4 wrt ..imagebase + DD $L$SEH_end_asm_RC4 wrt ..imagebase + DD $L$SEH_info_asm_RC4 wrt ..imagebase + + DD $L$SEH_begin_asm_RC4_set_key wrt ..imagebase + DD $L$SEH_end_asm_RC4_set_key wrt ..imagebase + DD $L$SEH_info_asm_RC4_set_key wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_asm_RC4: +DB 9,0,0,0 + DD stream_se_handler wrt ..imagebase +$L$SEH_info_asm_RC4_set_key: +DB 9,0,0,0 + DD key_se_handler wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/sha/sha1-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/sha/sha1-x86_64.asm new file mode 100644 index 00000000000..0f5361a1b54 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/sha/sha1-x86_64.asm @@ -0,0 +1,2616 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +global sha1_block_data_order + +ALIGN 16 +sha1_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov r9d,DWORD[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD[((OPENSSL_ia32cap_P+4))] + mov r10d,DWORD[((OPENSSL_ia32cap_P+8))] + test r8d,512 + jz NEAR $L$ialu + jmp NEAR _ssse3_shortcut + +ALIGN 16 +$L$ialu: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD[64+rsp],rax +$L$prologue: + + mov esi,DWORD[r8] + mov edi,DWORD[4+r8] + mov r11d,DWORD[8+r8] + mov r12d,DWORD[12+r8] + mov r13d,DWORD[16+r8] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edx,DWORD[r9] + bswap edx + mov ebp,DWORD[4+r9] + mov eax,r12d + mov DWORD[rsp],edx + mov ecx,esi + bswap ebp + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov r14d,DWORD[8+r9] + mov eax,r11d + mov DWORD[4+rsp],ebp + mov ecx,r13d + bswap r14d + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov edx,DWORD[12+r9] + mov eax,edi + mov DWORD[8+rsp],r14d + mov ecx,r12d + bswap edx + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov ebp,DWORD[16+r9] + mov eax,esi + mov DWORD[12+rsp],edx + mov ecx,r11d + bswap ebp + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov r14d,DWORD[20+r9] + mov eax,r13d + mov DWORD[16+rsp],ebp + mov ecx,edi + bswap r14d + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD[24+r9] + mov eax,r12d + mov DWORD[20+rsp],r14d + mov ecx,esi + bswap edx + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+r14] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov ebp,DWORD[28+r9] + mov eax,r11d + mov DWORD[24+rsp],edx + mov ecx,r13d + bswap ebp + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rdx] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov r14d,DWORD[32+r9] + mov eax,edi + mov DWORD[28+rsp],ebp + mov ecx,r12d + bswap r14d + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rbp] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov edx,DWORD[36+r9] + mov eax,esi + mov DWORD[32+rsp],r14d + mov ecx,r11d + bswap edx + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+r14] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov ebp,DWORD[40+r9] + mov eax,r13d + mov DWORD[36+rsp],edx + mov ecx,edi + bswap ebp + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rdx] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov r14d,DWORD[44+r9] + mov eax,r12d + mov DWORD[40+rsp],ebp + mov ecx,esi + bswap r14d + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rbp] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov edx,DWORD[48+r9] + mov eax,r11d + mov DWORD[44+rsp],r14d + mov ecx,r13d + bswap edx + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+r14] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov ebp,DWORD[52+r9] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,r12d + bswap ebp + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rdx] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov r14d,DWORD[56+r9] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r11d + bswap r14d + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rbp] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov edx,DWORD[60+r9] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,edi + bswap edx + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+r14] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,esi + xor ebp,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[32+rsp] + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + rol edi,30 + xor eax,r12d + add r13d,ecx + rol ebp,1 + add r13d,eax + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,r13d + xor r14d,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[36+rsp] + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + rol esi,30 + xor eax,r11d + add r12d,ecx + rol r14d,1 + add r12d,eax + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,r12d + xor edx,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[40+rsp] + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + rol r13d,30 + xor eax,edi + add r11d,ecx + rol edx,1 + add r11d,eax + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r11d + xor ebp,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[44+rsp] + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + rol r12d,30 + xor eax,esi + add edi,ecx + rol ebp,1 + add edi,eax + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,edi + xor r14d,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[48+rsp] + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + rol r11d,30 + xor eax,r13d + add esi,ecx + rol r14d,1 + add esi,eax + xor edx,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],r14d + mov ecx,esi + xor edx,DWORD[28+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[52+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],edx + mov ecx,r13d + xor ebp,DWORD[32+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[36+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],r14d + mov ecx,r11d + xor edx,DWORD[40+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],edx + mov ecx,edi + xor ebp,DWORD[44+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],ebp + mov ecx,esi + xor r14d,DWORD[48+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea r13d,[1859775393+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],r14d + mov ecx,r13d + xor edx,DWORD[52+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea r12d,[1859775393+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],edx + mov ecx,r12d + xor ebp,DWORD[56+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r11d,[1859775393+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,r12d + mov DWORD[48+rsp],ebp + mov ecx,r11d + xor r14d,DWORD[60+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea edi,[1859775393+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r11d + mov DWORD[52+rsp],r14d + mov ecx,edi + xor edx,DWORD[rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[24+rsp] + lea esi,[1859775393+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,edi + mov DWORD[56+rsp],edx + mov ecx,esi + xor ebp,DWORD[4+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea r13d,[1859775393+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[rsp] + mov eax,esi + mov DWORD[60+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[32+rsp] + lea r12d,[1859775393+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[4+rsp] + mov eax,r13d + mov DWORD[rsp],r14d + mov ecx,r12d + xor edx,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[36+rsp] + lea r11d,[1859775393+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[8+rsp] + mov eax,r12d + mov DWORD[4+rsp],edx + mov ecx,r11d + xor ebp,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[40+rsp] + lea edi,[1859775393+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[12+rsp] + mov eax,r11d + mov DWORD[8+rsp],ebp + mov ecx,edi + xor r14d,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[44+rsp] + lea esi,[1859775393+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[16+rsp] + mov eax,edi + mov DWORD[12+rsp],r14d + mov ecx,esi + xor edx,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[48+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[20+rsp] + mov eax,esi + mov DWORD[16+rsp],edx + mov ecx,r13d + xor ebp,DWORD[28+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[52+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[24+rsp] + mov eax,r13d + mov DWORD[20+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[32+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[56+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[28+rsp] + mov eax,r12d + mov DWORD[24+rsp],r14d + mov ecx,r11d + xor edx,DWORD[36+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[60+rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[32+rsp] + mov eax,r11d + mov DWORD[28+rsp],edx + mov ecx,edi + xor ebp,DWORD[40+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[36+rsp] + mov eax,r12d + mov DWORD[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[44+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[4+rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[40+rsp] + mov eax,r11d + mov DWORD[36+rsp],r14d + mov ebx,r11d + xor edx,DWORD[48+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[8+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[44+rsp] + mov eax,edi + mov DWORD[40+rsp],edx + mov ebx,edi + xor ebp,DWORD[52+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[12+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[48+rsp] + mov eax,esi + mov DWORD[44+rsp],ebp + mov ebx,esi + xor r14d,DWORD[56+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[16+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[52+rsp] + mov eax,r13d + mov DWORD[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD[60+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[20+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[56+rsp] + mov eax,r12d + mov DWORD[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD[rsp] + and eax,r11d + mov ecx,esi + xor ebp,DWORD[24+rsp] + lea r13d,[((-1894007588))+r13*1+rdx] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol ebp,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor r14d,DWORD[60+rsp] + mov eax,r11d + mov DWORD[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD[4+rsp] + and eax,edi + mov ecx,r13d + xor r14d,DWORD[28+rsp] + lea r12d,[((-1894007588))+r12*1+rbp] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol r14d,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor edx,DWORD[rsp] + mov eax,edi + mov DWORD[60+rsp],r14d + mov ebx,edi + xor edx,DWORD[8+rsp] + and eax,esi + mov ecx,r12d + xor edx,DWORD[32+rsp] + lea r11d,[((-1894007588))+r11*1+r14] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol edx,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor ebp,DWORD[4+rsp] + mov eax,esi + mov DWORD[rsp],edx + mov ebx,esi + xor ebp,DWORD[12+rsp] + and eax,r13d + mov ecx,r11d + xor ebp,DWORD[36+rsp] + lea edi,[((-1894007588))+rdi*1+rdx] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol ebp,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor r14d,DWORD[8+rsp] + mov eax,r13d + mov DWORD[4+rsp],ebp + mov ebx,r13d + xor r14d,DWORD[16+rsp] + and eax,r12d + mov ecx,edi + xor r14d,DWORD[40+rsp] + lea esi,[((-1894007588))+rsi*1+rbp] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol r14d,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor edx,DWORD[12+rsp] + mov eax,r12d + mov DWORD[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD[20+rsp] + and eax,r11d + mov ecx,esi + xor edx,DWORD[44+rsp] + lea r13d,[((-1894007588))+r13*1+r14] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol edx,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor ebp,DWORD[16+rsp] + mov eax,r11d + mov DWORD[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD[24+rsp] + and eax,edi + mov ecx,r13d + xor ebp,DWORD[48+rsp] + lea r12d,[((-1894007588))+r12*1+rdx] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol ebp,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor r14d,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD[28+rsp] + and eax,esi + mov ecx,r12d + xor r14d,DWORD[52+rsp] + lea r11d,[((-1894007588))+r11*1+rbp] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol r14d,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor edx,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],r14d + mov ebx,esi + xor edx,DWORD[32+rsp] + and eax,r13d + mov ecx,r11d + xor edx,DWORD[56+rsp] + lea edi,[((-1894007588))+rdi*1+r14] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol edx,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor ebp,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],edx + mov ebx,r13d + xor ebp,DWORD[36+rsp] + and eax,r12d + mov ecx,edi + xor ebp,DWORD[60+rsp] + lea esi,[((-1894007588))+rsi*1+rdx] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol ebp,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor r14d,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[40+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD[44+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[4+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],edx + mov ebx,edi + xor ebp,DWORD[48+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[8+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD[52+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[12+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],r14d + mov ebx,r13d + xor edx,DWORD[56+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[16+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[52+rsp] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,esi + xor ebp,DWORD[60+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[20+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[56+rsp] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[24+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[60+rsp] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,r12d + xor edx,DWORD[4+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[28+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,r11d + xor ebp,DWORD[8+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[32+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,edi + xor r14d,DWORD[12+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[36+rsp] + lea esi,[((-899497514))+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,esi + xor edx,DWORD[16+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[40+rsp] + lea r13d,[((-899497514))+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r13d + xor ebp,DWORD[20+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[44+rsp] + lea r12d,[((-899497514))+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[24+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[48+rsp] + lea r11d,[((-899497514))+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[20+rsp] + mov eax,r12d + mov DWORD[16+rsp],r14d + mov ecx,r11d + xor edx,DWORD[28+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[52+rsp] + lea edi,[((-899497514))+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,r11d + mov DWORD[20+rsp],edx + mov ecx,edi + xor ebp,DWORD[32+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea esi,[((-899497514))+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,edi + mov DWORD[24+rsp],ebp + mov ecx,esi + xor r14d,DWORD[36+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r13d,[((-899497514))+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,esi + mov DWORD[28+rsp],r14d + mov ecx,r13d + xor edx,DWORD[40+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[rsp] + lea r12d,[((-899497514))+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r13d + + mov ecx,r12d + xor ebp,DWORD[44+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea r11d,[((-899497514))+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,r12d + + mov ecx,r11d + xor r14d,DWORD[48+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea edi,[((-899497514))+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,r11d + + mov ecx,edi + xor edx,DWORD[52+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea esi,[((-899497514))+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD[56+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,esi + + mov ecx,r13d + xor r14d,DWORD[60+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r13d + + mov ecx,r12d + xor edx,DWORD[rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[24+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,r12d + + mov ecx,r11d + xor ebp,DWORD[4+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r11d + mov ecx,edi + xor eax,r13d + lea esi,[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD[r8] + add edi,DWORD[4+r8] + add r11d,DWORD[8+r8] + add r12d,DWORD[12+r8] + add r13d,DWORD[16+r8] + mov DWORD[r8],esi + mov DWORD[4+r8],edi + mov DWORD[8+r8],r11d + mov DWORD[12+r8],r12d + mov DWORD[16+r8],r13d + + sub r10,1 + lea r9,[64+r9] + jnz NEAR $L$loop + + mov rsi,QWORD[64+rsp] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order: + +ALIGN 16 +sha1_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + lea rsp,[((-160))+rsp] + movaps XMMWORD[(-40-96)+rax],xmm6 + movaps XMMWORD[(-40-80)+rax],xmm7 + movaps XMMWORD[(-40-64)+rax],xmm8 + movaps XMMWORD[(-40-48)+rax],xmm9 + movaps XMMWORD[(-40-32)+rax],xmm10 + movaps XMMWORD[(-40-16)+rax],xmm11 +$L$prologue_ssse3: + mov r14,rax + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,ebx + mov ebp,DWORD[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + movdqa xmm6,XMMWORD[64+r11] + movdqa xmm9,XMMWORD[((-64))+r11] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + add r9,64 + paddd xmm0,xmm9 +DB 102,15,56,0,222 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp NEAR $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3: + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx + movdqa xmm8,xmm3 + paddd xmm9,xmm3 + mov edi,eax + add ebp,DWORD[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm8,4 + and edi,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add ebp,eax + ror eax,7 + pxor xmm8,xmm2 + xor edi,ecx + mov esi,ebp + add edx,DWORD[4+rsp] + pxor xmm4,xmm8 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD[48+rsp],xmm9 + add edx,edi + and esi,eax + movdqa xmm10,xmm4 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + add ecx,DWORD[8+rsp] + psrld xmm8,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax + psrld xmm10,30 + add ecx,edx + ror edx,7 + por xmm4,xmm8 + xor edi,eax + mov esi,ecx + add ebx,DWORD[12+rsp] + pslld xmm9,2 + pxor xmm4,xmm10 + xor edx,ebp + movdqa xmm10,XMMWORD[((-64))+r11] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm4,xmm9 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp + movdqa xmm9,xmm4 + paddd xmm10,xmm4 + mov edi,ebx + add eax,DWORD[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm9,4 + and edi,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm9,xmm3 + xor edi,edx + mov esi,eax + add ebp,DWORD[20+rsp] + pxor xmm5,xmm9 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD[rsp],xmm10 + add ebp,edi + and esi,ebx + movdqa xmm8,xmm5 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + add edx,DWORD[24+rsp] + psrld xmm9,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx + psrld xmm8,30 + add edx,ebp + ror ebp,7 + por xmm5,xmm9 + xor edi,ebx + mov esi,edx + add ecx,DWORD[28+rsp] + pslld xmm10,2 + pxor xmm5,xmm8 + xor ebp,eax + movdqa xmm8,XMMWORD[((-32))+r11] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm5,xmm10 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax + movdqa xmm10,xmm5 + paddd xmm8,xmm5 + mov edi,ecx + add ebx,DWORD[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm10,4 + and edi,edx + xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp + mov esi,ebx + add eax,DWORD[36+rsp] + pxor xmm6,xmm10 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD[16+rsp],xmm8 + add eax,edi + and esi,ecx + movdqa xmm9,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + add ebp,DWORD[40+rsp] + psrld xmm10,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx + psrld xmm9,30 + add ebp,eax + ror eax,7 + por xmm6,xmm10 + xor edi,ecx + mov esi,ebp + add edx,DWORD[44+rsp] + pslld xmm8,2 + pxor xmm6,xmm9 + xor eax,ebx + movdqa xmm9,XMMWORD[((-32))+r11] + rol ebp,5 + add edx,edi + and esi,eax + pxor xmm6,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm7,xmm3,238 + xor esi,ebx + movdqa xmm8,xmm6 + paddd xmm9,xmm6 + mov edi,edx + add ecx,DWORD[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm8,4 + and edi,ebp + xor ebp,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm8,xmm5 + xor edi,eax + mov esi,ecx + add ebx,DWORD[52+rsp] + pxor xmm7,xmm8 + xor edx,ebp + rol ecx,5 + movdqa XMMWORD[32+rsp],xmm9 + add ebx,edi + and esi,edx + movdqa xmm10,xmm7 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + add eax,DWORD[56+rsp] + psrld xmm8,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx + psrld xmm10,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm8 + xor edi,edx + mov esi,eax + add ebp,DWORD[60+rsp] + pslld xmm9,2 + pxor xmm7,xmm10 + xor ebx,ecx + movdqa xmm10,XMMWORD[((-32))+r11] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx + mov edi,ebp + add edx,DWORD[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx + rol ebp,5 + pxor xmm0,xmm1 + add edx,esi + and edi,eax + movdqa xmm8,xmm10 + xor eax,ebx + paddd xmm10,xmm7 + add edx,ebp + pxor xmm0,xmm9 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD[4+rsp] + movdqa xmm9,xmm0 + xor ebp,eax + rol edx,5 + movdqa XMMWORD[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm0,2 + add ecx,edx + ror edx,7 + psrld xmm9,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD[8+rsp] + por xmm0,xmm9 + xor edx,ebp + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + add ebp,esi + xor edi,ecx + movdqa xmm9,xmm8 + ror ebx,7 + paddd xmm8,xmm0 + add ebp,eax + pxor xmm1,xmm10 + add edx,DWORD[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx + movdqa XMMWORD[rsp],xmm8 + ror eax,7 + add edx,ebp + add ecx,DWORD[24+rsp] + pslld xmm1,2 + xor esi,eax + mov edi,edx + psrld xmm10,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm1,xmm10 + add ecx,edx + add ebx,DWORD[28+rsp] + pshufd xmm8,xmm0,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD[r11] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx + pxor xmm2,xmm8 + add ebp,DWORD[36+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx + movdqa XMMWORD[16+rsp],xmm9 + ror ebx,7 + add ebp,eax + add edx,DWORD[40+rsp] + pslld xmm2,2 + xor esi,ebx + mov edi,ebp + psrld xmm8,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm2,xmm8 + add edx,ebp + add ecx,DWORD[44+rsp] + pshufd xmm9,xmm1,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + add ebx,esi + xor edi,ebp + movdqa xmm8,xmm10 + ror edx,7 + paddd xmm10,xmm2 + add ebx,ecx + pxor xmm3,xmm9 + add eax,DWORD[52+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + add eax,edi + xor esi,edx + movdqa XMMWORD[32+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[56+rsp] + pslld xmm3,2 + xor esi,ecx + mov edi,eax + psrld xmm9,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm3,xmm9 + add ebp,eax + add edx,DWORD[60+rsp] + pshufd xmm10,xmm2,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + add ecx,esi + xor edi,eax + movdqa xmm9,xmm8 + ror ebp,7 + paddd xmm8,xmm3 + add ecx,edx + pxor xmm4,xmm10 + add ebx,DWORD[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + add ebx,edi + xor esi,ebp + movdqa XMMWORD[48+rsp],xmm8 + ror edx,7 + add ebx,ecx + add eax,DWORD[8+rsp] + pslld xmm4,2 + xor esi,edx + mov edi,ebx + psrld xmm10,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm4,xmm10 + add eax,ebx + add ebp,DWORD[12+rsp] + pshufd xmm8,xmm3,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + add edx,esi + xor edi,ebx + movdqa xmm10,xmm9 + ror eax,7 + paddd xmm9,xmm4 + add edx,ebp + pxor xmm5,xmm8 + add ecx,DWORD[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax + movdqa XMMWORD[rsp],xmm9 + ror ebp,7 + add ecx,edx + add ebx,DWORD[24+rsp] + pslld xmm5,2 + xor esi,ebp + mov edi,ecx + psrld xmm8,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm5,xmm8 + add ebx,ecx + add eax,DWORD[28+rsp] + pshufd xmm9,xmm4,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 + add ebp,DWORD[32+rsp] + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm9,xmm5 + mov edi,eax + xor esi,ecx + pxor xmm6,xmm7 + rol eax,5 + add ebp,esi + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 + xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax + add edx,DWORD[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm9,xmm6 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD[16+rsp],xmm10 + rol ebp,5 + add edx,edi + xor esi,eax + pslld xmm6,2 + xor eax,ebx + add edx,ebp + psrld xmm9,30 + add ecx,DWORD[40+rsp] + and esi,eax + xor eax,ebx + por xmm6,xmm9 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm10,xmm5,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + pxor xmm7,xmm3 + add eax,DWORD[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm10,xmm6 + mov edi,ebx + xor esi,edx + pxor xmm7,xmm0 + rol ebx,5 + add eax,esi + movdqa xmm9,XMMWORD[32+r11] + xor edi,ecx + paddd xmm8,xmm6 + xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx + add ebp,DWORD[52+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm10,xmm7 + mov esi,eax + xor edi,ecx + movdqa XMMWORD[32+rsp],xmm8 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm7,2 + xor ebx,ecx + add ebp,eax + psrld xmm10,30 + add edx,DWORD[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm7,xmm10 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm8,xmm6,238 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm0,xmm4 + add ebx,DWORD[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + punpcklqdq xmm8,xmm7 + mov edi,ecx + xor esi,ebp + pxor xmm0,xmm1 + rol ecx,5 + add ebx,esi + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 + xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx + add eax,DWORD[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm8,xmm0 + mov esi,ebx + xor edi,edx + movdqa XMMWORD[48+rsp],xmm9 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm0,2 + xor ecx,edx + add eax,ebx + psrld xmm8,30 + add ebp,DWORD[8+rsp] + and esi,ecx + xor ecx,edx + por xmm0,xmm8 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm9,xmm7,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm1,xmm5 + add ecx,DWORD[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm9,xmm0 + mov edi,edx + xor esi,eax + pxor xmm1,xmm2 + rol edx,5 + add ecx,esi + movdqa xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 + xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx + add ebx,DWORD[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movdqa xmm9,xmm1 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD[rsp],xmm10 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm1,2 + xor edx,ebp + add ebx,ecx + psrld xmm9,30 + add eax,DWORD[24+rsp] + and esi,edx + xor edx,ebp + por xmm1,xmm9 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm10,xmm0,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[28+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm2,xmm6 + add edx,DWORD[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm10,xmm1 + mov edi,ebp + xor esi,ebx + pxor xmm2,xmm3 + rol ebp,5 + add edx,esi + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 + xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp + add ecx,DWORD[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm10,xmm2 + mov esi,edx + xor edi,eax + movdqa XMMWORD[16+rsp],xmm8 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm2,2 + xor ebp,eax + add ecx,edx + psrld xmm10,30 + add ebx,DWORD[40+rsp] + and esi,ebp + xor ebp,eax + por xmm2,xmm10 + ror edx,7 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm8,xmm1,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm3,xmm7 + add ebp,DWORD[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + add ebp,esi + xor edi,ecx + movdqa xmm10,xmm9 + ror ebx,7 + paddd xmm9,xmm2 + add ebp,eax + pxor xmm3,xmm8 + add edx,DWORD[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx + movdqa XMMWORD[32+rsp],xmm9 + ror eax,7 + add edx,ebp + add ecx,DWORD[56+rsp] + pslld xmm3,2 + xor esi,eax + mov edi,edx + psrld xmm8,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm3,xmm8 + add ecx,edx + add ebx,DWORD[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm3 + add eax,esi + xor edi,edx + movdqa XMMWORD[48+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[4+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r9,r10 + je NEAR $L$done_ssse3 + movdqa xmm6,XMMWORD[64+r11] + movdqa xmm9,XMMWORD[((-64))+r11] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,206 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm0,xmm9 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD[rsp],xmm0 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,214 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + paddd xmm1,xmm9 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + add edx,DWORD[12+r8] + mov DWORD[r8],eax + add ebp,DWORD[16+r8] + mov DWORD[4+r8],esi + mov ebx,esi + mov DWORD[8+r8],ecx + mov edi,ecx + mov DWORD[12+r8],edx + xor edi,edx + mov DWORD[16+r8],ebp + and esi,edi + jmp NEAR $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3: + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + mov DWORD[r8],eax + add edx,DWORD[12+r8] + mov DWORD[4+r8],esi + add ebp,DWORD[16+r8] + mov DWORD[8+r8],ecx + mov DWORD[12+r8],edx + mov DWORD[16+r8],ebp + movaps xmm6,XMMWORD[((-40-96))+r14] + movaps xmm7,XMMWORD[((-40-80))+r14] + movaps xmm8,XMMWORD[((-40-64))+r14] + movaps xmm9,XMMWORD[((-40-48))+r14] + movaps xmm10,XMMWORD[((-40-32))+r14] + movaps xmm11,XMMWORD[((-40-16))+r14] + lea rsi,[r14] + mov r14,QWORD[((-40))+rsi] + mov r13,QWORD[((-32))+rsi] + mov r12,QWORD[((-24))+rsi] + mov rbp,QWORD[((-16))+rsi] + mov rbx,QWORD[((-8))+rsi] + lea rsp,[rsi] +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_ssse3: +ALIGN 64 +K_XX_XX: + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 +DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 +DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 +DB 103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[64+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + + jmp NEAR $L$common_seh_tail + + +ALIGN 16 +ssse3_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[232+r8] + + lea rsi,[((-40-96))+rax] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha1_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase +$L$SEH_info_sha1_block_data_order_ssse3: +DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm new file mode 100644 index 00000000000..e6193c5b9d7 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/sha/sha256-x86_64.asm @@ -0,0 +1,2994 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P +global sha256_block_data_order + +ALIGN 16 +sha256_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea r11,[OPENSSL_ia32cap_P] + mov r9d,DWORD[r11] + mov r10d,DWORD[4+r11] + mov r11d,DWORD[8+r11] + test r10d,512 + jnz NEAR $L$ssse3_shortcut + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*4+4*8 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[((64+24))+rsp],r11 +$L$prologue: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edi,ebx + lea rbp,[K256] + xor edi,ecx + mov r12d,DWORD[rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[4+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[8+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[12+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[16+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[20+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[24+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[28+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + add eax,r14d + mov r12d,DWORD[32+rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[36+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[40+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[44+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[48+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[52+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[56+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[60+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13d,DWORD[4+rsp] + mov r15d,DWORD[56+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[36+rsp] + + add r12d,DWORD[rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[8+rsp] + mov edi,DWORD[60+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[40+rsp] + + add r12d,DWORD[4+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[12+rsp] + mov r15d,DWORD[rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[44+rsp] + + add r12d,DWORD[8+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[16+rsp] + mov edi,DWORD[4+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[48+rsp] + + add r12d,DWORD[12+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[20+rsp] + mov r15d,DWORD[8+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[52+rsp] + + add r12d,DWORD[16+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[24+rsp] + mov edi,DWORD[12+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[56+rsp] + + add r12d,DWORD[20+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[28+rsp] + mov r15d,DWORD[16+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[60+rsp] + + add r12d,DWORD[24+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[32+rsp] + mov edi,DWORD[20+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[rsp] + + add r12d,DWORD[28+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[36+rsp] + mov r15d,DWORD[24+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[4+rsp] + + add r12d,DWORD[32+rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[40+rsp] + mov edi,DWORD[28+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[8+rsp] + + add r12d,DWORD[36+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[44+rsp] + mov r15d,DWORD[32+rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[12+rsp] + + add r12d,DWORD[40+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[48+rsp] + mov edi,DWORD[36+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[16+rsp] + + add r12d,DWORD[44+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[52+rsp] + mov r15d,DWORD[40+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[20+rsp] + + add r12d,DWORD[48+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[56+rsp] + mov edi,DWORD[44+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[24+rsp] + + add r12d,DWORD[52+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[60+rsp] + mov r15d,DWORD[48+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[28+rsp] + + add r12d,DWORD[56+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[rsp] + mov edi,DWORD[52+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[32+rsp] + + add r12d,DWORD[60+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + cmp BYTE[3+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((64+0))+rsp] + add eax,r14d + lea rsi,[64+rsi] + + add eax,DWORD[rdi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop + + mov rsi,QWORD[((64+24))+rsp] + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order: +ALIGN 64 + +K256: + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha256_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$ssse3_shortcut: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,160 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[((64+24))+rsp],r11 + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_ssse3: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + + + jmp NEAR $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3: + movdqa xmm7,XMMWORD[((K256+512))] + movdqu xmm0,XMMWORD[rsi] + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD[48+rsi] + lea rbp,[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD[rbp] + movdqa xmm5,XMMWORD[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD[48+rsp],xmm7 + mov r13d,r8d + jmp NEAR $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[48+rsp],xmm6 + cmp BYTE[131+rbp],0 + jne NEAR $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD[rdi] + lea rsi,[64+rsi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop_ssse3 + + mov rsi,QWORD[((64+24))+rsp] + movaps xmm6,XMMWORD[((64+32))+rsp] + movaps xmm7,XMMWORD[((64+48))+rsp] + movaps xmm8,XMMWORD[((64+64))+rsp] + movaps xmm9,XMMWORD[((64+80))+rsp] + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_ssse3: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + mov rsi,rax + mov rax,QWORD[((64+24))+rax] + lea rax,[48+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((64+32))+rsi] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha256_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha256_block_data_order_ssse3: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase diff --git a/third_party/boringssl/win-x86_64/crypto/sha/sha512-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/sha/sha512-x86_64.asm new file mode 100644 index 00000000000..b76cc0edb96 --- /dev/null +++ b/third_party/boringssl/win-x86_64/crypto/sha/sha512-x86_64.asm @@ -0,0 +1,1911 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P +global sha512_block_data_order + +ALIGN 16 +sha512_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[((128+24))+rsp],r11 +$L$prologue: + + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov rdi,rbx + lea rbp,[K512] + xor rdi,rcx + mov r12,QWORD[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + add rax,r14 + mov r12,QWORD[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13,QWORD[8+rsp] + mov r15,QWORD[112+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[72+rsp] + + add r12,QWORD[rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[16+rsp] + mov rdi,QWORD[120+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[80+rsp] + + add r12,QWORD[8+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[24+rsp] + mov r15,QWORD[rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[88+rsp] + + add r12,QWORD[16+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[32+rsp] + mov rdi,QWORD[8+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[96+rsp] + + add r12,QWORD[24+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[40+rsp] + mov r15,QWORD[16+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[104+rsp] + + add r12,QWORD[32+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[48+rsp] + mov rdi,QWORD[24+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[112+rsp] + + add r12,QWORD[40+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[56+rsp] + mov r15,QWORD[32+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[120+rsp] + + add r12,QWORD[48+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[64+rsp] + mov rdi,QWORD[40+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[rsp] + + add r12,QWORD[56+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[72+rsp] + mov r15,QWORD[48+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[8+rsp] + + add r12,QWORD[64+rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[80+rsp] + mov rdi,QWORD[56+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[16+rsp] + + add r12,QWORD[72+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[88+rsp] + mov r15,QWORD[64+rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[24+rsp] + + add r12,QWORD[80+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[96+rsp] + mov rdi,QWORD[72+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[32+rsp] + + add r12,QWORD[88+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[104+rsp] + mov r15,QWORD[80+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[40+rsp] + + add r12,QWORD[96+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[112+rsp] + mov rdi,QWORD[88+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[48+rsp] + + add r12,QWORD[104+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[120+rsp] + mov r15,QWORD[96+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[56+rsp] + + add r12,QWORD[112+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[rsp] + mov rdi,QWORD[104+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[64+rsp] + + add r12,QWORD[120+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + cmp BYTE[7+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((128+0))+rsp] + add rax,r14 + lea rsi,[128+rsi] + + add rax,QWORD[rdi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop + + mov rsi,QWORD[((128+24))+rsp] + mov r15,QWORD[rsi] + mov r14,QWORD[8+rsi] + mov r13,QWORD[16+rsi] + mov r12,QWORD[24+rsi] + mov rbp,QWORD[32+rsi] + mov rbx,QWORD[40+rsi] + lea rsp,[48+rsi] +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order: +ALIGN 64 + +K512: + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + + DQ 0x0001020304050607,0x08090a0b0c0d0e0f + DQ 0x0001020304050607,0x08090a0b0c0d0e0f +DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + mov rsi,rax + mov rax,QWORD[((128+24))+rax] + lea rax,[48+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((128+32))+rsi] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha512_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase