ossl: Add a VAES-based AES-GCM implementation for amd64

aes-gcm-avx512.S is generated from OpenSSL 3.1 and implements AES-GCM.
ossl_x86.c detects whether the CPU implements the required AVX512
instructions; if not, the ossl(4) module does not provide an AES-GCM
implementation.  The VAES implementation increases throughput for all
buffer sizes in both directions, up to 2x for sufficiently large
buffers.

The "process" implementation is in two parts: a generic OCF layer in
ossl_aes.c, and a set of machine-dependent (MD) functions that do the
heavy lifting.  The intent is to make it possible to add implementations
for other platforms, e.g., to reduce the diff required for D37421.

A follow-up commit will add a fallback path to legacy AES-NI, so that
ossl(4) can be used in preference to aesni(4) on all amd64 platforms.
In the long term we would like to replace aesni(4) and armv8crypto(4)
with ossl(4).

Note, currently this implementation will not be selected by default
since aesni(4) and ossl(4) return the same probe priority for crypto
sessions, and the opencrypto framework selects the first registered
implementation to break a tie.  Since aesni(4) is compiled into the
kernel, aesni(4) wins.  A separate change may modify ossl(4) to have
priority.

Sponsored by:	Stormshield
Sponsored by:	Klara, Inc.
Reviewed by:	jhb
MFC after:	3 months
Differential Revision:	https://reviews.freebsd.org/D39783
This commit is contained in:
Mark Johnston 2023-06-02 11:58:29 -04:00
parent 9ad8dc721e
commit 9a3444d91c
8 changed files with 136616 additions and 10 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,233 @@
/*
* Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2021, Intel Corporation. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
/*
* This file contains an AES-GCM wrapper implementation from OpenSSL 3.1,
* targeting amd64 VAES extensions. This was ported from
* cipher_aes_gcm_hw_vaes_avx512.inc.
*/
#include <sys/endian.h>
#include <sys/systm.h>
#include <crypto/openssl/ossl.h>
#include <crypto/openssl/ossl_aes_gcm.h>
#include <crypto/openssl/ossl_cipher.h>
#include <opencrypto/cryptodev.h>
/* The GCM context must fit inside the generic OCF cipher context blob. */
_Static_assert(
    sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
    "ossl_gcm_context too large");

/* AES key-schedule expansion; assembly routine (see aesni-x86_64.S). */
void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
/*
 * Common key setup: expand the AES key schedule, then let the selected
 * backend derive its GHASH key material.  "keylen" is in bits.
 */
static void
gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
	    ("%s: invalid key length %zu", __func__, keylen));

	/* Start from a clean slate; both halves hold key-derived state. */
	memset(&ctx->gcm, 0, sizeof(ctx->gcm));
	memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
	aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
	ctx->ops->init(ctx, key, keylen);
}
/*
 * Produce the authentication tag: finalize the GHASH computation (no
 * expected tag to compare against), then copy out "len" bytes of Xi.
 */
static void
gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
	(void)ctx->ops->finish(ctx, NULL, 0);
	memcpy(tag, ctx->gcm.Xi.c, len);
}
/*
 * Assembly routines generated from OpenSSL, implemented in
 * aes-gcm-avx512.S.  "gcm128ctx" is the embedded gcm state and "ks" the
 * expanded AES key schedule; "pblocklen" tracks the partial-block
 * residue across calls.
 */
void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
    const unsigned char *iv, size_t ivlen);
void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
    size_t len);
void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
/*
 * Backend init hook: derive the GHASH key material from the AES key
 * schedule already expanded by gcm_init().  "key" and "keylen" are
 * deliberately unused here; the schedule in ctx->aes_ks is sufficient.
 */
static void
gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
}
/*
 * Load a fresh IV, resetting all per-message GCM state first: the
 * working counter (Yi), the GHASH accumulator (Xi), the AAD/message
 * lengths, and the partial-block residues.
 */
static void
gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
    size_t len)
{
	KASSERT(len == AES_GCM_IV_LEN,
	    ("%s: invalid IV length %zu", __func__, len));

	memset(&ctx->gcm.Yi, 0, sizeof(ctx->gcm.Yi));	/* Current counter */
	memset(&ctx->gcm.Xi, 0, sizeof(ctx->gcm.Xi));	/* AAD hash */
	memset(&ctx->gcm.len, 0, sizeof(ctx->gcm.len));	/* AAD/msg lengths */
	ctx->gcm.ares = 0;
	ctx->gcm.mres = 0;

	ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
}
/*
 * Absorb "len" bytes of additional authenticated data into the GHASH
 * state.  Handles a leftover partial block from a previous call, bulk
 * full-block processing via the AVX512 kernel, and a trailing partial
 * block saved for later.  Returns 0 on success, -1 if the cumulative
 * AAD length overflows the GCM limit, and -2 if AAD arrives after
 * payload processing has started.
 */
static int
gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
    size_t len)
{
	uint64_t alen = ctx->gcm.len.u[0];
	size_t lenblks;
	unsigned int ares;

	/* Bad sequence: call of AAD update after message processing */
	if (ctx->gcm.len.u[1])
		return -2;

	alen += len;
	/* AAD is limited by 2^64 bits, thus 2^61 bytes */
	if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
		return -1;
	ctx->gcm.len.u[0] = alen;

	ares = ctx->gcm.ares;
	/* Partial AAD block left from previous AAD update calls */
	if (ares > 0) {
		/*
		 * Fill partial block buffer till full block
		 * (note, the hash is stored reflected)
		 */
		while (ares > 0 && len > 0) {
			ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
			--len;
			ares = (ares + 1) % AES_BLOCK_LEN;
		}
		/* Full block gathered */
		if (ares == 0) {
			ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		} else { /* no more AAD */
			ctx->gcm.ares = ares;
			return 0;
		}
	}

	/* Bulk AAD processing */
	lenblks = len & ((size_t)(-AES_BLOCK_LEN));
	if (lenblks > 0) {
		ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
		aad += lenblks;
		len -= lenblks;
	}

	/* Add remaining AAD to the hash (note, the hash is stored reflected) */
	if (len > 0) {
		ares = (unsigned int)len;
		for (size_t i = 0; i < len; ++i)
			ctx->gcm.Xi.c[15 - i] ^= aad[i];
	}

	/* Remember how many bytes of a partial block remain unmultiplied. */
	ctx->gcm.ares = ares;
	return 0;
}
/*
 * Common payload path for encryption and decryption.  Enforces the GCM
 * message-length limit, flushes any pending partial AAD block into the
 * hash, then hands the data to the AVX512 bulk routine.  Returns 0 on
 * success, -1 if the cumulative message length overflows.
 */
static int
_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len, bool encrypt)
{
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	/* 2^36 - 32 bytes: presumably the GCM per-message plaintext cap. */
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;

	ctx->gcm.len.u[1] = mlen;

	/* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
	if (ctx->gcm.ares > 0) {
		ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		ctx->gcm.ares = 0;
	}

	if (encrypt) {
		ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	} else {
		ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	}

	return 0;
}
/* Encrypt "len" payload bytes; thin wrapper over _gcm_encrypt_avx512(). */
static int
gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, true);
}
/* Decrypt "len" payload bytes; thin wrapper over _gcm_encrypt_avx512(). */
static int
gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, false);
}
/*
 * Finalize the GCM computation and, if "tag" is non-NULL, compare it
 * against the computed tag in constant time.  Returns 0 on match (or
 * when no tag was supplied), non-zero on mismatch.
 */
static int
gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
    size_t len)
{
	unsigned int *res = &ctx->gcm.mres;

	/* Finalize AAD processing */
	if (ctx->gcm.ares > 0)
		/*
		 * ares != 0 means no payload was processed (the payload
		 * path clears ares), so the pending partial-block residue
		 * to fold in is the AAD one, not the message one.
		 */
		res = &ctx->gcm.ares;

	ossl_aes_gcm_finalize_avx512(ctx, *res);

	ctx->gcm.ares = ctx->gcm.mres = 0;

	if (tag != NULL)
		return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
	return 0;
}
/*
 * Dispatch table for the AVX512 backend; gcm_tag is backend-independent
 * since it only finalizes and copies out Xi.
 */
static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
	.init = gcm_init_avx512,
	.setiv = gcm_setiv_avx512,
	.aad = gcm_aad_avx512,
	.encrypt = gcm_encrypt_avx512,
	.decrypt = gcm_decrypt_avx512,
	.finish = gcm_finish_avx512,
	.tag = gcm_tag,
};
/*
 * Key-setup entry point installed into ossl_cipher_aes_gcm's
 * set_encrypt_key/set_decrypt_key hooks by ossl_x86.c when the CPU
 * supports the required AVX512 extensions.  "klen" is in bits.
 * Always returns 0.
 */
int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);

int
ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
    void *_ctx)
{
	struct ossl_gcm_context *ctx;

	ctx = _ctx;
	/* Select the backend before gcm_init() invokes ctx->ops->init(). */
	ctx->ops = &gcm_ops_avx512;
	gcm_init(ctx, key, klen);
	return (0);
}

View file

@ -78,6 +78,8 @@ ossl_attach(device_t dev)
sc = device_get_softc(dev);
sc->has_aes = sc->has_aes_gcm = false;
ossl_cpuid(sc);
sc->sc_cid = crypto_get_driverid(dev, sizeof(struct ossl_session),
CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC |
@ -144,6 +146,16 @@ ossl_lookup_cipher(const struct crypto_session_params *csp)
return (NULL);
}
return (&ossl_cipher_aes_cbc);
case CRYPTO_AES_NIST_GCM_16:
switch (csp->csp_cipher_klen * 8) {
case 128:
case 192:
case 256:
break;
default:
return (NULL);
}
return (&ossl_cipher_aes_gcm);
case CRYPTO_CHACHA20:
if (csp->csp_cipher_klen != CHACHA_KEY_SIZE)
return (NULL);
@ -183,6 +195,15 @@ ossl_probesession(device_t dev, const struct crypto_session_params *csp)
switch (csp->csp_cipher_alg) {
case CRYPTO_CHACHA20_POLY1305:
break;
case CRYPTO_AES_NIST_GCM_16:
if (!sc->has_aes_gcm || ossl_lookup_cipher(csp) == NULL)
return (EINVAL);
if (csp->csp_ivlen != AES_GCM_IV_LEN)
return (EINVAL);
if (csp->csp_auth_mlen != 0 &&
csp->csp_auth_mlen != GMAC_DIGEST_LEN)
return (EINVAL);
break;
default:
return (EINVAL);
}
@ -279,6 +300,11 @@ ossl_newsession(device_t dev, crypto_session_t cses,
ossl_newsession_hash(s, csp);
error = ossl_newsession_cipher(s, csp);
break;
case CSP_MODE_AEAD:
error = ossl_newsession_cipher(s, csp);
break;
default:
__assert_unreachable();
}
return (error);
@ -352,6 +378,13 @@ ossl_process_hash(struct ossl_session *s, struct cryptop *crp,
return (error);
}
/* Dispatch a plain-cipher (CSP_MODE_CIPHER) request to the session's cipher. */
static int
ossl_process_cipher(struct ossl_session *s, struct cryptop *crp,
    const struct crypto_session_params *csp)
{
	return (s->cipher.cipher->process(&s->cipher, crp, csp));
}
static int
ossl_process_eta(struct ossl_session *s, struct cryptop *crp,
const struct crypto_session_params *csp)
@ -371,6 +404,20 @@ ossl_process_eta(struct ossl_session *s, struct cryptop *crp,
return (error);
}
/*
 * Dispatch an AEAD request.  ChaCha20-Poly1305 has dedicated entry
 * points; everything else (AES-GCM) goes through the session cipher's
 * process hook.
 */
static int
ossl_process_aead(struct ossl_session *s, struct cryptop *crp,
    const struct crypto_session_params *csp)
{
	if (csp->csp_cipher_alg != CRYPTO_CHACHA20_POLY1305)
		return (s->cipher.cipher->process(&s->cipher, crp, csp));
	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
		return (ossl_chacha20_poly1305_encrypt(crp, csp));
	return (ossl_chacha20_poly1305_decrypt(crp, csp));
}
static int
ossl_process(device_t dev, struct cryptop *crp, int hint)
{
@ -394,16 +441,13 @@ ossl_process(device_t dev, struct cryptop *crp, int hint)
error = ossl_process_hash(s, crp, csp);
break;
case CSP_MODE_CIPHER:
error = s->cipher.cipher->process(&s->cipher, crp, csp);
error = ossl_process_cipher(s, crp, csp);
break;
case CSP_MODE_ETA:
error = ossl_process_eta(s, crp, csp);
break;
case CSP_MODE_AEAD:
if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
error = ossl_chacha20_poly1305_encrypt(crp, csp);
else
error = ossl_chacha20_poly1305_decrypt(crp, csp);
error = ossl_process_aead(s, crp, csp);
break;
default:
__assert_unreachable();

View file

@ -48,15 +48,16 @@ void ossl_cpuid(struct ossl_softc *sc);
struct ossl_softc {
	int32_t sc_cid;		/* OCF crypto driver id */
	bool has_aes;		/* AES-NI present; AES-CBC enabled */
	bool has_aes_gcm;	/* VAES/AVX512 present; AES-GCM enabled */
};
/* Needs to be big enough to hold any hash context. */
struct ossl_hash_context {
uint32_t dummy[61];
uint32_t dummy[196];
} __aligned(32);
struct ossl_cipher_context {
uint32_t dummy[61];
uint32_t dummy[196];
} __aligned(32);
struct ossl_session_hash {
@ -85,6 +86,7 @@ extern struct auth_hash ossl_hash_sha384;
extern struct auth_hash ossl_hash_sha512;
extern struct ossl_cipher ossl_cipher_aes_cbc;
extern struct ossl_cipher ossl_cipher_aes_gcm;
extern struct ossl_cipher ossl_cipher_chacha20;
#endif /* !__OSSL_H__ */

View file

@ -32,8 +32,10 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/openssl/ossl.h>
#include <crypto/openssl/ossl_aes_gcm.h>
#include <crypto/openssl/ossl_cipher.h>
#if defined(__amd64__) || defined(__i386__)
@ -43,6 +45,7 @@ __FBSDID("$FreeBSD$");
#endif
static ossl_cipher_process_t ossl_aes_cbc;
static ossl_cipher_process_t ossl_aes_gcm;
struct ossl_cipher ossl_cipher_aes_cbc = {
.type = CRYPTO_AES_CBC,
@ -55,6 +58,17 @@ struct ossl_cipher ossl_cipher_aes_cbc = {
.process = ossl_aes_cbc
};
/*
 * AES-GCM cipher descriptor.  The key-schedule hooks are installed at
 * attach time by ossl_cpuid() when the CPU supports the implementation.
 */
struct ossl_cipher ossl_cipher_aes_gcm = {
	.type = CRYPTO_AES_NIST_GCM_16,
	.blocksize = 1,
	.ivsize = AES_GCM_IV_LEN,

	/* Filled during initialization based on CPU caps. */
	.set_encrypt_key = NULL,
	.set_decrypt_key = NULL,
	.process = ossl_aes_gcm,
};
static int
ossl_aes_cbc(struct ossl_session_cipher *s, struct cryptop *crp,
const struct crypto_session_params *csp)
@ -151,3 +165,92 @@ ossl_aes_cbc(struct ossl_session_cipher *s, struct cryptop *crp,
explicit_bzero(&key, sizeof(key));
return (0);
}
static int
ossl_aes_gcm(struct ossl_session_cipher *s, struct cryptop *crp,
const struct crypto_session_params *csp)
{
struct ossl_cipher_context key;
struct crypto_buffer_cursor cc_in, cc_out;
unsigned char iv[AES_BLOCK_LEN], tag[AES_BLOCK_LEN];
struct ossl_gcm_context *ctx;
const unsigned char *inseg;
unsigned char *outseg;
size_t inlen, outlen, seglen;
int error;
bool encrypt;
encrypt = CRYPTO_OP_IS_ENCRYPT(crp->crp_op);
if (crp->crp_cipher_key != NULL) {
if (encrypt)
error = s->cipher->set_encrypt_key(crp->crp_cipher_key,
8 * csp->csp_cipher_klen, &key);
else
error = s->cipher->set_decrypt_key(crp->crp_cipher_key,
8 * csp->csp_cipher_klen, &key);
if (error)
return (error);
ctx = (struct ossl_gcm_context *)&key;
} else if (encrypt) {
ctx = (struct ossl_gcm_context *)&s->enc_ctx;
} else {
ctx = (struct ossl_gcm_context *)&s->dec_ctx;
}
crypto_read_iv(crp, iv);
ctx->ops->setiv(ctx, iv, csp->csp_ivlen);
crypto_cursor_init(&cc_in, &crp->crp_buf);
crypto_cursor_advance(&cc_in, crp->crp_aad_start);
for (size_t alen = crp->crp_aad_length; alen > 0; alen -= seglen) {
inseg = crypto_cursor_segment(&cc_in, &inlen);
seglen = MIN(alen, inlen);
if (ctx->ops->aad(ctx, inseg, seglen) != 0)
return (EINVAL);
crypto_cursor_advance(&cc_in, seglen);
}
crypto_cursor_init(&cc_in, &crp->crp_buf);
crypto_cursor_advance(&cc_in, crp->crp_payload_start);
if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) {
crypto_cursor_init(&cc_out, &crp->crp_obuf);
crypto_cursor_advance(&cc_out, crp->crp_payload_output_start);
} else {
cc_out = cc_in;
}
for (size_t plen = crp->crp_payload_length; plen > 0; plen -= seglen) {
inseg = crypto_cursor_segment(&cc_in, &inlen);
outseg = crypto_cursor_segment(&cc_out, &outlen);
seglen = MIN(plen, MIN(inlen, outlen));
if (encrypt) {
if (ctx->ops->encrypt(ctx, inseg, outseg, seglen) != 0)
return (EINVAL);
} else {
if (ctx->ops->decrypt(ctx, inseg, outseg, seglen) != 0)
return (EINVAL);
}
crypto_cursor_advance(&cc_in, seglen);
crypto_cursor_advance(&cc_out, seglen);
}
error = 0;
if (encrypt) {
ctx->ops->tag(ctx, tag, GMAC_DIGEST_LEN);
crypto_copyback(crp, crp->crp_digest_start, GMAC_DIGEST_LEN,
tag);
} else {
crypto_copydata(crp, crp->crp_digest_start, GMAC_DIGEST_LEN,
tag);
if (ctx->ops->finish(ctx, tag, GMAC_DIGEST_LEN) != 0)
error = EBADMSG;
}
explicit_bzero(iv, sizeof(iv));
explicit_bzero(tag, sizeof(tag));
return (error);
}

View file

@ -0,0 +1,71 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2023 Stormshield
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _OSSL_AES_GCM_H_
#define _OSSL_AES_GCM_H_
#include <crypto/openssl/ossl_cipher.h>
#include <crypto/rijndael/rijndael.h>
struct ossl_gcm_context;
/*
 * Backend operations for an AES-GCM implementation.  A table of these
 * is installed into ossl_gcm_context at key-setup time; the int-valued
 * hooks return 0 on success and non-zero on failure (e.g., length-limit
 * overflow, or tag mismatch for finish).
 */
struct ossl_aes_gcm_ops {
	/* Derive GHASH key material after AES key expansion. */
	void (*init)(struct ossl_gcm_context *ctx, const void *key,
	    size_t keylen);
	/* Load an IV and reset per-message state. */
	void (*setiv)(struct ossl_gcm_context *ctx, const unsigned char *iv,
	    size_t ivlen);
	/* Absorb additional authenticated data. */
	int (*aad)(struct ossl_gcm_context *ctx, const unsigned char *aad,
	    size_t len);
	int (*encrypt)(struct ossl_gcm_context *ctx, const unsigned char *in,
	    unsigned char *out, size_t len);
	int (*decrypt)(struct ossl_gcm_context *ctx, const unsigned char *in,
	    unsigned char *out, size_t len);
	/* Finalize; if tag != NULL, compare against the computed tag. */
	int (*finish)(struct ossl_gcm_context *ctx, const unsigned char *tag,
	    size_t len);
	/* Finalize and copy out "len" bytes of the computed tag. */
	void (*tag)(struct ossl_gcm_context *ctx, unsigned char *tag,
	    size_t len);
};
/*
 * GCM state, laid out to match what the OpenSSL-generated assembly
 * expects.  Yi is the working counter, Xi the GHASH accumulator, and
 * len holds the AAD and message lengths; EKi/EK0/H and Htable are
 * presumably key-derived GHASH/counter material consumed by the
 * assembly routines — layout must not be changed independently.
 */
struct ossl_gcm_context {
	struct {
		union {
			uint64_t u[2];
			uint32_t d[4];
			uint8_t c[16];
		} Yi, EKi, EK0, len, Xi, H;
		__uint128_t Htable[16];
		/* Partial-block residues for message (mres) and AAD (ares). */
		unsigned int mres, ares;
	} gcm;
	/* Expanded AES key schedule (see aesni_set_encrypt_key()). */
	struct {
		uint32_t ks[4 * (RIJNDAEL_MAXNR + 1)];
		int rounds;
	} aes_ks;
	/* Backend dispatch table, selected at key-setup time. */
	const struct ossl_aes_gcm_ops *ops;
};
#endif /* !_OSSL_AES_GCM_H_ */

View file

@ -39,6 +39,7 @@
#include <x86/specialreg.h>
#include <crypto/openssl/ossl.h>
#include <crypto/openssl/ossl_aes_gcm.h>
#include <crypto/openssl/ossl_cipher.h>
/*
@ -55,6 +56,11 @@ unsigned int OPENSSL_ia32cap_P[4];
ossl_cipher_setkey_t aesni_set_encrypt_key;
ossl_cipher_setkey_t aesni_set_decrypt_key;
#ifdef __amd64__
int ossl_vaes_vpclmulqdq_capable(void);
ossl_cipher_setkey_t ossl_aes_gcm_setkey_avx512;
#endif
void
ossl_cpuid(struct ossl_softc *sc)
{
@ -119,11 +125,24 @@ ossl_cpuid(struct ossl_softc *sc)
}
OPENSSL_ia32cap_P[3] = cpu_stdext_feature2;
if (!AESNI_CAPABLE) {
sc->has_aes = false;
if (!AESNI_CAPABLE)
return;
}
sc->has_aes = true;
ossl_cipher_aes_cbc.set_encrypt_key = aesni_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = aesni_set_decrypt_key;
#ifdef __amd64__
if (ossl_vaes_vpclmulqdq_capable()) {
ossl_cipher_aes_gcm.set_encrypt_key =
ossl_aes_gcm_setkey_avx512;
ossl_cipher_aes_gcm.set_decrypt_key =
ossl_aes_gcm_setkey_avx512;
sc->has_aes_gcm = true;
} else {
sc->has_aes_gcm = false;
}
#else
sc->has_aes_gcm = false;
#endif
}

View file

@ -27,12 +27,14 @@ SRCS.aarch64= \
ossl_aarch64.c
SRCS.amd64= \
aes-gcm-avx512.S \
aesni-x86_64.S \
chacha-x86_64.S \
poly1305-x86_64.S \
sha1-x86_64.S \
sha256-x86_64.S \
sha512-x86_64.S \
ossl_aes_gcm.c \
ossl_x86.c
SRCS.i386= \