linux/arch/arm/crypto/sha1_neon_glue.c
Jussi Kivilinna 604682551a ARM: 8119/1: crypto: sha1: add ARM NEON implementation
This patch adds ARM NEON assembly implementation of SHA-1 algorithm.

tcrypt benchmark results on Cortex-A8, sha1-arm-asm vs sha1-neon-asm:

block-size      bytes/update    old-vs-new
16              16              1.04x
64              16              1.02x
64              64              1.05x
256             16              1.03x
256             64              1.04x
256             256             1.30x
1024            16              1.03x
1024            256             1.36x
1024            1024            1.52x
2048            16              1.03x
2048            256             1.39x
2048            1024            1.55x
2048            2048            1.59x
4096            16              1.03x
4096            256             1.40x
4096            1024            1.57x
4096            4096            1.62x
8192            16              1.03x
8192            256             1.40x
8192            1024            1.58x
8192            4096            1.63x
8192            8192            1.63x

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-08-02 08:51:47 +01:00

198 lines
4.8 KiB
C

/*
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
* ARM NEON instructions.
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is based on sha1_generic.c and sha1_ssse3_glue.c:
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/crypto/sha1.h>
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
static int sha1_neon_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_transform_neon(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_transform_neon(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = sha1_update_arm(desc, data, len);
} else {
kernel_neon_begin();
res = __sha1_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
if (!may_use_simd()) {
sha1_update_arm(desc, padding, padlen);
sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha1_neon_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_neon_update(desc, padding, padlen, index);
}
__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha1_neon_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_neon_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_neon_init,
.update = sha1_neon_update,
.final = sha1_neon_final,
.export = sha1_neon_export,
.import = sha1_neon_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init sha1_neon_mod_init(void)
{
if (!cpu_has_neon())
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit sha1_neon_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha1_neon_mod_init);
module_exit(sha1_neon_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
MODULE_ALIAS("sha1");