// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
| 9 | |
| 10 | #include <crypto/algapi.h> |
| 11 | #include <crypto/internal/chacha.h> |
| 12 | #include <crypto/internal/simd.h> |
| 13 | #include <crypto/internal/skcipher.h> |
| 14 | #include <linux/jump_label.h> |
| 15 | #include <linux/kernel.h> |
| 16 | #include <linux/module.h> |
| 17 | |
| 18 | #include <asm/cputype.h> |
| 19 | #include <asm/hwcap.h> |
| 20 | #include <asm/neon.h> |
| 21 | #include <asm/simd.h> |
| 22 | |
| 23 | asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 24 | int nrounds); |
| 25 | asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, |
| 26 | int nrounds); |
| 27 | asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); |
| 28 | asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); |
| 29 | |
| 30 | asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, |
| 31 | const u32 *state, int nrounds); |
| 32 | |
| 33 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); |
| 34 | |
| 35 | static inline bool neon_usable(void) |
| 36 | { |
| 37 | return static_branch_likely(&use_neon) && crypto_simd_usable(); |
| 38 | } |
| 39 | |
| 40 | static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, |
| 41 | unsigned int bytes, int nrounds) |
| 42 | { |
| 43 | u8 buf[CHACHA_BLOCK_SIZE]; |
| 44 | |
| 45 | while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| 46 | chacha_4block_xor_neon(state, dst, src, nrounds); |
| 47 | bytes -= CHACHA_BLOCK_SIZE * 4; |
| 48 | src += CHACHA_BLOCK_SIZE * 4; |
| 49 | dst += CHACHA_BLOCK_SIZE * 4; |
| 50 | state[12] += 4; |
| 51 | } |
| 52 | while (bytes >= CHACHA_BLOCK_SIZE) { |
| 53 | chacha_block_xor_neon(state, dst, src, nrounds); |
| 54 | bytes -= CHACHA_BLOCK_SIZE; |
| 55 | src += CHACHA_BLOCK_SIZE; |
| 56 | dst += CHACHA_BLOCK_SIZE; |
| 57 | state[12]++; |
| 58 | } |
| 59 | if (bytes) { |
| 60 | memcpy(buf, src, bytes); |
| 61 | chacha_block_xor_neon(state, buf, buf, nrounds); |
| 62 | memcpy(dst, buf, bytes); |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| 67 | { |
| 68 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { |
| 69 | hchacha_block_arm(state, stream, nrounds); |
| 70 | } else { |
| 71 | kernel_neon_begin(); |
| 72 | hchacha_block_neon(state, stream, nrounds); |
| 73 | kernel_neon_end(); |
| 74 | } |
| 75 | } |
| 76 | EXPORT_SYMBOL(hchacha_block_arch); |
| 77 | |
| 78 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| 79 | { |
| 80 | chacha_init_generic(state, key, iv); |
| 81 | } |
| 82 | EXPORT_SYMBOL(chacha_init_arch); |
| 83 | |
| 84 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| 85 | int nrounds) |
| 86 | { |
| 87 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || |
| 88 | bytes <= CHACHA_BLOCK_SIZE) { |
| 89 | chacha_doarm(dst, src, bytes, state, nrounds); |
| 90 | state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); |
| 91 | return; |
| 92 | } |
| 93 | |
| 94 | do { |
| 95 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| 96 | |
| 97 | kernel_neon_begin(); |
| 98 | chacha_doneon(state, dst, src, todo, nrounds); |
| 99 | kernel_neon_end(); |
| 100 | |
| 101 | bytes -= todo; |
| 102 | src += todo; |
| 103 | dst += todo; |
| 104 | } while (bytes); |
| 105 | } |
| 106 | EXPORT_SYMBOL(chacha_crypt_arch); |
| 107 | |
| 108 | static int chacha_stream_xor(struct skcipher_request *req, |
| 109 | const struct chacha_ctx *ctx, const u8 *iv, |
| 110 | bool neon) |
| 111 | { |
| 112 | struct skcipher_walk walk; |
| 113 | u32 state[16]; |
| 114 | int err; |
| 115 | |
| 116 | err = skcipher_walk_virt(&walk, req, false); |
| 117 | |
| 118 | chacha_init_generic(state, ctx->key, iv); |
| 119 | |
| 120 | while (walk.nbytes > 0) { |
| 121 | unsigned int nbytes = walk.nbytes; |
| 122 | |
| 123 | if (nbytes < walk.total) |
| 124 | nbytes = round_down(nbytes, walk.stride); |
| 125 | |
| 126 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 127 | chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
| 128 | nbytes, state, ctx->nrounds); |
| 129 | state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); |
| 130 | } else { |
| 131 | kernel_neon_begin(); |
| 132 | chacha_doneon(state, walk.dst.virt.addr, |
| 133 | walk.src.virt.addr, nbytes, ctx->nrounds); |
| 134 | kernel_neon_end(); |
| 135 | } |
| 136 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
| 137 | } |
| 138 | |
| 139 | return err; |
| 140 | } |
| 141 | |
| 142 | static int do_chacha(struct skcipher_request *req, bool neon) |
| 143 | { |
| 144 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 145 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 146 | |
| 147 | return chacha_stream_xor(req, ctx, req->iv, neon); |
| 148 | } |
| 149 | |
| 150 | static int chacha_arm(struct skcipher_request *req) |
| 151 | { |
| 152 | return do_chacha(req, false); |
| 153 | } |
| 154 | |
| 155 | static int chacha_neon(struct skcipher_request *req) |
| 156 | { |
| 157 | return do_chacha(req, neon_usable()); |
| 158 | } |
| 159 | |
| 160 | static int do_xchacha(struct skcipher_request *req, bool neon) |
| 161 | { |
| 162 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 163 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
| 164 | struct chacha_ctx subctx; |
| 165 | u32 state[16]; |
| 166 | u8 real_iv[16]; |
| 167 | |
| 168 | chacha_init_generic(state, ctx->key, req->iv); |
| 169 | |
| 170 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
| 171 | hchacha_block_arm(state, subctx.key, ctx->nrounds); |
| 172 | } else { |
| 173 | kernel_neon_begin(); |
| 174 | hchacha_block_neon(state, subctx.key, ctx->nrounds); |
| 175 | kernel_neon_end(); |
| 176 | } |
| 177 | subctx.nrounds = ctx->nrounds; |
| 178 | |
| 179 | memcpy(&real_iv[0], req->iv + 24, 8); |
| 180 | memcpy(&real_iv[8], req->iv + 16, 8); |
| 181 | return chacha_stream_xor(req, &subctx, real_iv, neon); |
| 182 | } |
| 183 | |
| 184 | static int xchacha_arm(struct skcipher_request *req) |
| 185 | { |
| 186 | return do_xchacha(req, false); |
| 187 | } |
| 188 | |
| 189 | static int xchacha_neon(struct skcipher_request *req) |
| 190 | { |
| 191 | return do_xchacha(req, neon_usable()); |
| 192 | } |
| 193 | |
| 194 | static struct skcipher_alg arm_algs[] = { |
| 195 | { |
| 196 | .base.cra_name = "chacha20", |
| 197 | .base.cra_driver_name = "chacha20-arm", |
| 198 | .base.cra_priority = 200, |
| 199 | .base.cra_blocksize = 1, |
| 200 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 201 | .base.cra_module = THIS_MODULE, |
| 202 | |
| 203 | .min_keysize = CHACHA_KEY_SIZE, |
| 204 | .max_keysize = CHACHA_KEY_SIZE, |
| 205 | .ivsize = CHACHA_IV_SIZE, |
| 206 | .chunksize = CHACHA_BLOCK_SIZE, |
| 207 | .setkey = chacha20_setkey, |
| 208 | .encrypt = chacha_arm, |
| 209 | .decrypt = chacha_arm, |
| 210 | }, { |
| 211 | .base.cra_name = "xchacha20", |
| 212 | .base.cra_driver_name = "xchacha20-arm", |
| 213 | .base.cra_priority = 200, |
| 214 | .base.cra_blocksize = 1, |
| 215 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 216 | .base.cra_module = THIS_MODULE, |
| 217 | |
| 218 | .min_keysize = CHACHA_KEY_SIZE, |
| 219 | .max_keysize = CHACHA_KEY_SIZE, |
| 220 | .ivsize = XCHACHA_IV_SIZE, |
| 221 | .chunksize = CHACHA_BLOCK_SIZE, |
| 222 | .setkey = chacha20_setkey, |
| 223 | .encrypt = xchacha_arm, |
| 224 | .decrypt = xchacha_arm, |
| 225 | }, { |
| 226 | .base.cra_name = "xchacha12", |
| 227 | .base.cra_driver_name = "xchacha12-arm", |
| 228 | .base.cra_priority = 200, |
| 229 | .base.cra_blocksize = 1, |
| 230 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 231 | .base.cra_module = THIS_MODULE, |
| 232 | |
| 233 | .min_keysize = CHACHA_KEY_SIZE, |
| 234 | .max_keysize = CHACHA_KEY_SIZE, |
| 235 | .ivsize = XCHACHA_IV_SIZE, |
| 236 | .chunksize = CHACHA_BLOCK_SIZE, |
| 237 | .setkey = chacha12_setkey, |
| 238 | .encrypt = xchacha_arm, |
| 239 | .decrypt = xchacha_arm, |
| 240 | }, |
| 241 | }; |
| 242 | |
| 243 | static struct skcipher_alg neon_algs[] = { |
| 244 | { |
| 245 | .base.cra_name = "chacha20", |
| 246 | .base.cra_driver_name = "chacha20-neon", |
| 247 | .base.cra_priority = 300, |
| 248 | .base.cra_blocksize = 1, |
| 249 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 250 | .base.cra_module = THIS_MODULE, |
| 251 | |
| 252 | .min_keysize = CHACHA_KEY_SIZE, |
| 253 | .max_keysize = CHACHA_KEY_SIZE, |
| 254 | .ivsize = CHACHA_IV_SIZE, |
| 255 | .chunksize = CHACHA_BLOCK_SIZE, |
| 256 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 257 | .setkey = chacha20_setkey, |
| 258 | .encrypt = chacha_neon, |
| 259 | .decrypt = chacha_neon, |
| 260 | }, { |
| 261 | .base.cra_name = "xchacha20", |
| 262 | .base.cra_driver_name = "xchacha20-neon", |
| 263 | .base.cra_priority = 300, |
| 264 | .base.cra_blocksize = 1, |
| 265 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 266 | .base.cra_module = THIS_MODULE, |
| 267 | |
| 268 | .min_keysize = CHACHA_KEY_SIZE, |
| 269 | .max_keysize = CHACHA_KEY_SIZE, |
| 270 | .ivsize = XCHACHA_IV_SIZE, |
| 271 | .chunksize = CHACHA_BLOCK_SIZE, |
| 272 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 273 | .setkey = chacha20_setkey, |
| 274 | .encrypt = xchacha_neon, |
| 275 | .decrypt = xchacha_neon, |
| 276 | }, { |
| 277 | .base.cra_name = "xchacha12", |
| 278 | .base.cra_driver_name = "xchacha12-neon", |
| 279 | .base.cra_priority = 300, |
| 280 | .base.cra_blocksize = 1, |
| 281 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 282 | .base.cra_module = THIS_MODULE, |
| 283 | |
| 284 | .min_keysize = CHACHA_KEY_SIZE, |
| 285 | .max_keysize = CHACHA_KEY_SIZE, |
| 286 | .ivsize = XCHACHA_IV_SIZE, |
| 287 | .chunksize = CHACHA_BLOCK_SIZE, |
| 288 | .walksize = 4 * CHACHA_BLOCK_SIZE, |
| 289 | .setkey = chacha12_setkey, |
| 290 | .encrypt = xchacha_neon, |
| 291 | .decrypt = xchacha_neon, |
| 292 | } |
| 293 | }; |
| 294 | |
| 295 | static int __init chacha_simd_mod_init(void) |
| 296 | { |
| 297 | int err = 0; |
| 298 | |
| 299 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
| 300 | err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 301 | if (err) |
| 302 | return err; |
| 303 | } |
| 304 | |
| 305 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { |
| 306 | int i; |
| 307 | |
| 308 | switch (read_cpuid_part()) { |
| 309 | case ARM_CPU_PART_CORTEX_A7: |
| 310 | case ARM_CPU_PART_CORTEX_A5: |
| 311 | /* |
| 312 | * The Cortex-A7 and Cortex-A5 do not perform well with |
| 313 | * the NEON implementation but do incredibly with the |
| 314 | * scalar one and use less power. |
| 315 | */ |
| 316 | for (i = 0; i < ARRAY_SIZE(neon_algs); i++) |
| 317 | neon_algs[i].base.cra_priority = 0; |
| 318 | break; |
| 319 | default: |
| 320 | static_branch_enable(&use_neon); |
| 321 | } |
| 322 | |
| 323 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
| 324 | err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 325 | if (err) |
| 326 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 327 | } |
| 328 | } |
| 329 | return err; |
| 330 | } |
| 331 | |
| 332 | static void __exit chacha_simd_mod_fini(void) |
| 333 | { |
| 334 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
| 335 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); |
| 336 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) |
| 337 | crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | module_init(chacha_simd_mod_init); |
| 342 | module_exit(chacha_simd_mod_fini); |
| 343 | |
| 344 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); |
| 345 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| 346 | MODULE_LICENSE("GPL v2"); |
| 347 | MODULE_ALIAS_CRYPTO("chacha20"); |
| 348 | MODULE_ALIAS_CRYPTO("chacha20-arm"); |
| 349 | MODULE_ALIAS_CRYPTO("xchacha20"); |
| 350 | MODULE_ALIAS_CRYPTO("xchacha20-arm"); |
| 351 | MODULE_ALIAS_CRYPTO("xchacha12"); |
| 352 | MODULE_ALIAS_CRYPTO("xchacha12-arm"); |
| 353 | #ifdef CONFIG_KERNEL_MODE_NEON |
| 354 | MODULE_ALIAS_CRYPTO("chacha20-neon"); |
| 355 | MODULE_ALIAS_CRYPTO("xchacha20-neon"); |
| 356 | MODULE_ALIAS_CRYPTO("xchacha12-neon"); |
| 357 | #endif |