David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* |
| 3 | * x64 SIMD accelerated ChaCha and XChaCha stream ciphers, |
| 4 | * including ChaCha20 (RFC7539) |
| 5 | * |
| 6 | * Copyright (C) 2015 Martin Willi |
| 7 | */ |
| 8 | |
| 9 | #include <crypto/algapi.h> |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 10 | #include <crypto/internal/chacha.h> |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 11 | #include <crypto/internal/simd.h> |
| 12 | #include <crypto/internal/skcipher.h> |
| 13 | #include <linux/kernel.h> |
| 14 | #include <linux/module.h> |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 15 | #include <linux/sizes.h> |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 16 | #include <asm/simd.h> |
| 17 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 18 | asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, |
| 19 | unsigned int len, int nrounds); |
| 20 | asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, |
| 21 | unsigned int len, int nrounds); |
| 22 | asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 23 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 24 | asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| 25 | unsigned int len, int nrounds); |
| 26 | asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| 27 | unsigned int len, int nrounds); |
| 28 | asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, |
| 29 | unsigned int len, int nrounds); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 30 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 31 | asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| 32 | unsigned int len, int nrounds); |
| 33 | asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| 34 | unsigned int len, int nrounds); |
| 35 | asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, |
| 36 | unsigned int len, int nrounds); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 37 | |
| 38 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); |
| 39 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); |
| 40 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 41 | |
| 42 | static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) |
| 43 | { |
| 44 | len = min(len, maxblocks * CHACHA_BLOCK_SIZE); |
| 45 | return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; |
| 46 | } |
| 47 | |
| 48 | static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, |
| 49 | unsigned int bytes, int nrounds) |
| 50 | { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 51 | if (IS_ENABLED(CONFIG_AS_AVX512) && |
| 52 | static_branch_likely(&chacha_use_avx512vl)) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 53 | while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
| 54 | chacha_8block_xor_avx512vl(state, dst, src, bytes, |
| 55 | nrounds); |
| 56 | bytes -= CHACHA_BLOCK_SIZE * 8; |
| 57 | src += CHACHA_BLOCK_SIZE * 8; |
| 58 | dst += CHACHA_BLOCK_SIZE * 8; |
| 59 | state[12] += 8; |
| 60 | } |
| 61 | if (bytes > CHACHA_BLOCK_SIZE * 4) { |
| 62 | chacha_8block_xor_avx512vl(state, dst, src, bytes, |
| 63 | nrounds); |
| 64 | state[12] += chacha_advance(bytes, 8); |
| 65 | return; |
| 66 | } |
| 67 | if (bytes > CHACHA_BLOCK_SIZE * 2) { |
| 68 | chacha_4block_xor_avx512vl(state, dst, src, bytes, |
| 69 | nrounds); |
| 70 | state[12] += chacha_advance(bytes, 4); |
| 71 | return; |
| 72 | } |
| 73 | if (bytes) { |
| 74 | chacha_2block_xor_avx512vl(state, dst, src, bytes, |
| 75 | nrounds); |
| 76 | state[12] += chacha_advance(bytes, 2); |
| 77 | return; |
| 78 | } |
| 79 | } |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 80 | |
| 81 | if (static_branch_likely(&chacha_use_avx2)) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 82 | while (bytes >= CHACHA_BLOCK_SIZE * 8) { |
| 83 | chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); |
| 84 | bytes -= CHACHA_BLOCK_SIZE * 8; |
| 85 | src += CHACHA_BLOCK_SIZE * 8; |
| 86 | dst += CHACHA_BLOCK_SIZE * 8; |
| 87 | state[12] += 8; |
| 88 | } |
| 89 | if (bytes > CHACHA_BLOCK_SIZE * 4) { |
| 90 | chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); |
| 91 | state[12] += chacha_advance(bytes, 8); |
| 92 | return; |
| 93 | } |
| 94 | if (bytes > CHACHA_BLOCK_SIZE * 2) { |
| 95 | chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); |
| 96 | state[12] += chacha_advance(bytes, 4); |
| 97 | return; |
| 98 | } |
| 99 | if (bytes > CHACHA_BLOCK_SIZE) { |
| 100 | chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); |
| 101 | state[12] += chacha_advance(bytes, 2); |
| 102 | return; |
| 103 | } |
| 104 | } |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 105 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 106 | while (bytes >= CHACHA_BLOCK_SIZE * 4) { |
| 107 | chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); |
| 108 | bytes -= CHACHA_BLOCK_SIZE * 4; |
| 109 | src += CHACHA_BLOCK_SIZE * 4; |
| 110 | dst += CHACHA_BLOCK_SIZE * 4; |
| 111 | state[12] += 4; |
| 112 | } |
| 113 | if (bytes > CHACHA_BLOCK_SIZE) { |
| 114 | chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); |
| 115 | state[12] += chacha_advance(bytes, 4); |
| 116 | return; |
| 117 | } |
| 118 | if (bytes) { |
| 119 | chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); |
| 120 | state[12]++; |
| 121 | } |
| 122 | } |
| 123 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 124 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
| 125 | { |
| 126 | if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { |
| 127 | hchacha_block_generic(state, stream, nrounds); |
| 128 | } else { |
| 129 | kernel_fpu_begin(); |
| 130 | hchacha_block_ssse3(state, stream, nrounds); |
| 131 | kernel_fpu_end(); |
| 132 | } |
| 133 | } |
| 134 | EXPORT_SYMBOL(hchacha_block_arch); |
| 135 | |
| 136 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
| 137 | { |
| 138 | chacha_init_generic(state, key, iv); |
| 139 | } |
| 140 | EXPORT_SYMBOL(chacha_init_arch); |
| 141 | |
| 142 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
| 143 | int nrounds) |
| 144 | { |
| 145 | if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || |
| 146 | bytes <= CHACHA_BLOCK_SIZE) |
| 147 | return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
| 148 | |
| 149 | do { |
| 150 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
| 151 | |
| 152 | kernel_fpu_begin(); |
| 153 | chacha_dosimd(state, dst, src, todo, nrounds); |
| 154 | kernel_fpu_end(); |
| 155 | |
| 156 | bytes -= todo; |
| 157 | src += todo; |
| 158 | dst += todo; |
| 159 | } while (bytes); |
| 160 | } |
| 161 | EXPORT_SYMBOL(chacha_crypt_arch); |
| 162 | |
| 163 | static int chacha_simd_stream_xor(struct skcipher_request *req, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 164 | const struct chacha_ctx *ctx, const u8 *iv) |
| 165 | { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 166 | u32 state[CHACHA_STATE_WORDS] __aligned(8); |
| 167 | struct skcipher_walk walk; |
| 168 | int err; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 169 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 170 | err = skcipher_walk_virt(&walk, req, false); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 171 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 172 | chacha_init_generic(state, ctx->key, iv); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 173 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 174 | while (walk.nbytes > 0) { |
| 175 | unsigned int nbytes = walk.nbytes; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 176 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 177 | if (nbytes < walk.total) |
| 178 | nbytes = round_down(nbytes, walk.stride); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 179 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 180 | if (!static_branch_likely(&chacha_use_simd) || |
| 181 | !crypto_simd_usable()) { |
| 182 | chacha_crypt_generic(state, walk.dst.virt.addr, |
| 183 | walk.src.virt.addr, nbytes, |
| 184 | ctx->nrounds); |
| 185 | } else { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 186 | kernel_fpu_begin(); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 187 | chacha_dosimd(state, walk.dst.virt.addr, |
| 188 | walk.src.virt.addr, nbytes, |
| 189 | ctx->nrounds); |
| 190 | kernel_fpu_end(); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 191 | } |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 192 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 193 | } |
| 194 | |
| 195 | return err; |
| 196 | } |
| 197 | |
| 198 | static int chacha_simd(struct skcipher_request *req) |
| 199 | { |
| 200 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 201 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 202 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 203 | return chacha_simd_stream_xor(req, ctx, req->iv); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 204 | } |
| 205 | |
| 206 | static int xchacha_simd(struct skcipher_request *req) |
| 207 | { |
| 208 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| 209 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 210 | u32 state[CHACHA_STATE_WORDS] __aligned(8); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 211 | struct chacha_ctx subctx; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 212 | u8 real_iv[16]; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 213 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 214 | chacha_init_generic(state, ctx->key, req->iv); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 215 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 216 | if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { |
| 217 | kernel_fpu_begin(); |
| 218 | hchacha_block_ssse3(state, subctx.key, ctx->nrounds); |
| 219 | kernel_fpu_end(); |
| 220 | } else { |
| 221 | hchacha_block_generic(state, subctx.key, ctx->nrounds); |
| 222 | } |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 223 | subctx.nrounds = ctx->nrounds; |
| 224 | |
| 225 | memcpy(&real_iv[0], req->iv + 24, 8); |
| 226 | memcpy(&real_iv[8], req->iv + 16, 8); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 227 | return chacha_simd_stream_xor(req, &subctx, real_iv); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 228 | } |
| 229 | |
| 230 | static struct skcipher_alg algs[] = { |
| 231 | { |
| 232 | .base.cra_name = "chacha20", |
| 233 | .base.cra_driver_name = "chacha20-simd", |
| 234 | .base.cra_priority = 300, |
| 235 | .base.cra_blocksize = 1, |
| 236 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 237 | .base.cra_module = THIS_MODULE, |
| 238 | |
| 239 | .min_keysize = CHACHA_KEY_SIZE, |
| 240 | .max_keysize = CHACHA_KEY_SIZE, |
| 241 | .ivsize = CHACHA_IV_SIZE, |
| 242 | .chunksize = CHACHA_BLOCK_SIZE, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 243 | .setkey = chacha20_setkey, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 244 | .encrypt = chacha_simd, |
| 245 | .decrypt = chacha_simd, |
| 246 | }, { |
| 247 | .base.cra_name = "xchacha20", |
| 248 | .base.cra_driver_name = "xchacha20-simd", |
| 249 | .base.cra_priority = 300, |
| 250 | .base.cra_blocksize = 1, |
| 251 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 252 | .base.cra_module = THIS_MODULE, |
| 253 | |
| 254 | .min_keysize = CHACHA_KEY_SIZE, |
| 255 | .max_keysize = CHACHA_KEY_SIZE, |
| 256 | .ivsize = XCHACHA_IV_SIZE, |
| 257 | .chunksize = CHACHA_BLOCK_SIZE, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 258 | .setkey = chacha20_setkey, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 259 | .encrypt = xchacha_simd, |
| 260 | .decrypt = xchacha_simd, |
| 261 | }, { |
| 262 | .base.cra_name = "xchacha12", |
| 263 | .base.cra_driver_name = "xchacha12-simd", |
| 264 | .base.cra_priority = 300, |
| 265 | .base.cra_blocksize = 1, |
| 266 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
| 267 | .base.cra_module = THIS_MODULE, |
| 268 | |
| 269 | .min_keysize = CHACHA_KEY_SIZE, |
| 270 | .max_keysize = CHACHA_KEY_SIZE, |
| 271 | .ivsize = XCHACHA_IV_SIZE, |
| 272 | .chunksize = CHACHA_BLOCK_SIZE, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 273 | .setkey = chacha12_setkey, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 274 | .encrypt = xchacha_simd, |
| 275 | .decrypt = xchacha_simd, |
| 276 | }, |
| 277 | }; |
| 278 | |
| 279 | static int __init chacha_simd_mod_init(void) |
| 280 | { |
| 281 | if (!boot_cpu_has(X86_FEATURE_SSSE3)) |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 282 | return 0; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 283 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 284 | static_branch_enable(&chacha_use_simd); |
| 285 | |
| 286 | if (boot_cpu_has(X86_FEATURE_AVX) && |
| 287 | boot_cpu_has(X86_FEATURE_AVX2) && |
| 288 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
| 289 | static_branch_enable(&chacha_use_avx2); |
| 290 | |
| 291 | if (IS_ENABLED(CONFIG_AS_AVX512) && |
| 292 | boot_cpu_has(X86_FEATURE_AVX512VL) && |
| 293 | boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ |
| 294 | static_branch_enable(&chacha_use_avx512vl); |
| 295 | } |
| 296 | return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? |
| 297 | crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 298 | } |
| 299 | |
| 300 | static void __exit chacha_simd_mod_fini(void) |
| 301 | { |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 302 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3)) |
| 303 | crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 304 | } |
| 305 | |
| 306 | module_init(chacha_simd_mod_init); |
| 307 | module_exit(chacha_simd_mod_fini); |
| 308 | |
| 309 | MODULE_LICENSE("GPL"); |
| 310 | MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); |
| 311 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)"); |
| 312 | MODULE_ALIAS_CRYPTO("chacha20"); |
| 313 | MODULE_ALIAS_CRYPTO("chacha20-simd"); |
| 314 | MODULE_ALIAS_CRYPTO("xchacha20"); |
| 315 | MODULE_ALIAS_CRYPTO("xchacha20-simd"); |
| 316 | MODULE_ALIAS_CRYPTO("xchacha12"); |
| 317 | MODULE_ALIAS_CRYPTO("xchacha12-simd"); |