/*
 * ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
 *
 * Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This file is part of mbed TLS (https://tls.mbed.org)
 */

#if !defined(MBEDTLS_CONFIG_FILE)
#include "mbedtls/config.h"
#else
#include MBEDTLS_CONFIG_FILE
#endif

#if defined(MBEDTLS_ARMV8CE_AES_C)

#include <arm_neon.h>
#include "mbedtls/armv8ce_aes.h"

#ifndef asm
#define asm __asm
#endif

/*
 * [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
 */

#if defined(MBEDTLS_AES_C)

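/*
 * Note on the round structure below: AESE/AESD perform AddRoundKey followed
 * by (Inv)SubBytes and (Inv)ShiftRows, while AESMC/AESIMC perform
 * (Inv)MixColumns.  The last cipher round has no MixColumns step, so it is
 * handled outside the loop, and because AESE/AESD consume their round key
 * up front, the final AddRoundKey is a plain XOR at the end of the function.
 * The decryption branch expects ctx->rk to hold the equivalent inverse key
 * schedule, which the regular mbedtls_aes_setkey_dec() path is assumed to
 * provide here.
 */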
int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
                                   int mode,
                                   const unsigned char input[16],
                                   unsigned char output[16] )
{
    unsigned int i;
    const uint8_t *rk;
    uint8x16_t x, k;

    x = vld1q_u8( input );                      // input block
    rk = (const uint8_t *) ctx->rk;             // round keys

    if( mode == MBEDTLS_AES_ENCRYPT )
    {
        for( i = ctx->nr - 1; i != 0; i-- )     // encryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaeseq_u8( x, k );
            x = vaesmcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaeseq_u8( x, k );
    }
    else
    {
        for( i = ctx->nr - 1; i != 0; i-- )     // decryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaesdq_u8( x, k );
            x = vaesimcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaesdq_u8( x, k );
    }

    k = vld1q_u8( rk );                         // final round key is just XORed
    x = veorq_u8( x, k );
    vst1q_u8( output, x );                      // write out

    return( 0 );
}
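
/*
 * Usage sketch (illustrative only, not compiled): the round keys in ctx->rk
 * are assumed to have been expanded by the regular key schedule API from
 * mbedtls/aes.h before this function is called directly.
 *
 *     mbedtls_aes_context ctx;
 *     unsigned char key[16] = { 0 };          // all-zero demo key
 *     unsigned char pt[16]  = { 0 };          // one 16-byte plaintext block
 *     unsigned char ct[16];
 *
 *     mbedtls_aes_init( &ctx );
 *     mbedtls_aes_setkey_enc( &ctx, key, 128 );
 *     mbedtls_armv8ce_aes_crypt_ecb( &ctx, MBEDTLS_AES_ENCRYPT, pt, ct );
 *     mbedtls_aes_free( &ctx );
 */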

#endif /* MBEDTLS_AES_C */


/*
 * [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
 */

#if defined(MBEDTLS_GCM_C)

void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
                               const unsigned char a[16],
                               const unsigned char b[16] )
{
    // GCM's GF(2^128) polynomial basis is x^128 + x^7 + x^2 + x + 1
    const uint64x2_t base = { 0, 0x86 };        // note missing LS bit
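    // Why 0x86 rather than 0x87: each "mul by x^64, reduce" step below
    // computes ( lo * x^64 + hi ) ^ hi * ( x^7 + x^2 + x ).  The EXT/EOR
    // pair already contributes the hi * 1 term (the polynomial's low bit),
    // so only x^7 + x^2 + x = 0x86 needs to be loaded as the constant.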

    register uint8x16_t vc asm( "v0" );         // named registers
    register uint8x16_t va asm( "v1" );         // (to avoid conflict)
    register uint8x16_t vb asm( "v2" );
    register uint64x2_t vp asm( "v3" );

    va = vld1q_u8( a );                         // load inputs
    vb = vld1q_u8( b );
    vp = base;

    asm (
        "rbit %1.16b, %1.16b            \n\t"   // reverse bit order
        "rbit %2.16b, %2.16b            \n\t"
        "pmull2 %0.1q, %1.2d, %2.2d     \n\t"   // v0 = a.hi * b.hi
        "pmull2 v4.1q, %0.2d, %3.2d     \n\t"   // mul v0 by x^64, reduce
        "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "ext v5.16b, %2.16b, %2.16b, #8 \n\t"   // (swap hi and lo in b)
        "pmull v4.1q, %1.1d, v5.1d      \n\t"   // v0 ^= a.lo * b.hi
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull2 v4.1q, %1.2d, v5.2d     \n\t"   // v0 ^= a.hi * b.lo
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull2 v4.1q, %0.2d, %3.2d     \n\t"   // mul v0 by x^64, reduce
        "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull v4.1q, %1.1d, %2.1d      \n\t"   // v0 ^= a.lo * b.lo
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "rbit %0.16b, %0.16b            \n\t"   // reverse bits for output
        : "=w" (vc)                             // v0: output
        : "w" (va), "w" (vb), "w" (vp)          // v1, v2, v3: inputs
        : "v4", "v5"                            // v4, v5: clobbered
    );

    vst1q_u8( c, vc );                          // write out
}
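
/*
 * Sanity-check sketch (illustrative only): in GCM's bit ordering the field
 * element "1" is the block 80 00 ... 00, so multiplying any 16-byte block a
 * by it must return a unchanged, and multiplying by the all-zero block must
 * return all zeros.
 *
 *     static const unsigned char one[16]  = { 0x80 };  // GF(2^128) identity
 *     static const unsigned char zero[16] = { 0 };
 *     unsigned char r[16];
 *
 *     mbedtls_armv8ce_gcm_mult( r, a, one );   // expect: r == a
 *     mbedtls_armv8ce_gcm_mult( r, a, zero );  // expect: r == all zeros
 */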

#endif /* MBEDTLS_GCM_C */

#endif /* MBEDTLS_ARMV8CE_AES_C */