/*
 *  ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
 *
 *  Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
 *  SPDX-License-Identifier: Apache-2.0
 *
 *  Licensed under the Apache License, Version 2.0 (the "License"); you may
 *  not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *  This file is part of mbed TLS (https://tls.mbed.org)
 */

#if !defined(MBEDTLS_CONFIG_FILE)
#include "mbedtls/config.h"
#else
#include MBEDTLS_CONFIG_FILE
#endif

#if defined(MBEDTLS_ARMV8CE_AES_C)

#include <arm_neon.h>
#include "mbedtls/armv8ce_aes.h"

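// Some toolchains only recognise the "__asm" spelling of the keyword
// (e.g. when compiling in a strict ISO C mode), so map "asm" onto it.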
#ifndef asm
#define asm __asm
#endif

/*
 *  [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
 */

#if defined(MBEDTLS_AES_C)

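/*
 *  Round structure with the ARMv8-CE instructions: AESE performs
 *  AddRoundKey + SubBytes + ShiftRows and AESMC performs MixColumns, so one
 *  full round is an AESE/AESMC pair.  The loop below therefore runs
 *  ctx->nr - 1 full rounds, then a final AESE without MixColumns, and the
 *  last round key is applied with a plain XOR.  Decryption mirrors this with
 *  AESD/AESIMC on the inverse key schedule.
 */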
int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
                                   int mode,
                                   const unsigned char input[16],
                                   unsigned char output[16] )
{
    unsigned int i;
    const uint8_t *rk;
    uint8x16_t x, k;

    x = vld1q_u8( input );                      // input block
    rk = (const uint8_t *) ctx->rk;             // round keys

    if( mode == MBEDTLS_AES_ENCRYPT )
    {
        for( i = ctx->nr - 1; i ; i-- )         // encryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaeseq_u8( x, k );
            x = vaesmcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaeseq_u8( x, k );
    }
    else
    {
        for( i = ctx->nr - 1; i ; i-- )         // decryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaesdq_u8( x, k );
            x = vaesimcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaesdq_u8( x, k );
    }

    k = vld1q_u8( rk );                         // final key just XORed
    x = veorq_u8( x, k );
    vst1q_u8( output, x );                      // write out

    return ( 0 );
}
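
/*
 *  Illustrative usage sketch (not part of the build): the context is assumed
 *  to hold a key schedule prepared with the normal AES API, after which a
 *  single 16-byte block can be processed directly:
 *
 *      mbedtls_aes_context ctx;
 *      unsigned char key[16] = { 0 };              // caller-supplied key
 *      unsigned char in[16]  = { 0 }, out[16];
 *
 *      mbedtls_aes_init( &ctx );
 *      mbedtls_aes_setkey_enc( &ctx, key, 128 );
 *      mbedtls_armv8ce_aes_crypt_ecb( &ctx, MBEDTLS_AES_ENCRYPT, in, out );
 *      mbedtls_aes_free( &ctx );
 */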

#endif /* MBEDTLS_AES_C */

/*
 *  [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
 */

#if defined(MBEDTLS_GCM_C)

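/*
 *  The product below is accumulated Horner-style from 64-bit halves,
 *
 *      a * b = ( ( aH*bH ) * x^64 + aH*bL + aL*bH ) * x^64 + aL*bL ,
 *
 *  with one PMULL/PMULL2 carry-less multiply per partial product.  Each
 *  "* x^64" step is the PMULL2 against 'base' followed by the EXT/EOR pair,
 *  which folds the intermediate value back down modulo
 *  x^128 + x^7 + x^2 + x + 1 instead of doing a separate final reduction.
 *  The RBITs convert between GCM's reflected bit ordering and the natural
 *  polynomial bit order that PMULL works in.
 */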
void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
                               const unsigned char a[16],
                               const unsigned char b[16] )
{
    // GCM's GF(2^128) polynomial basis is x^128 + x^7 + x^2 + x + 1
    const uint64x2_t base = { 0, 0x86 };        // note missing LS bit

    register uint8x16_t vc asm( "v0" );         // named registers
    register uint8x16_t va asm( "v1" );         // (to avoid conflict)
    register uint8x16_t vb asm( "v2" );
    register uint64x2_t vp asm( "v3" );

    va = vld1q_u8( a );                         // load inputs
    vb = vld1q_u8( b );
    vp = base;

    asm (
        "rbit   %1.16b, %1.16b              \n\t"   // reverse bit order
        "rbit   %2.16b, %2.16b              \n\t"
        "pmull2 %0.1q, %1.2d, %2.2d         \n\t"   // v0 = a.hi * b.hi
        "pmull2 v4.1q, %0.2d, %3.2d         \n\t"   // mul v0 by x^64, reduce
        "ext    %0.16b, %0.16b, %0.16b, #8  \n\t"
        "eor    %0.16b, %0.16b, v4.16b      \n\t"
        "ext    v5.16b, %2.16b, %2.16b, #8  \n\t"   // (swap hi and lo in b)
        "pmull  v4.1q, %1.1d, v5.1d         \n\t"   // v0 ^= a.lo * b.hi
        "eor    %0.16b, %0.16b, v4.16b      \n\t"
        "pmull2 v4.1q, %1.2d, v5.2d         \n\t"   // v0 ^= a.hi * b.lo
        "eor    %0.16b, %0.16b, v4.16b      \n\t"
        "pmull2 v4.1q, %0.2d, %3.2d         \n\t"   // mul v0 by x^64, reduce
        "ext    %0.16b, %0.16b, %0.16b, #8  \n\t"
        "eor    %0.16b, %0.16b, v4.16b      \n\t"
        "pmull  v4.1q, %1.1d, %2.1d         \n\t"   // v0 ^= a.lo * b.lo
        "eor    %0.16b, %0.16b, v4.16b      \n\t"
        "rbit   %0.16b, %0.16b              \n\t"   // reverse bits for output
        : "=w" (vc)                                 // q0: output
        : "w" (va), "w" (vb), "w" (vp)              // q1, q2, q3: inputs
        : "v4", "v5"                                // q4, q5: clobbered
    );

    vst1q_u8( c, vc );                          // write out
}
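
/*
 *  Illustrative usage sketch (not part of the build): one GHASH-style
 *  accumulator update y = ( y ^ block ) * H.  In-place use (c == a) is safe
 *  because both inputs are loaded into registers before the result is
 *  stored:
 *
 *      unsigned char y[16], h[16], block[16];      // caller-initialised
 *      size_t i;
 *
 *      for( i = 0; i < 16; i++ )
 *          y[i] = (unsigned char)( y[i] ^ block[i] );
 *      mbedtls_armv8ce_gcm_mult( y, y, h );        // y = y * H
 */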

#endif /* MBEDTLS_GCM_C */

#endif /* MBEDTLS_ARMV8CE_AES_C */