/*
 * ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
 *
 * Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This file is part of mbed TLS (https://tls.mbed.org)
 */

#if !defined(MBEDTLS_CONFIG_FILE)
#include "mbedtls/config.h"
#else
#include MBEDTLS_CONFIG_FILE
#endif

#if defined(MBEDTLS_ARMV8CE_AES_C)

#include <arm_neon.h>
#include "mbedtls/armv8ce_aes.h"

#ifndef asm
#define asm __asm
#endif

/*
 * [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
 */

#if defined(MBEDTLS_AES_C)

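/*
 * Note on the round structure below: AESE/AESD perform AddRoundKey followed
 * by (Inv)SubBytes and (Inv)ShiftRows, while AESMC/AESIMC perform
 * (Inv)MixColumns.  The last cipher round has no MixColumns step, so it is
 * handled outside the loop, and because AESE/AESD consume their round key
 * up front, the final AddRoundKey is a plain XOR at the end of the function.
 * The decryption branch expects ctx->rk to hold the equivalent inverse key
 * schedule, which the regular mbedtls_aes_setkey_dec() path is assumed to
 * provide here.
 */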
int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
                                   int mode,
                                   const unsigned char input[16],
                                   unsigned char output[16] )
{
    unsigned int i;
    const uint8_t *rk;
    uint8x16_t x, k;

    x = vld1q_u8( input );                      // input block
    rk = (const uint8_t *) ctx->rk;             // round keys

    if( mode == MBEDTLS_AES_ENCRYPT )
    {
        for( i = ctx->nr - 1; i != 0; i-- )     // encryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaeseq_u8( x, k );
            x = vaesmcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaeseq_u8( x, k );
    }
    else
    {
        for( i = ctx->nr - 1; i != 0; i-- )     // decryption loop
        {
            k = vld1q_u8( rk );
            rk += 16;
            x = vaesdq_u8( x, k );
            x = vaesimcq_u8( x );
        }
        k = vld1q_u8( rk );
        rk += 16;
        x = vaesdq_u8( x, k );
    }

    k = vld1q_u8( rk );                         // final round key is just XORed
    x = veorq_u8( x, k );
    vst1q_u8( output, x );                      // write out

    return( 0 );
}
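
/*
 * Usage sketch (illustrative only, not compiled): the round keys in ctx->rk
 * are assumed to have been expanded by the regular key schedule API from
 * mbedtls/aes.h before this function is called directly.
 *
 *     mbedtls_aes_context ctx;
 *     unsigned char key[16] = { 0 };          // all-zero demo key
 *     unsigned char pt[16]  = { 0 };          // one 16-byte plaintext block
 *     unsigned char ct[16];
 *
 *     mbedtls_aes_init( &ctx );
 *     mbedtls_aes_setkey_enc( &ctx, key, 128 );
 *     mbedtls_armv8ce_aes_crypt_ecb( &ctx, MBEDTLS_AES_ENCRYPT, pt, ct );
 *     mbedtls_aes_free( &ctx );
 */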

#endif /* MBEDTLS_AES_C */


/*
 * [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
 */

#if defined(MBEDTLS_GCM_C)

void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
                               const unsigned char a[16],
                               const unsigned char b[16] )
{
    // GCM's GF(2^128) polynomial basis is x^128 + x^7 + x^2 + x + 1
    const uint64x2_t base = { 0, 0x86 };        // note missing LS bit
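    // Why 0x86 rather than 0x87: each "mul by x^64, reduce" step below
    // computes ( lo * x^64 + hi ) ^ hi * ( x^7 + x^2 + x ).  The EXT/EOR
    // pair already contributes the hi * 1 term (the polynomial's low bit),
    // so only x^7 + x^2 + x = 0x86 needs to be loaded as the constant.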

    register uint8x16_t vc asm( "v0" );         // named registers
    register uint8x16_t va asm( "v1" );         // (to avoid conflict)
    register uint8x16_t vb asm( "v2" );
    register uint64x2_t vp asm( "v3" );

    va = vld1q_u8( a );                         // load inputs
    vb = vld1q_u8( b );
    vp = base;

    asm (
        "rbit %1.16b, %1.16b            \n\t"   // reverse bit order
        "rbit %2.16b, %2.16b            \n\t"
        "pmull2 %0.1q, %1.2d, %2.2d     \n\t"   // v0 = a.hi * b.hi
        "pmull2 v4.1q, %0.2d, %3.2d     \n\t"   // mul v0 by x^64, reduce
        "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "ext v5.16b, %2.16b, %2.16b, #8 \n\t"   // (swap hi and lo in b)
        "pmull v4.1q, %1.1d, v5.1d      \n\t"   // v0 ^= a.lo * b.hi
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull2 v4.1q, %1.2d, v5.2d     \n\t"   // v0 ^= a.hi * b.lo
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull2 v4.1q, %0.2d, %3.2d     \n\t"   // mul v0 by x^64, reduce
        "ext %0.16b, %0.16b, %0.16b, #8 \n\t"
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "pmull v4.1q, %1.1d, %2.1d      \n\t"   // v0 ^= a.lo * b.lo
        "eor %0.16b, %0.16b, v4.16b     \n\t"
        "rbit %0.16b, %0.16b            \n\t"   // reverse bits for output
        : "=w" (vc)                             // v0: output
        : "w" (va), "w" (vb), "w" (vp)          // v1, v2, v3: inputs
        : "v4", "v5"                            // v4, v5: clobbered
    );

    vst1q_u8( c, vc );                          // write out
}
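
/*
 * Sanity-check sketch (illustrative only): in GCM's bit ordering the field
 * element "1" is the block 80 00 ... 00, so multiplying any 16-byte block a
 * by it must return a unchanged, and multiplying by the all-zero block must
 * return all zeros.
 *
 *     static const unsigned char one[16]  = { 0x80 };  // GF(2^128) identity
 *     static const unsigned char zero[16] = { 0 };
 *     unsigned char r[16];
 *
 *     mbedtls_armv8ce_gcm_mult( r, a, one );   // expect: r == a
 *     mbedtls_armv8ce_gcm_mult( r, a, zero );  // expect: r == all zeros
 */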

#endif /* MBEDTLS_GCM_C */

#endif /* MBEDTLS_ARMV8CE_AES_C */