Remove temporary allocation in mbedtls_mpi_sub_abs
When X == B, the code no longer requires an extra call to
mbedtls_calloc, provided that X->n >= A->n. This reduces calls to
mbedtls_calloc in test_suite_ecdsa by 52% and in test_suite_ecdh by 36%.
This change does not update mpi_sub_hlp: in mpi_montmul, doing so translates
to a performance decrease for RSA and DHM, since in all cases X == A there.
Signed-off-by: Eric Adamson <eadamson@fastmail.fm>
diff --git a/library/bignum.c b/library/bignum.c
index d53aefd..63a656f 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -1353,9 +1353,7 @@
*/
int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
{
- mbedtls_mpi TB;
int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
- size_t n;
MPI_VALIDATE_RET( X != NULL );
MPI_VALIDATE_RET( A != NULL );
MPI_VALIDATE_RET( B != NULL );
@@ -1363,34 +1361,48 @@
if( mbedtls_mpi_cmp_abs( A, B ) < 0 )
return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
- mbedtls_mpi_init( &TB );
-
- if( X == B )
- {
- MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
- B = &TB;
- }
-
- if( X != A )
- MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
-
/*
* X should always be positive as a result of unsigned subtractions.
*/
X->s = 1;
-
ret = 0;
- for( n = B->n; n > 0; n-- )
- if( B->p[n - 1] != 0 )
- break;
+ MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, A->n ) );
- mpi_sub_hlp( n, B->p, X->p );
+ size_t width = A->n > B->n ? B->n : A->n;
+ size_t i;
+ mbedtls_mpi_uint c = 0;
+
+ for( i = 0; i < width; i++ )
+ {
+ mbedtls_mpi_uint a = A->p[i];
+ mbedtls_mpi_uint b = B->p[i];
+ mbedtls_mpi_uint z = a < c;
+
+ X->p[i] = a - c;
+ c = ( X->p[i] < b ) + z;
+ X->p[i] -= b;
+ }
+
+ for( ; i < A->n && c > 0; i++ )
+ {
+ mbedtls_mpi_uint a = A->p[i];
+ mbedtls_mpi_uint z = a < c;
+ X->p[i] = a - c;
+ c = z;
+ }
+
+ for( ; i < A->n; i++ )
+ {
+ X->p[i] = A->p[i];
+ }
+
+ for( ; i < X->n; i++ )
+ {
+ X->p[i] = 0;
+ }
cleanup:
-
- mbedtls_mpi_free( &TB );
-
return( ret );
}