Optimize mpi_bigendian_to_host() for speed and size
Use GCC / Clang builtins for byte swapping.
diff --git a/library/bignum.c b/library/bignum.c
index 402a3d5..c83f06d 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -718,18 +718,59 @@
/* Convert a big-endian byte array aligned to the size of mbedtls_mpi_uint
* into the storage form used by mbedtls_mpi. */
+
+/* Portable conversion of one limb from big-endian memory layout to the
+ * host's native representation.
+ *
+ * x: the limb as loaded from memory.
+ *
+ * Returns the value obtained by reading the bytes of x in address order,
+ * most significant byte first.
+ *
+ * NOTE(review): ciL is taken to be the size of a limb in bytes, as the
+ * loop bound and the 8-bit shift of the previous implementation imply. */
+static mbedtls_mpi_uint mpi_uint_bigendian_to_host_c( mbedtls_mpi_uint x )
+{
+    const unsigned char *x_ptr = (const unsigned char *) &x;
+    mbedtls_mpi_uint tmp = 0;
+    size_t i;
+
+    /* Walk the object representation instead of shifting the value:
+     * this works regardless of the endianness, whereas reversing the
+     * bytes of the value would wrongly swap an already-correct limb on
+     * a big-endian host. */
+    for( i = 0; i < ciL; i++ )
+        tmp = ( tmp << 8 ) | (mbedtls_mpi_uint) x_ptr[i];
+
+    return( tmp );
+}
+
+/* Convert a limb that was loaded from big-endian bytes into host byte order.
+ *
+ * x: the limb value as read from memory.
+ *
+ * Returns x unchanged when the compiler reports a big-endian target, the
+ * byte-swapped value when it reports a little-endian target and a byte-swap
+ * builtin is usable, and otherwise whatever
+ * mpi_uint_bigendian_to_host_c() returns. */
+static mbedtls_mpi_uint mpi_uint_bigendian_to_host( mbedtls_mpi_uint x )
+{
+#if defined(__BYTE_ORDER__)
+
+/* Nothing to do on bigendian systems. */
+#if ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
+    return( x );
+#endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
+
+#if ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
+
+/* GCC (4.3 and later) and Clang provide builtins for byte swapping.
+ *
+ * The version test uses __GNUC__ / __GNUC_MINOR__ directly because
+ * __GNUC_PREREQ is supplied by glibc's headers and, where it is missing,
+ * "__GNUC_PREREQ(4,3)" is not a valid #if expression. Likewise
+ * __has_builtin() may only appear in a directive that is reached after
+ * defined(__has_builtin) has been checked, hence the nesting: directives
+ * inside a skipped group are not evaluated. */
+#if defined(__GNUC__)
+#if ( __GNUC__ > 4 ) || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 3 )
+#define have_bswap
+#endif
+#endif /* __GNUC__ */
+
+#if !defined(have_bswap) && defined(__clang__) && defined(__has_builtin)
+#if __has_builtin(__builtin_bswap32) && __has_builtin(__builtin_bswap64)
+#define have_bswap
+#endif
+#endif /* __clang__ && __has_builtin */
+
+#if defined(have_bswap)
+    /* The compiler is hopefully able to statically evaluate this! */
+    switch( sizeof(mbedtls_mpi_uint) )
+    {
+        case 4:
+            return( __builtin_bswap32(x) );
+        case 8:
+            return( __builtin_bswap64(x) );
+        default:
+            /* Unexpected limb size: use the generic code below. */
+            break;
+    }
+#endif
+#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#endif /* __BYTE_ORDER__ */
+
+    /* Fall back to C-based reordering if we don't know the byte order
+     * or we couldn't use a compiler-specific builtin. */
+    return( mpi_uint_bigendian_to_host_c( x ) );
+}
+
static void mpi_bigendian_to_host( mbedtls_mpi_uint * const p, size_t limbs )
{
- size_t i;
-
- unsigned char *cur_byte_left;
- unsigned char *cur_byte_right;
-
mbedtls_mpi_uint *cur_limb_left;
mbedtls_mpi_uint *cur_limb_right;
-
- mbedtls_mpi_uint tmp_left, tmp_right;
-
if( limbs == 0 )
return;
@@ -742,30 +783,17 @@
* than the right index (it's not a problem if limbs is odd and the
* indices coincide in the last iteration).
*/
-
for( cur_limb_left = p, cur_limb_right = p + ( limbs - 1 );
cur_limb_left <= cur_limb_right;
cur_limb_left++, cur_limb_right-- )
{
- cur_byte_left = (unsigned char*) cur_limb_left;
- cur_byte_right = (unsigned char*) cur_limb_right;
-
- tmp_left = 0;
- tmp_right = 0;
-
- for( i = 0; i < ciL; i++ )
- {
- tmp_left |= ( (mbedtls_mpi_uint) *cur_byte_left++ )
- << ( ( ciL - 1 - i ) << 3 );
- tmp_right |= ( (mbedtls_mpi_uint) *cur_byte_right++ )
- << ( ( ciL - 1 - i ) << 3 );
- }
-
- *cur_limb_right = tmp_left;
- *cur_limb_left = tmp_right;
+ mbedtls_mpi_uint tmp;
+ /* Note that if cur_limb_left == cur_limb_right,
+ * this code effectively swaps the bytes only once. */
+ tmp = mpi_uint_bigendian_to_host( *cur_limb_left );
+ *cur_limb_left = mpi_uint_bigendian_to_host( *cur_limb_right );
+ *cur_limb_right = tmp;
}
-
- return;
}
/*