Squashed commit upgrading to mbedtls-2.28.1

Squash merging branch import/mbedtls-2.28.1

 ebf1f6a58089 ("libmbedtls: compile new files added with 2.28.1")
 3ffb51b58a54 ("libmbedtls: add SM2 curve")
 c425755720b4 ("libmbedtls: mbedtls_mpi_exp_mod(): optimize mempool usage")
 23493c822a82 ("libmbedtls: mbedtls_mpi_exp_mod(): reduce stack usage")
 dcdca2348dff ("libmbedtls: mbedtls_mpi_exp_mod() initialize W")
 dc2994976958 ("libmbedtls: fix no CRT issue")
 c6628873b281 ("libmbedtls: add interfaces in mbedtls for context memory operation")
 8acd202d3e55 ("libmedtls: mpi_miller_rabin: increase count limit")
 37284e28d5d9 ("libmbedtls: add mbedtls_mpi_init_mempool()")
 b499a75f29f3 ("libmbedtls: make mbedtls_mpi_mont*() available")
 2080a8c96a5d ("mbedtls: configure mbedtls to reach for config")
 e0858334327a ("mbedtls: remove default include/mbedtls/config.h")
 dd9688e6b8ce ("Import mbedtls-2.28.1")

Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/lib/libmbedtls/mbedtls/library/poly1305.c b/lib/libmbedtls/mbedtls/library/poly1305.c
index 492d145..7375a0c 100644
--- a/lib/libmbedtls/mbedtls/library/poly1305.c
+++ b/lib/libmbedtls/mbedtls/library/poly1305.c
@@ -52,13 +52,6 @@
 
 #define POLY1305_BLOCK_SIZE_BYTES ( 16U )
 
-#define BYTES_TO_U32_LE( data, offset )                           \
-    ( (uint32_t) (data)[offset]                                     \
-          | (uint32_t) ( (uint32_t) (data)[( offset ) + 1] << 8 )   \
-          | (uint32_t) ( (uint32_t) (data)[( offset ) + 2] << 16 )  \
-          | (uint32_t) ( (uint32_t) (data)[( offset ) + 3] << 24 )  \
-    )
-
 /*
  * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
  * However we provided an alternative for platforms without such a multiplier.
@@ -129,10 +122,10 @@
     for( i = 0U; i < nblocks; i++ )
     {
         /* The input block is treated as a 128-bit little-endian integer */
-        d0   = BYTES_TO_U32_LE( input, offset + 0  );
-        d1   = BYTES_TO_U32_LE( input, offset + 4  );
-        d2   = BYTES_TO_U32_LE( input, offset + 8  );
-        d3   = BYTES_TO_U32_LE( input, offset + 12 );
+        d0   = MBEDTLS_GET_UINT32_LE( input, offset + 0  );
+        d1   = MBEDTLS_GET_UINT32_LE( input, offset + 4  );
+        d2   = MBEDTLS_GET_UINT32_LE( input, offset + 8  );
+        d3   = MBEDTLS_GET_UINT32_LE( input, offset + 12 );
 
         /* Compute: acc += (padded) block as a 130-bit integer */
         d0  += (uint64_t) acc0;
@@ -257,22 +250,10 @@
     acc3 += ctx->s[3] + (uint32_t) ( d >> 32U );
 
     /* Compute MAC (128 least significant bits of the accumulator) */
-    mac[ 0] = (unsigned char)( acc0       );
-    mac[ 1] = (unsigned char)( acc0 >>  8 );
-    mac[ 2] = (unsigned char)( acc0 >> 16 );
-    mac[ 3] = (unsigned char)( acc0 >> 24 );
-    mac[ 4] = (unsigned char)( acc1       );
-    mac[ 5] = (unsigned char)( acc1 >>  8 );
-    mac[ 6] = (unsigned char)( acc1 >> 16 );
-    mac[ 7] = (unsigned char)( acc1 >> 24 );
-    mac[ 8] = (unsigned char)( acc2       );
-    mac[ 9] = (unsigned char)( acc2 >>  8 );
-    mac[10] = (unsigned char)( acc2 >> 16 );
-    mac[11] = (unsigned char)( acc2 >> 24 );
-    mac[12] = (unsigned char)( acc3       );
-    mac[13] = (unsigned char)( acc3 >>  8 );
-    mac[14] = (unsigned char)( acc3 >> 16 );
-    mac[15] = (unsigned char)( acc3 >> 24 );
+    MBEDTLS_PUT_UINT32_LE( acc0, mac,  0 );
+    MBEDTLS_PUT_UINT32_LE( acc1, mac,  4 );
+    MBEDTLS_PUT_UINT32_LE( acc2, mac,  8 );
+    MBEDTLS_PUT_UINT32_LE( acc3, mac, 12 );
 }
 
 void mbedtls_poly1305_init( mbedtls_poly1305_context *ctx )
@@ -297,15 +278,15 @@
     POLY1305_VALIDATE_RET( key != NULL );
 
     /* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
-    ctx->r[0] = BYTES_TO_U32_LE( key, 0 )  & 0x0FFFFFFFU;
-    ctx->r[1] = BYTES_TO_U32_LE( key, 4 )  & 0x0FFFFFFCU;
-    ctx->r[2] = BYTES_TO_U32_LE( key, 8 )  & 0x0FFFFFFCU;
-    ctx->r[3] = BYTES_TO_U32_LE( key, 12 ) & 0x0FFFFFFCU;
+    ctx->r[0] = MBEDTLS_GET_UINT32_LE( key, 0 )  & 0x0FFFFFFFU;
+    ctx->r[1] = MBEDTLS_GET_UINT32_LE( key, 4 )  & 0x0FFFFFFCU;
+    ctx->r[2] = MBEDTLS_GET_UINT32_LE( key, 8 )  & 0x0FFFFFFCU;
+    ctx->r[3] = MBEDTLS_GET_UINT32_LE( key, 12 ) & 0x0FFFFFFCU;
 
-    ctx->s[0] = BYTES_TO_U32_LE( key, 16 );
-    ctx->s[1] = BYTES_TO_U32_LE( key, 20 );
-    ctx->s[2] = BYTES_TO_U32_LE( key, 24 );
-    ctx->s[3] = BYTES_TO_U32_LE( key, 28 );
+    ctx->s[0] = MBEDTLS_GET_UINT32_LE( key, 16 );
+    ctx->s[1] = MBEDTLS_GET_UINT32_LE( key, 20 );
+    ctx->s[2] = MBEDTLS_GET_UINT32_LE( key, 24 );
+    ctx->s[3] = MBEDTLS_GET_UINT32_LE( key, 28 );
 
     /* Initial accumulator state */
     ctx->acc[0] = 0U;