Merge pull request #3840 from AndrzejKurek/baremetal-aes-shuffling-2

CCM countermeasures - shuffling and masking
diff --git a/library/ccm.c b/library/ccm.c
index e54a995..87fe16d 100644
--- a/library/ccm.c
+++ b/library/ccm.c
@@ -114,6 +114,41 @@
     mbedtls_platform_zeroize( ctx, sizeof( mbedtls_ccm_context ) );
 }
 
+/* Fill table[0..size-1] with 0..size-1, then shuffle it in place using Durstenfeld's version of the Fisher-Yates shuffle. */
+static void mbedtls_generate_permutation( unsigned char* table, size_t  size )
+{
+    size_t i, j;
+
+    for( i = 0; i < size; i++ )
+    {
+        table[i] = (unsigned char) i; /* identity mapping; NOTE(review): the cast truncates if size > 256 */
+    }
+
+    if( size < 2 ) /* nothing to swap; also keeps size - 1 below from wrapping when size is 0 */
+    {
+        return;
+    }
+
+    for( i = size - 1; i > 0; i-- ) /* walk from the last slot down to slot 1 */
+    {
+        unsigned char tmp;
+        j = mbedtls_platform_random_in_range( (uint32_t) i + 1 ); /* NOTE(review): presumably a value in [0, i] - confirm against the platform helper */
+        tmp = table[i]; /* swap slots i and j */
+        table[i] = table[j];
+        table[j] = tmp;
+    }
+}
+
+static void mbedtls_generate_masks( unsigned char* table, size_t  size ) /* fill table[0..size-1] with one random mask byte each */
+{
+    size_t i;
+
+    for( i = 0; i < size; i++ )
+    {
+        table[i] = mbedtls_platform_random_in_range( 256 ); /* NOTE(review): presumably yields 0..255, making the implicit narrowing lossless - confirm */
+    }
+}
+
 /*
  * Macros for common operations.
  * Results in smaller compiled code than static inline functions.
@@ -122,30 +157,55 @@
 /*
  * Update the CBC-MAC state in y using a block in b
  * (Always using b as the source helps the compiler optimise a bit better.)
+ * b must already be XOR-masked with mask_table by the caller (it is filled from several different sources); the mask is stripped here.
  */
 #define UPDATE_CBC_MAC                                                      \
     for( i = 0; i < 16; i++ )                                               \
-        y[i] ^= b[i];                                                       \
+    {                                                                       \
+        y[perm_table[i]] ^= b[perm_table[i]]; /* b is already masked */     \
+        y[perm_table[i]] ^= mask_table[perm_table[i]]; /* strip the mask */ \
+    }                                                                       \
                                                                             \
     if( ( ret = mbedtls_cipher_update( &ctx->cipher_ctx, y, 16, y, &olen ) ) != 0 ) \
         return( ret );
 
 /*
+ * Copy len_src bytes of src to dst, XORing each with its mask byte, starting at a random offset and wrapping around; then XOR the mask into dst[len_src..len_dst-1] in place. Expects a loop variable i to be in scope at the expansion site.
+ */
+#define COPY_MASK( dst, src, mask, len_src, len_dst )                   \
+    do                                                                  \
+    {                                                                   \
+        unsigned j, offset = mbedtls_platform_random_in_range( 256 );   \
+        for( i = 0; i < (len_src); i++ )                                \
+        {                                                               \
+            j = (i + offset) % (len_src); /* rotated start index */     \
+            (dst)[j] = (src)[j] ^ (mask)[j];                            \
+        }                                                               \
+        for( ; i < (len_dst); i++ ) /* tail holds no source data */     \
+            (dst)[i] ^= (mask)[i];                                      \
+    } while( 0 )
+/* NOTE: len must not exceed 16, the size of b, perm_table and mask_table.
  * Encrypt or decrypt a partial block with CTR
  * Warning: using b for temporary storage! src and dst must not be b!
  * This avoids allocating one more 16 bytes buffer while allowing src == dst.
  */
-#define CTR_CRYPT( dst, src, len  )                                            \
+#define CTR_CRYPT( dst, src, len )                                      \
     do                                                                  \
     {                                                                   \
+        mbedtls_generate_permutation( perm_table, (len) );              \
+        mbedtls_generate_masks( mask_table, (len) );                    \
         if( ( ret = mbedtls_cipher_update( &ctx->cipher_ctx, ctr,       \
                                            16, b, &olen ) ) != 0 )      \
         {                                                               \
             return( ret );                                              \
         }                                                               \
                                                                         \
-        for( i = 0; i < (len); i++ )                                    \
-            (dst)[i] = (src)[i] ^ b[i];                                 \
+        for( i = 0; i < (len); i++ )                                    \
+        {                                                               \
+            (dst)[perm_table[i]] = (src)[perm_table[i]] ^ mask_table[perm_table[i]];\
+            (dst)[perm_table[i]] ^= b[perm_table[i]]; /* keystream */   \
+            (dst)[perm_table[i]] ^= mask_table[perm_table[i]]; /* unmask */ \
+        }                                                               \
     } while( 0 )
 
 /*
@@ -164,6 +224,8 @@
     unsigned char b[16];
     unsigned char y[16];
     unsigned char ctr[16];
+    unsigned char perm_table[16];
+    unsigned char mask_table[16];
     const unsigned char *src;
     unsigned char *dst;
 
@@ -198,15 +260,16 @@
      * 5 .. 3   (t - 2) / 2
      * 2 .. 0   q - 1
      */
-    b[0] = 0;
-    b[0] |= ( add_len > 0 ) << 6;
-    b[0] |= ( ( tag_len - 2 ) / 2 ) << 3;
-    b[0] |= q - 1;
+    mbedtls_generate_masks( mask_table, 16 );
+    mbedtls_generate_permutation( perm_table, 16 );
+    b[0] = (unsigned char) ( ( ( add_len > 0 ) << 6 ) |
+                           ( ( ( tag_len - 2 ) / 2 ) << 3 ) |
+                           ( q - 1 ) ) ^ mask_table[0];
 
-    mbedtls_platform_memcpy( b + 1, iv, iv_len );
-
+    for( i = 0; i < iv_len; i++ )
+        b[i+1] = iv[i] ^ mask_table[i+1];
     for( i = 0, len_left = length; i < q; i++, len_left >>= 8 )
-        b[15-i] = (unsigned char)( len_left & 0xFF );
+        b[15-i] = (unsigned char)( ( len_left & 0xFF ) ) ^ mask_table[15-i];
 
     if( len_left > 0 )
         return( MBEDTLS_ERR_CCM_BAD_INPUT );
@@ -226,12 +289,16 @@
         len_left = add_len;
         src = add;
 
+        mbedtls_generate_masks( mask_table, 16 );
+        mbedtls_generate_permutation( perm_table, 16 );
         mbedtls_platform_memset( b, 0, 16 );
-        b[0] = (unsigned char)( ( add_len >> 8 ) & 0xFF );
-        b[1] = (unsigned char)( ( add_len      ) & 0xFF );
+        b[0] = (unsigned char)( ( ( add_len >> 8 ) & 0xFF ) ^ mask_table[0] );
+        b[1] = (unsigned char)( ( ( add_len      ) & 0xFF ) ^ mask_table[1] );
 
         use_len = len_left < 16 - 2 ? len_left : 16 - 2;
-        mbedtls_platform_memcpy( b + 2, src, use_len );
+
+        COPY_MASK( b+2, src, mask_table+2, use_len, 14 );
+
         len_left -= use_len;
         src += use_len;
 
@@ -239,10 +306,12 @@
 
         while( len_left > 0 )
         {
+            mbedtls_generate_masks( mask_table, 16 );
+            mbedtls_generate_permutation( perm_table, 16 );
             use_len = len_left > 16 ? 16 : len_left;
 
             mbedtls_platform_memset( b, 0, 16 );
-            mbedtls_platform_memcpy( b, src, use_len );
+            COPY_MASK( b, src, mask_table, use_len, 16);
             UPDATE_CBC_MAC;
 
             len_left -= use_len;
@@ -281,8 +350,10 @@
 
         if( mode == CCM_ENCRYPT )
         {
+            mbedtls_generate_masks( mask_table, 16 );
+            mbedtls_generate_permutation( perm_table, 16 );
             mbedtls_platform_memset( b, 0, 16 );
-            mbedtls_platform_memcpy( b, src, use_len );
+            COPY_MASK( b, src, mask_table, use_len, 16 );
             UPDATE_CBC_MAC;
         }
 
@@ -290,8 +361,10 @@
 
         if( mode == CCM_DECRYPT )
         {
+            mbedtls_generate_masks( mask_table, 16 );
+            mbedtls_generate_permutation( perm_table, 16 );
             mbedtls_platform_memset( b, 0, 16 );
-            mbedtls_platform_memcpy( b, dst, use_len );
+            COPY_MASK( b, dst, mask_table, use_len, 16 );
             UPDATE_CBC_MAC;
         }