Import mbedtls-2.16.0

Imports mbedTLS 2.16.0 from https://github.com/ARMmbed/mbedtls.git
commit fb1972db23da ("Merge pull request #544 from ARMmbed/version-2.16")
(tag mbedtls-2.16.0).

Certain files will never be needed and are thus removed (reducing the number
of lines to almost 50%):
rm -f circle.yml CMakeLists.txt DartConfiguration.tcl Makefile
rm -f .gitignore .travis.yml .pylint
rm -f include/.gitignore include/CMakeLists.txt library/.gitignore
rm -f library/CMakeLists.txt library/Makefile
rm -rf .git .github doxygen configs programs scripts tests visualc yotta

This is a complete overwrite of previous code so earlier changes in the
branch import/mbedtls-2.6.1 will be added on top of this commit to bring
the changes forward.

Acked-by: Jerome Forissier <jerome.forissier@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/lib/libmbedtls/mbedtls/library/poly1305.c b/lib/libmbedtls/mbedtls/library/poly1305.c
new file mode 100644
index 0000000..b274119
--- /dev/null
+++ b/lib/libmbedtls/mbedtls/library/poly1305.c
@@ -0,0 +1,559 @@
+/**
+ * \file poly1305.c
+ *
+ * \brief Poly1305 authentication algorithm.
+ *
+ *  Copyright (C) 2006-2016, ARM Limited, All Rights Reserved
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of mbed TLS (https://tls.mbed.org)
+ */
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_POLY1305_C)
+
+#include "mbedtls/poly1305.h"
+#include "mbedtls/platform_util.h"
+
+#include <string.h>
+
+#if defined(MBEDTLS_SELF_TEST)
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdio.h>
+#define mbedtls_printf printf
+#endif /* MBEDTLS_PLATFORM_C */
+#endif /* MBEDTLS_SELF_TEST */
+
+#if !defined(MBEDTLS_POLY1305_ALT)
+
+#if ( defined(__ARMCC_VERSION) || defined(_MSC_VER) ) && \
+    !defined(inline) && !defined(__cplusplus)
+#define inline __inline /* map the keyword to the __inline spelling for these compilers */
+#endif
+
+/* Parameter validation macros; the _RET form supplies this module's BAD_INPUT_DATA code */
+#define POLY1305_VALIDATE_RET( cond )                                       \
+    MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_POLY1305_BAD_INPUT_DATA )
+#define POLY1305_VALIDATE( cond )                                           \
+    MBEDTLS_INTERNAL_VALIDATE( cond )
+
+#define POLY1305_BLOCK_SIZE_BYTES ( 16U ) /* input is absorbed in 16-byte blocks */
+
+#define BYTES_TO_U32_LE( data, offset ) /* 32-bit little-endian load of data[offset..offset+3] */ \
+    ( (uint32_t) ( data )[( offset )]                                 \
+          | (uint32_t) ( (uint32_t) ( data )[( offset ) + 1] << 8 )   \
+          | (uint32_t) ( (uint32_t) ( data )[( offset ) + 2] << 16 )  \
+          | (uint32_t) ( (uint32_t) ( data )[( offset ) + 3] << 24 )  \
+    )
+
+/*
+ * Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
+ * However, we provide an alternative mul64() for platforms without one.
+ */
+#if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
+static uint64_t mul64( uint32_t a, uint32_t b )
+{
+    /* a = al + 2**16 ah, b = bl + 2**16 bh */
+    const uint16_t al = (uint16_t) a;
+    const uint16_t bl = (uint16_t) b;
+    const uint16_t ah = a >> 16;
+    const uint16_t bh = b >> 16;
+
+    /* ab = al*bl + 2**16 (ah*bl + al*bh) + 2**32 ah*bh */
+    const uint32_t lo = (uint32_t) al * bl;
+    const uint64_t me = (uint64_t)( (uint32_t) ah * bl ) + (uint32_t) al * bh; /* sum may need 33 bits */
+    const uint32_t hi = (uint32_t) ah * bh;
+
+    return( lo + ( me << 16 ) + ( (uint64_t) hi << 32 ) );
+}
+#else
+static inline uint64_t mul64( uint32_t a, uint32_t b )
+{
+    return( (uint64_t) a * b ); /* widen first so the product keeps all 64 bits */
+}
+#endif
+
+
+/**
+ * \brief                   Process full blocks with Poly1305: for each block,
+ *                          acc = ( acc + block ) * r, partially reduced.
+ * \param ctx               The Poly1305 context; r is read, acc is updated.
+ * \param nblocks           Number of blocks to process. Note that this
+ *                          function only processes full blocks.
+ * \param input             Buffer holding nblocks * 16 bytes of input.
+ * \param needs_padding     Set to 0 if the padding bit has already been
+ *                          applied to the input data before calling this
+ *                          function.  Otherwise, set this parameter to 1.
+ */
+static void poly1305_process( mbedtls_poly1305_context *ctx,
+                              size_t nblocks,
+                              const unsigned char *input,
+                              uint32_t needs_padding )
+{
+    uint64_t d0, d1, d2, d3;
+    uint32_t acc0, acc1, acc2, acc3, acc4;
+    uint32_t r0, r1, r2, r3;
+    uint32_t rs1, rs2, rs3;
+    size_t offset  = 0U;
+    size_t i;
+
+    r0 = ctx->r[0];
+    r1 = ctx->r[1];
+    r2 = ctx->r[2];
+    r3 = ctx->r[3];
+
+    rs1 = r1 + ( r1 >> 2U ); /* rsN = 5 * rN / 4, for product terms that wrap */
+    rs2 = r2 + ( r2 >> 2U ); /* past 2^128 (2^130 = 5 mod 2^130 - 5); exact   */
+    rs3 = r3 + ( r3 >> 2U ); /* since starts() clears the low 2 bits of r[1..3] */
+
+    acc0 = ctx->acc[0];
+    acc1 = ctx->acc[1];
+    acc2 = ctx->acc[2];
+    acc3 = ctx->acc[3];
+    acc4 = ctx->acc[4];
+
+    /* Process full blocks */
+    for( i = 0U; i < nblocks; i++ )
+    {
+        /* The input block is treated as a 128-bit little-endian integer */
+        d0   = BYTES_TO_U32_LE( input, offset + 0  );
+        d1   = BYTES_TO_U32_LE( input, offset + 4  );
+        d2   = BYTES_TO_U32_LE( input, offset + 8  );
+        d3   = BYTES_TO_U32_LE( input, offset + 12 );
+
+        /* Compute: acc += (padded) block as a 130-bit integer */
+        d0  += (uint64_t) acc0;
+        d1  += (uint64_t) acc1 + ( d0 >> 32U );
+        d2  += (uint64_t) acc2 + ( d1 >> 32U );
+        d3  += (uint64_t) acc3 + ( d2 >> 32U );
+        acc0 = (uint32_t) d0;
+        acc1 = (uint32_t) d1;
+        acc2 = (uint32_t) d2;
+        acc3 = (uint32_t) d3;
+        acc4 += (uint32_t) ( d3 >> 32U ) + needs_padding; /* carry + padding bit (2^128) */
+
+        /* Compute: acc *= r */
+        d0 = mul64( acc0, r0  ) +
+             mul64( acc1, rs3 ) +
+             mul64( acc2, rs2 ) +
+             mul64( acc3, rs1 );
+        d1 = mul64( acc0, r1  ) +
+             mul64( acc1, r0  ) +
+             mul64( acc2, rs3 ) +
+             mul64( acc3, rs2 ) +
+             mul64( acc4, rs1 );
+        d2 = mul64( acc0, r2  ) +
+             mul64( acc1, r1  ) +
+             mul64( acc2, r0  ) +
+             mul64( acc3, rs3 ) +
+             mul64( acc4, rs2 );
+        d3 = mul64( acc0, r3  ) +
+             mul64( acc1, r2  ) +
+             mul64( acc2, r1  ) +
+             mul64( acc3, r0  ) +
+             mul64( acc4, rs3 );
+        acc4 *= r0; /* remaining product term acc4 * r0, weight 2^128 */
+
+        /* Compute: acc %= (2^130 - 5) (partial remainder) */
+        d1 += ( d0 >> 32 );
+        d2 += ( d1 >> 32 );
+        d3 += ( d2 >> 32 );
+        acc0 = (uint32_t) d0;
+        acc1 = (uint32_t) d1;
+        acc2 = (uint32_t) d2;
+        acc3 = (uint32_t) d3;
+        acc4 = (uint32_t) ( d3 >> 32 ) + acc4;
+
+        d0 = (uint64_t) acc0 + ( acc4 >> 2 ) + ( acc4 & 0xFFFFFFFCU ); /* fold bits >= 130: add 5 * ( acc4 >> 2 ) */
+        acc4 &= 3U; /* keep bits 128..129 only */
+        acc0 = (uint32_t) d0;
+        d0 = (uint64_t) acc1 + ( d0 >> 32U );
+        acc1 = (uint32_t) d0;
+        d0 = (uint64_t) acc2 + ( d0 >> 32U );
+        acc2 = (uint32_t) d0;
+        d0 = (uint64_t) acc3 + ( d0 >> 32U );
+        acc3 = (uint32_t) d0;
+        d0 = (uint64_t) acc4 + ( d0 >> 32U );
+        acc4 = (uint32_t) d0;
+
+        offset    += POLY1305_BLOCK_SIZE_BYTES;
+    }
+
+    ctx->acc[0] = acc0;
+    ctx->acc[1] = acc1;
+    ctx->acc[2] = acc2;
+    ctx->acc[3] = acc3;
+    ctx->acc[4] = acc4;
+}
+
+/**
+ * \brief                   Compute the Poly1305 MAC: fully reduce the
+ *                          accumulator, add 's', keep the low 128 bits.
+ * \param ctx               The Poly1305 context (not modified).
+ * \param mac               The buffer to where the MAC is written. Must be
+ *                          big enough to contain the 16-byte MAC.
+ */
+static void poly1305_compute_mac( const mbedtls_poly1305_context *ctx,
+                                  unsigned char mac[16] )
+{
+    uint64_t d;
+    uint32_t g0, g1, g2, g3, g4;
+    uint32_t acc0, acc1, acc2, acc3, acc4;
+    uint32_t mask;
+    uint32_t mask_inv;
+
+    acc0 = ctx->acc[0];
+    acc1 = ctx->acc[1];
+    acc2 = ctx->acc[2];
+    acc3 = ctx->acc[3];
+    acc4 = ctx->acc[4];
+
+    /* Before adding 's' we ensure that the accumulator is mod 2^130 - 5.
+     * We do this by calculating g = acc + 5, then checking if the 131st bit
+     * of g is set. If it is, acc >= 2^130 - 5 and g mod 2^130 is the reduced value.
+     */
+
+    /* Calculate g = acc + 5, i.e. acc - (2^130 - 5) + 2^130 */
+    d  = ( (uint64_t) acc0 + 5U );
+    g0 = (uint32_t) d;
+    d  = ( (uint64_t) acc1 + ( d >> 32 ) );
+    g1 = (uint32_t) d;
+    d  = ( (uint64_t) acc2 + ( d >> 32 ) );
+    g2 = (uint32_t) d;
+    d  = ( (uint64_t) acc3 + ( d >> 32 ) );
+    g3 = (uint32_t) d;
+    g4 = acc4 + (uint32_t) ( d >> 32U );
+
+    /* mask == 0xFFFFFFFF if 131st bit is set, otherwise mask == 0 */
+    mask = (uint32_t) 0U - ( g4 >> 2U );
+    mask_inv = ~mask;
+
+    /* If 131st bit is set then acc=g, otherwise, acc is unmodified (mask select, no branch) */
+    acc0 = ( acc0 & mask_inv ) | ( g0 & mask );
+    acc1 = ( acc1 & mask_inv ) | ( g1 & mask );
+    acc2 = ( acc2 & mask_inv ) | ( g2 & mask );
+    acc3 = ( acc3 & mask_inv ) | ( g3 & mask );
+
+    /* Add 's' modulo 2^128 (the carry out of acc3 is dropped) */
+    d = (uint64_t) acc0 + ctx->s[0];
+    acc0 = (uint32_t) d;
+    d = (uint64_t) acc1 + ctx->s[1] + ( d >> 32U );
+    acc1 = (uint32_t) d;
+    d = (uint64_t) acc2 + ctx->s[2] + ( d >> 32U );
+    acc2 = (uint32_t) d;
+    acc3 += ctx->s[3] + (uint32_t) ( d >> 32U );
+
+    /* Compute MAC (128 least significant bits of the accumulator), little-endian */
+    mac[ 0] = (unsigned char)( acc0       );
+    mac[ 1] = (unsigned char)( acc0 >>  8 );
+    mac[ 2] = (unsigned char)( acc0 >> 16 );
+    mac[ 3] = (unsigned char)( acc0 >> 24 );
+    mac[ 4] = (unsigned char)( acc1       );
+    mac[ 5] = (unsigned char)( acc1 >>  8 );
+    mac[ 6] = (unsigned char)( acc1 >> 16 );
+    mac[ 7] = (unsigned char)( acc1 >> 24 );
+    mac[ 8] = (unsigned char)( acc2       );
+    mac[ 9] = (unsigned char)( acc2 >>  8 );
+    mac[10] = (unsigned char)( acc2 >> 16 );
+    mac[11] = (unsigned char)( acc2 >> 24 );
+    mac[12] = (unsigned char)( acc3       );
+    mac[13] = (unsigned char)( acc3 >>  8 );
+    mac[14] = (unsigned char)( acc3 >> 16 );
+    mac[15] = (unsigned char)( acc3 >> 24 );
+}
+
+void mbedtls_poly1305_init( mbedtls_poly1305_context *ctx )
+{
+    POLY1305_VALIDATE( ctx != NULL );
+    /* Clear the whole context; mbedtls_poly1305_starts() loads the key afterwards. */
+    mbedtls_platform_zeroize( ctx, sizeof( mbedtls_poly1305_context ) );
+}
+
+void mbedtls_poly1305_free( mbedtls_poly1305_context *ctx )
+{
+    if( ctx == NULL )
+        return; /* a NULL context is accepted and ignored */
+    /* Wipe the key-derived state (r, s, acc) and any queued input. */
+    mbedtls_platform_zeroize( ctx, sizeof( mbedtls_poly1305_context ) );
+}
+
+int mbedtls_poly1305_starts( mbedtls_poly1305_context *ctx,
+                             const unsigned char key[32] )
+{
+    POLY1305_VALIDATE_RET( ctx != NULL );
+    POLY1305_VALIDATE_RET( key != NULL );
+
+    /* r = key[0..15] (little-endian), clamped: r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
+    ctx->r[0] = BYTES_TO_U32_LE( key, 0 )  & 0x0FFFFFFFU;
+    ctx->r[1] = BYTES_TO_U32_LE( key, 4 )  & 0x0FFFFFFCU;
+    ctx->r[2] = BYTES_TO_U32_LE( key, 8 )  & 0x0FFFFFFCU;
+    ctx->r[3] = BYTES_TO_U32_LE( key, 12 ) & 0x0FFFFFFCU;
+
+    ctx->s[0] = BYTES_TO_U32_LE( key, 16 ); /* s = key[16..31] (little-endian); */
+    ctx->s[1] = BYTES_TO_U32_LE( key, 20 ); /* poly1305_compute_mac() adds it to */
+    ctx->s[2] = BYTES_TO_U32_LE( key, 24 ); /* the reduced accumulator to form  */
+    ctx->s[3] = BYTES_TO_U32_LE( key, 28 ); /* the MAC                          */
+
+    /* Initial accumulator state */
+    ctx->acc[0] = 0U;
+    ctx->acc[1] = 0U;
+    ctx->acc[2] = 0U;
+    ctx->acc[3] = 0U;
+    ctx->acc[4] = 0U;
+
+    /* Queue initially empty (contents wiped, length reset) */
+    mbedtls_platform_zeroize( ctx->queue, sizeof( ctx->queue ) );
+    ctx->queue_len = 0U;
+
+    return( 0 ); /* no failure path once the parameter checks have passed */
+}
+
+int mbedtls_poly1305_update( mbedtls_poly1305_context *ctx,
+                             const unsigned char *input,
+                             size_t ilen )
+{
+    size_t offset    = 0U;   /* bytes of input consumed so far */
+    size_t remaining = ilen; /* bytes of input not yet consumed */
+    size_t queue_free_len;   /* room left in the partial-block queue */
+    size_t nblocks;
+    POLY1305_VALIDATE_RET( ctx != NULL );
+    POLY1305_VALIDATE_RET( ilen == 0 || input != NULL );
+
+    if( ( remaining > 0U ) && ( ctx->queue_len > 0U ) ) /* top up a partial block from an earlier call */
+    {
+        queue_free_len = ( POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len );
+
+        if( ilen < queue_free_len )
+        {
+            /* Not enough data to complete the block.
+             * Store this data with the other leftovers.
+             */
+            memcpy( &ctx->queue[ctx->queue_len],
+                    input,
+                    ilen );
+
+            ctx->queue_len += ilen;
+
+            remaining = 0U;
+        }
+        else
+        {
+            /* Enough data to produce a complete block */
+            memcpy( &ctx->queue[ctx->queue_len],
+                    input,
+                    queue_free_len );
+
+            ctx->queue_len = 0U;
+
+            poly1305_process( ctx, 1U, ctx->queue, 1U ); /* add padding bit */
+
+            offset    += queue_free_len;
+            remaining -= queue_free_len;
+        }
+    }
+
+    if( remaining >= POLY1305_BLOCK_SIZE_BYTES ) /* full blocks are processed straight from input */
+    {
+        nblocks = remaining / POLY1305_BLOCK_SIZE_BYTES;
+
+        poly1305_process( ctx, nblocks, &input[offset], 1U ); /* add padding bit */
+
+        offset += nblocks * POLY1305_BLOCK_SIZE_BYTES;
+        remaining %= POLY1305_BLOCK_SIZE_BYTES;
+    }
+
+    if( remaining > 0U )
+    {
+        /* Store partial block (fewer than 16 bytes) for a later update or finish */
+        ctx->queue_len = remaining;
+        memcpy( ctx->queue, &input[offset], remaining );
+    }
+
+    return( 0 ); /* no failure path once the parameter checks have passed */
+}
+
+int mbedtls_poly1305_finish( mbedtls_poly1305_context *ctx,
+                             unsigned char mac[16] )
+{
+    /* Absorb any queued partial block, then write the 16-byte MAC to mac.
+     * Returns 0; the parameter checks use MBEDTLS_ERR_POLY1305_BAD_INPUT_DATA. */
+    POLY1305_VALIDATE_RET( ctx != NULL );
+    POLY1305_VALIDATE_RET( mac != NULL );
+
+    if( ctx->queue_len > 0U )
+    {
+        /* Add padding bit, then pad the rest of the block with zeroes */
+        ctx->queue[ctx->queue_len] = 1U;
+        ctx->queue_len++;
+        memset( &ctx->queue[ctx->queue_len], 0,
+                POLY1305_BLOCK_SIZE_BYTES - ctx->queue_len );
+        poly1305_process( ctx, 1U,          /* Process 1 block */
+                          ctx->queue, 0U ); /* Already padded above */
+
+        /* The queue is consumed: reset it so that a repeated call cannot
+         * start from a stale length and write past the end of the queue. */
+        ctx->queue_len = 0U;
+    }
+
+    poly1305_compute_mac( ctx, mac );
+    return( 0 );
+}
+
+int mbedtls_poly1305_mac( const unsigned char key[32],
+                          const unsigned char *input,
+                          size_t ilen,
+                          unsigned char mac[16] )
+{
+    mbedtls_poly1305_context ctx; /* local context, wiped before returning */
+    int ret;
+    POLY1305_VALIDATE_RET( key != NULL );
+    POLY1305_VALIDATE_RET( mac != NULL );
+    POLY1305_VALIDATE_RET( ilen == 0 || input != NULL );
+
+    mbedtls_poly1305_init( &ctx );
+
+    ret = mbedtls_poly1305_starts( &ctx, key ); /* one-shot: starts, update, finish */
+    if( ret != 0 )
+        goto cleanup;
+
+    ret = mbedtls_poly1305_update( &ctx, input, ilen );
+    if( ret != 0 )
+        goto cleanup;
+
+    ret = mbedtls_poly1305_finish( &ctx, mac );
+
+cleanup: /* reached on success and on error alike */
+    mbedtls_poly1305_free( &ctx );
+    return( ret ); /* 0, or the first non-zero code from the steps above */
+}
+
+#endif /* MBEDTLS_POLY1305_ALT */
+
+#if defined(MBEDTLS_SELF_TEST)
+
+static const unsigned char test_keys[2][32] = /* one 32-byte key per self-test case */
+{
+    {   /* case 0: bytes 0..15 become r, bytes 16..31 become s in starts() */
+        0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
+        0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
+        0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
+        0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
+    },
+    {   /* case 1 */
+        0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
+        0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
+        0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
+        0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
+    }
+};
+
+static const unsigned char test_data[2][127] = /* messages; a row shorter than 127 bytes is zero-filled */
+{
+    {   /* case 0: ASCII "Cryptographic Forum Research Group" (34 bytes) */
+        0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,
+        0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,
+        0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,
+        0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,
+        0x75, 0x70
+    },
+    {   /* case 1: 127 bytes of ASCII text beginning "'Twas brillig, and the slithy toves" */
+        0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72,
+        0x69, 0x6c, 0x6c, 0x69, 0x67, 0x2c, 0x20, 0x61,
+        0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,
+        0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f,
+        0x76, 0x65, 0x73, 0x0a, 0x44, 0x69, 0x64, 0x20,
+        0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,
+        0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20,
+        0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77,
+        0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,
+        0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77,
+        0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20,
+        0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,
+        0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74,
+        0x68, 0x65, 0x20, 0x6d, 0x6f, 0x6d, 0x65, 0x20,
+        0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,
+        0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e
+    }
+};
+
+static const size_t test_data_len[2] = /* number of message bytes used from each test_data row */
+{
+    34U,  /* case 0: only the first 34 bytes of the row are the message */
+    127U  /* case 1: the whole row */
+};
+
+static const unsigned char test_mac[2][16] = /* expected 16-byte MAC for each self-test case */
+{
+    {   /* case 0 - NOTE(review): looks like the RFC 7539 section 2.5.2 example; confirm */
+        0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
+        0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
+    },
+    {   /* case 1 - NOTE(review): presumably an RFC 7539 Appendix A.3 vector; confirm */
+        0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61,
+        0xe7, 0x08, 0xdc, 0x7c, 0xbc, 0xc5, 0xeb, 0x62
+    }
+};
+
+#define ASSERT( cond, args ) /* args: parenthesised printf argument list */ \
+    do                                  \
+    {                                   \
+        if( ! ( cond ) )                \
+        {                               \
+            if( verbose != 0 ) /* 'verbose' is taken from the calling function */ \
+                mbedtls_printf args;    \
+                                        \
+            return( -1 ); /* makes the calling function return -1 */ \
+        }                               \
+    }                                   \
+    while( 0 )
+
+int mbedtls_poly1305_self_test( int verbose )
+{
+    unsigned char mac[16]; /* MAC computed for the current test case */
+    unsigned i;
+    int ret;
+
+    for( i = 0U; i < 2U; i++ ) /* one pass per row of the test_* tables */
+    {
+        if( verbose != 0 )
+            mbedtls_printf( "  Poly1305 test %u ", i );
+
+        ret = mbedtls_poly1305_mac( test_keys[i],
+                                    test_data[i],
+                                    test_data_len[i],
+                                    mac );
+        ASSERT( 0 == ret, ( "error code: %i\n", ret ) ); /* returns -1 from here on failure */
+
+        ASSERT( 0 == memcmp( mac, test_mac[i], 16U ), ( "failed (mac)\n" ) );
+
+        if( verbose != 0 )
+            mbedtls_printf( "passed\n" );
+    }
+
+    if( verbose != 0 )
+        mbedtls_printf( "\n" );
+
+    return( 0 ); /* every case produced the expected MAC */
+}
+
+#endif /* MBEDTLS_SELF_TEST */
+
+#endif /* MBEDTLS_POLY1305_C */