Import mbedtls-3.6.0

Imports Mbed TLS 3.6.0 from https://github.com/Mbed-TLS/mbedtls.git
tags mbedtls-3.6.0, v3.6.0

Files that are not needed are removed:

cd lib/libmbedtls
rm -rf mbedtls
cp -R path/to/mbedtls-3.6.0/mbedtls .
cd mbedtls
rm CMakeLists.txt DartConfiguration.tcl Makefile
rm .gitignore .travis.yml .pylintrc .globalrc .mypy.ini BRANCHES.md
rm include/.gitignore include/CMakeLists.txt library/.gitignore
rm library/CMakeLists.txt library/Makefile
rm -r cmake
rm -rf .git .github doxygen configs programs scripts tests visualc
rm -rf 3rdparty ChangeLog.d docs pkgconfig .gitmodules .readthedocs.yaml
rm library/mps_*
cd ..
git add mbedtls

This time we leave library/psa_* present to enable TLS 1.3 features.

This is a complete overwrite of previous code so earlier changes in the
previous branch import/mbedtls-3.4.0 will be added on top of this commit.

Signed-off-by: Tom Van Eyck <tom.vaneyck@kuleuven.be>
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/lib/libmbedtls/mbedtls/library/alignment.h b/lib/libmbedtls/mbedtls/library/alignment.h
index a518a8a..a17001d 100644
--- a/lib/libmbedtls/mbedtls/library/alignment.h
+++ b/lib/libmbedtls/mbedtls/library/alignment.h
@@ -5,19 +5,7 @@
  */
 /*
  *  Copyright The Mbed TLS Contributors
- *  SPDX-License-Identifier: Apache-2.0
- *
- *  Licensed under the Apache License, Version 2.0 (the "License"); you may
- *  not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
+ *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  */
 
 #ifndef MBEDTLS_LIBRARY_ALIGNMENT_H
@@ -27,8 +15,6 @@
 #include <string.h>
 #include <stdlib.h>
 
-#include "mbedtls/build_info.h"
-
 /*
  * Define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS for architectures where unaligned memory
  * accesses are known to be efficient.
@@ -37,15 +23,83 @@
  * efficient when this is not defined.
  */
 #if defined(__ARM_FEATURE_UNALIGNED) \
-    || defined(__i386__) || defined(__amd64__) || defined(__x86_64__)
+    || defined(MBEDTLS_ARCH_IS_X86) || defined(MBEDTLS_ARCH_IS_X64) \
+    || defined(MBEDTLS_PLATFORM_IS_WINDOWS_ON_ARM64)
 /*
  * __ARM_FEATURE_UNALIGNED is defined where appropriate by armcc, gcc 7, clang 9
  * (and later versions) for Arm v7 and later; all x86 platforms should have
  * efficient unaligned access.
+ *
+ * https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#alignment
+ * specifies that on Windows-on-Arm64, unaligned access is safe (except for uncached
+ * device memory).
  */
 #define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
 #endif
 
+#if defined(__IAR_SYSTEMS_ICC__) && \
+    (defined(MBEDTLS_ARCH_IS_ARM64) || defined(MBEDTLS_ARCH_IS_ARM32) \
+    || defined(__ICCRX__) || defined(__ICCRL78__) || defined(__ICCRISCV__))
+#pragma language=save
+#pragma language=extended
+#define MBEDTLS_POP_IAR_LANGUAGE_PRAGMA
+/* IAR recommend this technique for accessing unaligned data in
+ * https://www.iar.com/knowledge/support/technical-notes/compiler/accessing-unaligned-data
+ * This results in a single load / store instruction (if unaligned access is supported).
+ * According to that document, this is only supported on certain architectures.
+ */
+    #define UINT_UNALIGNED
+typedef uint16_t __packed mbedtls_uint16_unaligned_t;
+typedef uint32_t __packed mbedtls_uint32_unaligned_t;
+typedef uint64_t __packed mbedtls_uint64_unaligned_t;
+#elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \
+    ((MBEDTLS_GCC_VERSION < 60300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)))
+/*
+ * gcc may generate a branch to memcpy for calls like `memcpy(dest, src, 4)` rather than
+ * generating some LDR or LDRB instructions (similar for stores).
+ *
+ * This is architecture dependent: x86-64 seems fine even with old gcc; 32-bit Arm
+ * is affected. To keep it simple, we enable for all architectures.
+ *
+ * For versions of gcc < 5.4.0 this issue always happens.
+ * For gcc < 6.3.0, this issue happens at -O0
+ * For all versions, this issue happens iff unaligned access is not supported.
+ *
+ * For gcc 4.x, this implementation will generate byte-by-byte loads even if unaligned access is
+ * supported, which is correct but not optimal.
+ *
+ * For performance (and code size, in some cases), we want to avoid the branch and just generate
+ * some inline load/store instructions since the access is small and constant-size.
+ *
+ * The manual states:
+ * "The packed attribute specifies that a variable or structure field should have the smallest
+ *  possible alignment—one byte for a variable"
+ * https://gcc.gnu.org/onlinedocs/gcc-4.5.4/gcc/Variable-Attributes.html
+ *
+ * Previous implementations used __attribute__((__aligned__(1)), but had issues with a gcc bug:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94662
+ *
+ * Tested with several versions of GCC from 4.5.0 up to 13.2.0
+ * We don't enable for older than 4.5.0 as this has not been tested.
+ */
+ #define UINT_UNALIGNED_STRUCT
+typedef struct {
+    uint16_t x;
+} __attribute__((packed)) mbedtls_uint16_unaligned_t;
+typedef struct {
+    uint32_t x;
+} __attribute__((packed)) mbedtls_uint32_unaligned_t;
+typedef struct {
+    uint64_t x;
+} __attribute__((packed)) mbedtls_uint64_unaligned_t;
+ #endif
+
+/*
+ * We try to force mbedtls_(get|put)_unaligned_uintXX to be always inline, because this results
+ * in code that is both smaller and faster. IAR and gcc both benefit from this when optimising
+ * for size.
+ */
+
 /**
  * Read the unsigned 16 bits integer from the given address, which need not
  * be aligned.
@@ -53,10 +107,23 @@
  * \param   p pointer to 2 bytes of data
  * \return  Data at the given address
  */
-inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
 {
     uint16_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    r = *p16;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    r = p16->x;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -67,9 +134,22 @@
  * \param   p pointer to 2 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    *p16 = x;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+    p16->x = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
 /**
@@ -79,10 +159,23 @@
  * \param   p pointer to 4 bytes of data
  * \return  Data at the given address
  */
-inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
 {
     uint32_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    r = *p32;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    r = p32->x;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -93,9 +186,22 @@
  * \param   p pointer to 4 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    *p32 = x;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+    p32->x = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
 /**
@@ -105,10 +211,23 @@
  * \param   p pointer to 8 bytes of data
  * \return  Data at the given address
  */
-inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
 {
     uint64_t r;
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    r = *p64;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    r = p64->x;
+#else
     memcpy(&r, p, sizeof(r));
+#endif
     return r;
 }
 
@@ -119,11 +238,28 @@
  * \param   p pointer to 8 bytes of data
  * \param   x data to write
  */
-inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
+#if defined(__IAR_SYSTEMS_ICC__)
+#pragma inline = forced
+#elif defined(__GNUC__)
+__attribute__((always_inline))
+#endif
+static inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
 {
+#if defined(UINT_UNALIGNED)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    *p64 = x;
+#elif defined(UINT_UNALIGNED_STRUCT)
+    mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+    p64->x = x;
+#else
     memcpy(p, &x, sizeof(x));
+#endif
 }
 
+#if defined(MBEDTLS_POP_IAR_LANGUAGE_PRAGMA)
+#pragma language=restore
+#endif
+
 /** Byte Reading Macros
  *
  * Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
@@ -183,9 +319,22 @@
 
 /* Detect armcc built-in byteswap routine */
 #if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 410000) && !defined(MBEDTLS_BSWAP32)
+#if defined(__ARM_ACLE)  /* ARM Compiler 6 - earlier versions don't need a header */
+#include <arm_acle.h>
+#endif
 #define MBEDTLS_BSWAP32 __rev
 #endif
 
+/* Detect IAR built-in byteswap routine */
+#if defined(__IAR_SYSTEMS_ICC__)
+#if defined(__ARM_ACLE)
+#include <arm_acle.h>
+#define MBEDTLS_BSWAP16(x) ((uint16_t) __rev16((uint32_t) (x)))
+#define MBEDTLS_BSWAP32 __rev
+#define MBEDTLS_BSWAP64 __revll
+#endif
+#endif
+
 /*
  * Where compiler built-ins are not present, fall back to C code that the
  * compiler may be able to detect and transform into the relevant bswap or
@@ -230,10 +379,25 @@
 #endif /* !defined(MBEDTLS_BSWAP64) */
 
 #if !defined(__BYTE_ORDER__)
+
+#if defined(__LITTLE_ENDIAN__)
+/* IAR defines __xxx_ENDIAN__, but not __BYTE_ORDER__ */
+#define MBEDTLS_IS_BIG_ENDIAN 0
+#elif defined(__BIG_ENDIAN__)
+#define MBEDTLS_IS_BIG_ENDIAN 1
+#else
 static const uint16_t mbedtls_byte_order_detector = { 0x100 };
 #define MBEDTLS_IS_BIG_ENDIAN (*((unsigned char *) (&mbedtls_byte_order_detector)) == 0x01)
+#endif
+
 #else
-#define MBEDTLS_IS_BIG_ENDIAN ((__BYTE_ORDER__) == (__ORDER_BIG_ENDIAN__))
+
+#if (__BYTE_ORDER__) == (__ORDER_BIG_ENDIAN__)
+#define MBEDTLS_IS_BIG_ENDIAN 1
+#else
+#define MBEDTLS_IS_BIG_ENDIAN 0
+#endif
+
 #endif /* !defined(__BYTE_ORDER__) */
 
 /**