Merge pull request #7462 from daverodgman/clz_size_opt

clz size/perf optimisation
diff --git a/library/bignum_core.c b/library/bignum_core.c
index c6d92fb..a99b3be 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -33,8 +33,25 @@
 #include "bn_mul.h"
 #include "constant_time_internal.h"
 
-size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a)
+inline size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a)
 {
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_clz)
+    if (sizeof(mbedtls_mpi_uint) == sizeof(unsigned int)) {
+        return (size_t) __builtin_clz(a);
+    }
+#endif
+#if __has_builtin(__builtin_clzl)
+    if (sizeof(mbedtls_mpi_uint) == sizeof(unsigned long)) {
+        return (size_t) __builtin_clzl(a);
+    }
+#endif
+#if __has_builtin(__builtin_clzll)
+    if (sizeof(mbedtls_mpi_uint) == sizeof(unsigned long long)) {
+        return (size_t) __builtin_clzll(a);
+    }
+#endif
+#endif
     size_t j;
     mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
 
@@ -51,21 +68,17 @@
 
 size_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs)
 {
-    size_t i, j;
+    size_t i; /* stays a size_t: A_limbs is never narrowed to int */
 
-    if (A_limbs == 0) {
-        return 0;
-    }
-
-    for (i = A_limbs - 1; i > 0; i--) {
+    /* Scan from the top limb down; the body never runs if A_limbs == 0. */
+    for (i = A_limbs; i-- > 0;) {
         if (A[i] != 0) {
-            break;
+            /* Bits of the limbs below, plus the significant bits of A[i]. */
+            return (i * biL) + (biL - mbedtls_mpi_core_clz(A[i]));
         }
     }
 
-    j = biL - mbedtls_mpi_core_clz(A[i]);
-
-    return (i * biL) + j;
+    return 0; /* no limbs, or every limb is zero */
 }
 
 /* Convert a big-endian byte array aligned to the size of mbedtls_mpi_uint
diff --git a/library/bignum_core.h b/library/bignum_core.h
index b3d05a3..158d2b3 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -102,9 +102,12 @@
 
 /** Count leading zero bits in a given integer.
  *
+ * \warning     The result is undefined if \p a == 0
+ *
  * \param a     Integer to count leading zero bits.
  *
- * \return      The number of leading zero bits in \p a.
+ * \return      The number of leading zero bits in \p a, if \p a != 0.
+ *              If \p a == 0, the result is undefined.
  */
 size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a);
 
diff --git a/tests/suites/test_suite_bignum_core.function b/tests/suites/test_suite_bignum_core.function
index e084b83..53aa002 100644
--- a/tests/suites/test_suite_bignum_core.function
+++ b/tests/suites/test_suite_bignum_core.function
@@ -309,6 +309,36 @@
 }
 /* END_CASE */
 
+
+/* BEGIN_CASE */
+void mpi_core_clz(int leading_zeros, int trailing_zeros)
+{
+    /* Build a limb with exactly the requested leading/trailing zero bits
+     * (all bits in between set) and check mbedtls_mpi_core_clz() on it. */
+    const size_t limb_bits = sizeof(mbedtls_mpi_uint) * 8;
+    const int zero_bits = leading_zeros + trailing_zeros;
+
+    /* Bail out unchecked when the zeros leave no room for a set bit. */
+    if ((size_t) zero_bits >= limb_bits) {
+        goto exit;
+    }
+
+    /* Start from all ones; that is already the value for zero_bits == 0. */
+    mbedtls_mpi_uint x = ~((mbedtls_mpi_uint) 0);
+    if (zero_bits > 0) {
+        /* ones_width low set bits, shifted up past the trailing zeros. */
+        uint32_t ones_width = limb_bits - leading_zeros - trailing_zeros;
+        x = ((((mbedtls_mpi_uint) 1) << ones_width) - 1) << trailing_zeros;
+    }
+
+    size_t n = mbedtls_mpi_core_clz(x);
+    TEST_EQUAL(n, leading_zeros);
+exit:
+    ;
+}
+/* END_CASE */
+
+
 /* BEGIN_CASE */
 void mpi_core_lt_ct(char *input_X, char *input_Y, int exp_ret)
 {
diff --git a/tests/suites/test_suite_bignum_core.misc.data b/tests/suites/test_suite_bignum_core.misc.data
index b61d708..ba86029 100644
--- a/tests/suites/test_suite_bignum_core.misc.data
+++ b/tests/suites/test_suite_bignum_core.misc.data
@@ -491,3 +491,35 @@
 Fill random core: 42 bytes, 5 missing limbs
 mpi_core_fill_random:42:0:-5:0:MBEDTLS_ERR_MPI_BAD_INPUT_DATA
 
+CLZ: 0 0: all ones
+mpi_core_clz:0:0
+
+CLZ: 1 0
+mpi_core_clz:1:0
+
+CLZ: 1 1
+mpi_core_clz:1:1
+
+CLZ: 4 5
+mpi_core_clz:4:5
+
+CLZ: 8 16
+mpi_core_clz:8:16
+
+CLZ: 31 0
+mpi_core_clz:31:0
+
+CLZ: 32 0
+mpi_core_clz:32:0
+
+CLZ: 33 0
+mpi_core_clz:33:0
+
+CLZ: 63 0
+mpi_core_clz:63:0
+
+CLZ: 64 0
+mpi_core_clz:64:0
+
+CLZ: 100000 0: skip overly long input
+mpi_core_clz:100000:0