Relax and test aliasing rules

This is consistent with the general rules documented at the top of the
file:
- when computing GCD(A, N), there is no modular arithmetic, so the
  output can alias any of the inputs;
- when computing a modular inverse, N is the modulus, so it can't be
  aliased by any of the outputs (we'll use it for modular operations
  over the entire course of the function's execution).

But since this function has two modes of operations with different
aliasing rules (G can alias N only if I == NULL), I think it should
really be stated explicitly.

Signed-off-by: Manuel Pégourié-Gonnard <manuel.pegourie-gonnard@arm.com>
diff --git a/library/bignum_core.c b/library/bignum_core.c
index 1e80a5b..e020da1 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -1094,11 +1094,19 @@
      * Initial values:
      * u, v = A, N
      * q, r = 0, 1
+     *
+     * We only write to G (aka v) after reading from inputs (A and N), which
+     * allows aliasing, except with N when I != NULL, as then we'll be operating
+     * mod N on q and r later - see the public documentation.
+     *
+     * Also avoid possible UB with memcpy when src == dst.
      */
     memcpy(u, A, A_limbs * ciL);
     memset((char *) u + A_limbs * ciL, 0, (N_limbs - A_limbs) * ciL);
 
-    memcpy(v, N, N_limbs * ciL);
+    if (v != N) {
+        memcpy(v, N, N_limbs * ciL);
+    }
 
     if (I != NULL) {
         memset(q, 0, N_limbs * ciL);
diff --git a/library/bignum_core.h b/library/bignum_core.h
index 29e05cd..1589c34 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -826,7 +826,9 @@
  *
  * Requires N to be odd, and 0 <= A <= N.
  *
- * None of the parameters may alias or overlap another.
+ * When I == NULL (computing only the GCD), G may alias A or N.
+ * When I != NULL (computing the modular inverse), G or I may alias A
+ * but none of them may alias N (the modulus).
  *
  * \param[out]    G       The GCD of \p A and \p N.
  *                        Must have the same number of limbs as \p N.
diff --git a/tests/suites/test_suite_bignum_core.function b/tests/suites/test_suite_bignum_core.function
index 81c2242..93782c1 100644
--- a/tests/suites/test_suite_bignum_core.function
+++ b/tests/suites/test_suite_bignum_core.function
@@ -1416,6 +1416,38 @@
     mbedtls_free(T);
     T = NULL;
 
+    /* GCD only, G aliased to N */
+    TEST_CALLOC(G, N_limbs);
+    memcpy(G, N, bytes);
+
+    TEST_CALLOC(T, 4 * N_limbs);
+    memset(T, 'T', bytes);
+
+    mbedtls_mpi_core_gcd_modinv_odd(G, NULL, A, A_limbs, G, N_limbs, T);
+    TEST_CF_PUBLIC(G, bytes);
+    TEST_MEMORY_COMPARE(G, bytes, exp_G, bytes);
+
+    mbedtls_free(G);
+    G = NULL;
+    mbedtls_free(T);
+    T = NULL;
+
+    /* GCD only, G aliased to A (hence A_limbs = N_limbs) */
+    TEST_CALLOC(G, N_limbs);
+    memcpy(G, A, A_limbs * sizeof(mbedtls_mpi_uint));
+
+    TEST_CALLOC(T, 4 * N_limbs);
+    memset(T, 'T', bytes);
+
+    mbedtls_mpi_core_gcd_modinv_odd(G, NULL, G, N_limbs, N, N_limbs, T);
+    TEST_CF_PUBLIC(G, bytes);
+    TEST_MEMORY_COMPARE(G, bytes, exp_G, bytes);
+
+    mbedtls_free(G);
+    G = NULL;
+    mbedtls_free(T);
+    T = NULL;
+
     /*
      * Test GCD + modinv
      */
@@ -1437,6 +1469,65 @@
         TEST_MEMORY_COMPARE(I, bytes, exp_I, bytes);
     }
 
+    mbedtls_free(G);
+    G = NULL;
+    mbedtls_free(I);
+    I = NULL;
+    mbedtls_free(T);
+    T = NULL;
+
+    /* GCD + modinv, G aliased to A */
+    TEST_CALLOC(G, N_limbs);
+    memcpy(G, A, A_limbs * sizeof(mbedtls_mpi_uint));
+
+    TEST_CALLOC(I, N_limbs);
+    memset(I, 'I', bytes);
+
+    TEST_CALLOC(T, 5 * N_limbs);
+    memset(T, 'T', bytes);
+
+    mbedtls_mpi_core_gcd_modinv_odd(G, I, G, N_limbs, N, N_limbs, T);
+
+    TEST_CF_PUBLIC(G, bytes);
+    TEST_MEMORY_COMPARE(G, bytes, exp_G, bytes);
+    if (got_I) {
+        TEST_CF_PUBLIC(I, bytes);
+        TEST_MEMORY_COMPARE(I, bytes, exp_I, bytes);
+    }
+
+    mbedtls_free(G);
+    G = NULL;
+    mbedtls_free(I);
+    I = NULL;
+    mbedtls_free(T);
+    T = NULL;
+
+    /* GCD + modinv, I aliased to A */
+    TEST_CALLOC(G, N_limbs);
+    memset(G, 'G', bytes);
+
+    TEST_CALLOC(I, N_limbs);
+    memcpy(I, A, A_limbs * sizeof(mbedtls_mpi_uint));
+
+    TEST_CALLOC(T, 5 * N_limbs);
+    memset(T, 'T', bytes);
+
+    mbedtls_mpi_core_gcd_modinv_odd(G, I, I, N_limbs, N, N_limbs, T);
+
+    TEST_CF_PUBLIC(G, bytes);
+    TEST_MEMORY_COMPARE(G, bytes, exp_G, bytes);
+    if (got_I) {
+        TEST_CF_PUBLIC(I, bytes);
+        TEST_MEMORY_COMPARE(I, bytes, exp_I, bytes);
+    }
+
+    mbedtls_free(G);
+    G = NULL;
+    mbedtls_free(I);
+    I = NULL;
+    mbedtls_free(T);
+    T = NULL;
+
 exit:
     mbedtls_free(A);
     mbedtls_free(N);