DPE: Reuse command buffer to decrease stack usage

The goal to reuse the cmd_buf allocated in dpe_req_mngr.c
to create the big objects (certificate, certificate_chain)
in place rather then allocate a separate buffer on the stack
and later copy them to cmd_buf.

Change-Id: Id26891e9d07e482d481b408041f9f729d0e38798
Signed-off-by: Tamas Ban <tamas.ban@arm.com>
diff --git a/partitions/dice_protection_environment/dpe_cmd_decode.c b/partitions/dice_protection_environment/dpe_cmd_decode.c
index 59e2597..143e12c 100644
--- a/partitions/dice_protection_environment/dpe_cmd_decode.c
+++ b/partitions/dice_protection_environment/dpe_cmd_decode.c
@@ -7,6 +7,7 @@
 
 #include "dpe_cmd_decode.h"
 
+#include <assert.h>
 #include <string.h>
 
 #include "dpe_client.h"
@@ -16,6 +17,41 @@
 #include "qcbor/qcbor_decode.h"
 #include "qcbor/qcbor_spiffy_decode.h"
 
+/*
+ * The goal to reuse the cmd_buf allocated in dpe_req_mngr.c to create the
+ * big objects (certificate, certificate_chain) in place rather then allocate
+ * a separate buffer on the stack and later copy them to cmd_buf.
+ *
+ * The temporary buffer is allocated from the beginning + some offset of the
+ * cmd_buf. The offset is a placeholder for the first few bytes of the
+ * response. The size of temp buf is till the end of the cmd_buf. Usually not
+ * the entire size is necessary it is just convenient to allocate as much. When
+ * the response is encoded then the content of the temp buf is moved to its
+ * final location within the cmd_buf.
+ *
+ * Overlapping copy is not an issue because QCBOR relies on memmove under the
+ * hood which handles this scenario.
+ *
+ * Note:
+ *   Only a single temp buffer can be allocated with this trick per DPE command.
+ */
+#define DPE_RESPONSE_HEADER_SIZE  20
+#define ALLOC_TEMP_BUF      (uint8_t *)encode_ctx->OutBuf.UB.ptr + \
+                                       DPE_RESPONSE_HEADER_SIZE
+
+#define SIZEOF_TEMP_BUF     encode_ctx->OutBuf.UB.len - \
+                                       DPE_RESPONSE_HEADER_SIZE
+
+/* Decreasing the placeholder size a bit because UsefulOutBuf_GetEndPosition()
+ * cannot determine the exact number of bytes to encode array and map objects
+ * until they are closed. Allocating 1 byte for both allows encoding up to
+ * 23 elements per type (map or array) in a single byte, so at the end it
+ * consumes 2 bytes. It is very unlikely to have more elements in a map or array
+ * than 23 in a DPE response.
+ */
+#define CHECK_OVERFLOW_TO_TEMP_BUF assert((DPE_RESPONSE_HEADER_SIZE - 2) > \
+                        UsefulOutBuf_GetEndPosition(&encode_ctx->OutBuf))
+
 #define COUNT_ARGS(arg)  (arg)++
 
 #define CHECK_AND_COUNT_OPTIONAL_ARGUMENT(decode_ctx)                          \
@@ -139,7 +175,7 @@
     DiceInputValues dice_inputs;
     int new_context_handle;
     int new_parent_context_handle;
-    uint8_t new_certificate_buf[DICE_CERT_SIZE];
+    uint8_t *new_certificate_buf = ALLOC_TEMP_BUF;
     uint8_t exported_cdi_buf[DICE_MAX_ENCODED_CDI_SIZE];
     uint32_t cert_id;
     size_t new_certificate_actual_size = 0;
@@ -249,7 +285,7 @@
                                      &new_context_handle,
                                      &new_parent_context_handle,
                                      new_certificate_buf,
-                                     sizeof(new_certificate_buf),
+                                     SIZEOF_TEMP_BUF,
                                      &new_certificate_actual_size,
                                      exported_cdi_buf,
                                      sizeof(exported_cdi_buf),
@@ -275,6 +311,7 @@
      * add_encoded_layer_certificate. Add it as a byte string so that its
      * decoding can be skipped and the CBOR returned to the caller.
      */
+    CHECK_OVERFLOW_TO_TEMP_BUF;
     QCBOREncode_AddBytesToMapN(encode_ctx, DPE_DERIVE_CONTEXT_NEW_CERTIFICATE,
                                (UsefulBufC){ new_certificate_buf,
                                              new_certificate_actual_size });
@@ -371,7 +408,7 @@
     size_t public_key_size;
     const uint8_t *label = NULL;
     size_t label_size;
-    uint8_t certificate_buf[DICE_CERT_SIZE];
+    uint8_t *certificate_buf = ALLOC_TEMP_BUF;
     size_t certificate_actual_size;
     uint8_t derived_public_key_buf[DPE_ATTEST_PUB_KEY_SIZE];
     size_t derived_public_key_actual_size;
@@ -446,7 +483,7 @@
     dpe_err = certify_key_request(context_handle, retain_context, public_key,
                                   public_key_size, label, label_size,
                                   certificate_buf,
-                                  sizeof(certificate_buf),
+                                  SIZEOF_TEMP_BUF,
                                   &certificate_actual_size,
                                   derived_public_key_buf,
                                   sizeof(derived_public_key_buf),
@@ -466,6 +503,7 @@
      * key implementation. Add it as a byte string so that its decoding can be
      * skipped and the CBOR returned to the caller.
      */
+    CHECK_OVERFLOW_TO_TEMP_BUF;
     QCBOREncode_AddBytesToMapN(encode_ctx, DPE_CERTIFY_KEY_CERTIFICATE,
                                (UsefulBufC){ certificate_buf,
                                              certificate_actual_size });
@@ -493,7 +531,7 @@
     int context_handle;
     bool retain_context;
     bool clear_from_context;
-    uint8_t certificate_chain_buf[DICE_CERT_CHAIN_SIZE];
+    uint8_t *certificate_chain_buf = ALLOC_TEMP_BUF;
     size_t certificate_chain_actual_size;
     int new_context_handle;
     QCBORItem item;
@@ -553,7 +591,7 @@
                                             retain_context,
                                             clear_from_context,
                                             certificate_chain_buf,
-                                            sizeof(certificate_chain_buf),
+                                            SIZEOF_TEMP_BUF,
                                             &certificate_chain_actual_size,
                                             &new_context_handle);
     if (dpe_err != DPE_NO_ERROR) {
@@ -570,6 +608,7 @@
      * chain implementation. Add it as a byte string so that its decoding can be
      * skipped and the CBOR returned to the caller.
      */
+    CHECK_OVERFLOW_TO_TEMP_BUF;
     QCBOREncode_AddBytesToMapN(encode_ctx, DPE_GET_CERTIFICATE_CHAIN_CERTIFICATE_CHAIN,
                                (UsefulBufC){ certificate_chain_buf,
                                              certificate_chain_actual_size });