Merge branch 'AdvancedDecode' of https://github.com/laurencelundblade/QCBOR into AdvancedDecode

commit: d02ea8ed7e3b75fa0a1437659b52a5a48ad6dbee [log] [tgz]
author: Laurence Lundblade <lgl@securitytheory.com> Sat Jun 06 18:38:19 2020 -0700
committer: Laurence Lundblade <lgl@securitytheory.com> Sat Jun 06 18:38:19 2020 -0700
tree: 7d36ede717d2fe7805efca1720de4ff19b077765
parent: da09597e2408047abda5594f5e41b2a783e02968 [diff]
parent: 830fbf95e3b165fbd1767dedda32590d6bb23c26 [diff]
diff --git a/QCBOR.xcodeproj/project.pbxproj b/QCBOR.xcodeproj/project.pbxproj
index a720857..ad20feb 100644
--- a/QCBOR.xcodeproj/project.pbxproj
+++ b/QCBOR.xcodeproj/project.pbxproj

@@ -69,6 +69,7 @@
 		E73B57632161F8F70080D658 /* run_tests.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = run_tests.c; path = test/run_tests.c; sourceTree = "<group>"; tabWidth = 3; };
 		E73B57642161F8F80080D658 /* run_tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = run_tests.h; path = test/run_tests.h; sourceTree = "<group>"; };
 		E74BF411245D6713002CE8E8 /* UsefulBuf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UsefulBuf.h; path = inc/qcbor/UsefulBuf.h; sourceTree = "<group>"; };
+		E74FA9FE247D2F2C003F8ECE /* Tagging.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = Tagging.md; sourceTree = "<group>"; };
 		E772022723B52C02006E966E /* QCBOR_Disable_Exp_Mantissa */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = QCBOR_Disable_Exp_Mantissa; sourceTree = BUILT_PRODUCTS_DIR; };
 		E776E07C214ADF7F00E67947 /* QCBOR */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = QCBOR; sourceTree = BUILT_PRODUCTS_DIR; };
 		E776E08C214AE07400E67947 /* qcbor_encode.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = qcbor_encode.c; path = src/qcbor_encode.c; sourceTree = "<group>"; tabWidth = 3; };
@@ -106,6 +107,7 @@
 			isa = PBXGroup;
 			children = (
 				E776E161214EE19C00E67947 /* README.md */,
+				E74FA9FE247D2F2C003F8ECE /* Tagging.md */,
 				E776E096214AE0C700E67947 /* cmd_line_main.c */,
 				E776E092214AE07C00E67947 /* inc */,
 				E776E08B214AE06600E67947 /* src */,

diff --git a/README.md b/README.md
index 1f8a646..334dff3 100644
--- a/README.md
+++ b/README.md

@@ -121,7 +121,7 @@
 
 ## Code Size
 
-These are approximate sizes on 64-bit x86 with the -Os optimization.
+These are approximate sizes on a 64-bit x86 CPU with the -Os optimization.
 
     |               | smallest | largest |  
     |---------------|----------|---------|

diff --git a/Tagging.md b/Tagging.md
new file mode 100644
index 0000000..c3481f5
--- /dev/null
+++ b/Tagging.md

@@ -0,0 +1,102 @@
+#  Types and Tagging in CBOR
+
+## New Types
+
+CBOR provides a means for defining new data types that are either 
+aggregates of the primitive types or the association of further semantics
+to a primitive type. 
+
+An aggregate is similar to a C structure. A bigfloat is an example. It
+is an array of two data items, an exponent and a mantissa. 
+
+An example of association of further semantics to a primitive type
+is an epoch date, where the new data type is 
+a primitive integer that is to be interpreted as a date.
+
+## Explicit Tags
+
+These new types can be explicitly tagged by preceding them
+with a CBOR Item of major type 6. The tag data item is a positive
+integer. 
+
+For example, the epoch date looks like this:
+
+
+A big float looks like this:
+
+
+The data item tagged is known as the tag content. Most tags
+require the content to be of a specific type or types. A few work
+with content of any type.
+
+There may be more than one explicit tag for a single tag content. When
+this is done, they nest. The order of the explicit tags is significant. The explicit
+tag closest to the content is applied first. That then becomes the
+content for the next closest tag.
+
+If the content for a specific tag is not of the right type then
+the encoded CBOR is invalid.
+
+The explicit tag data item is not always required when the data type is used. In some situations
+in some CBOR protocols, they may actually be prohibited.
+
+## Standard Tags and the Tags Registry
+
+Tags used in CBOR protocols should at least be registered in 
+the IANA CBOR Tags Registry. A small number of tags (0-23),
+are full IETF standards. Further, tags 24-255 require published
+documentation, but are not full IETF standards. Beyond
+tag 255, the tags are first come first served. 
+
+There is no range for private use, so any tag used in a
+CBOR protocol should be registered. The range of tag
+values is very large to accommodate this.
+
+It is common to use  data types from the registry in a CBOR protocol
+without the explicit tag, so in a way the registry is a registry
+of data types.
+
+## When Explicit Tags are Required
+
+In many CBOR protocols, the new type of a data item
+can be known implicitly without any explicit tag. In that
+case the explicit tag is redundant. For example,
+if a data item in a map is labeled the "expiration date",
+it can be inferred that the type is a date.
+
+All CBOR protocols that use registered data types
+should explicitly say for each occurrence whether
+the explicit tag is required or not. If they say it is required,
+it must always be present and it is a protocol decoding
+error if not. Usually the tag is explicitly required because
+it is not possible to infer the type from the context
+of the protocol. 
+
+If the protocol says the explicit tag is not required, it
+is a decoding error if it is present.
+
+That is, tags are not optional in a protocol (even though they
+were called "optional tags" in RFC 7049).
+
+Part of the result of this is that unknown tags generally
+can't be ignored during decoding. They are not like
+email or HTTP headers.
+
+The QCBOR encoding API for standard registered types
+has an option to include the tag or not. Setting this
+flag depends on the protocol definition and should only
+be true if the protocol requires explicit tagging.
+
+The QCBOR decoding APIs for standard registered types
+have a tag requirement flag. If true it requires the tag
+to be present and sets an error if it is absent. If false
+an error is set if it is present.
+
+During decoding, it will sometimes be necessary to 
+peek-decode the data item with the generic PeekNext()
+first to know its type, then call the appropriate GetXxxx()
+to actually decode and consume it. When this is necessary
+depends on the design and flow of the protocol.
+
+
+

diff --git a/inc/qcbor/qcbor_decode.h b/inc/qcbor/qcbor_decode.h
index 87b0fb6..a4a96cc 100644
--- a/inc/qcbor/qcbor_decode.h
+++ b/inc/qcbor/qcbor_decode.h

@@ -357,6 +357,41 @@
 
    uint16_t uTags[QCBOR_MAX_TAGS_PER_ITEM];
 
+   /*
+
+    0-127, the tag value itself
+    128-255, index into tag map
+
+    The decode context stores 4 tags (32 bytes)
+
+    Could
+    - allocate space in decode context for new tags encountered, say 32 bytes worth for
+      four extra tags.
+    - require caller to pass in list of tags they are interested in, up to 128 even
+      This already exists. It is an error when an unknown tag is encountered.
+    - allow caller to give space to store tags if using tags > 128 in value
+      It is only an error if more distinct tag values are encountered
+      than there is space to remember them.
+    - Use storage allocator to expand space needed.
+
+
+    */
+   uint8_t  auTags[4]; // 4 bytes
+
+   /*
+    Or use the existing tag mapping strategy, and
+    store the offset in 4-bits accommodating use
+    of 64 tag values in a decode session and using
+    only 2 bytes to store the tag list.
+
+    Can eliminate getNextWithTags.
+
+    Add new function to get the tag value.
+
+    Is annoying to find tag value on error with
+    existing scheme.
+    */
+
 } QCBORItem;
 
 
@@ -1286,6 +1321,11 @@
 static void QCBORDecode_ExitArray(QCBORDecodeContext *pCtx);
 
 
+void QCBORDecode_EnterBstrWrapped(QCBORDecodeContext *pCtx, uint8_t uTagRequirement);
+void QCBORDecode_EnterBstrWrappedFromMapN(QCBORDecodeContext *pCtx, uint8_t uTagRequirement, int64_t uLabel);
+void QCBORDecode_EnterBstrWrappedFromMapSZ(QCBORDecodeContext *pCtx, uint8_t uTagRequirement, const char  *szLabel);
+void QCBORDecode_ExitBstrWrapped(QCBORDecodeContext *pCtx);
+
 
 /*
  Restarts fetching of items in a map to the start of the
@@ -1302,6 +1342,28 @@
 
 
 /*
+ @brief Get an item in map by label.
+
+ @param[in] pCtx   The decode context.
+ @param[in] nLabel The integer label.
+ @param[in] uQcborType  The QCBOR type. One of QCBOR_TYPE_XXX.
+ @param[out] pItem  The returned item
+
+ A map must have been entered to use this. If not \ref xxx is set.
+
+ The map is searched for an item of the requested label and type.
+ QCBOR_TYPE_ANY can be given to search for the label without
+ matching the type.
+
+ This will always search the entire map. This will always perform
+  duplicate label detection, setting \ref QCBOR_ERR_DUPLICATE_LABEL if there is more than
+ one occurrence of the label being searched for.
+
+ This performs a full decode of every item in the map
+ being searched, which involves a full pre-order traversal
+ of every item. For  maps with little nesting, this
+ is of little consequence, but 
+
  Get an item out of a map.
  
  Decoding must be in map mode for this to work.
@@ -1320,35 +1382,41 @@
 */
 void QCBORDecode_GetItemInMapN(QCBORDecodeContext *pCtx,
                                int64_t nLabel,
-                               uint8_t qcbor_type,
+                               uint8_t uQcborType,
                                QCBORItem *pItem);
 
-
-QCBORError QCBORDecode_GetItemInMapSZ(QCBORDecodeContext *pCtx,
-                                      const char *szLabel,
-                                      uint8_t qcbor_type,
-                                      QCBORItem *pItem);
+void QCBORDecode_GetItemInMapSZ(QCBORDecodeContext *pCtx,
+                                const char *szLabel,
+                                uint8_t qcbor_type,
+                                QCBORItem *pItem);
 
 /*
+
+ @param[in] pCtx   The decode context.
+ @param[in,out] pItemList  On input the items to search for. On output the returned items.
+ 
  This gets several labeled items out of a map.
  
- pItemArray is an array of items terminated by an item
+ pItemList is an array of items terminated by an item
  with uLabelType QCBOR_TYPE_NONE.
- 
- On input the the array of items is the list of labels to fetch
- items for.
- 
- On output the array is the data items found. If the label
- wasn't found, uDataType is QCBOR_TYPE_NONE.
- 
+
+ On input the labels to search for are in the uLabelType and
+ label fields in the items in pItemList.
+
+ Also on input are the requested QCBOR types in the field uDataType.
+ To match any type, searching just by label, uDataType
+ can be QCBOR_TYPE_ANY.
+
  This is a CPU-efficient way to decode a bunch of items in a map. It
  is more efficient than scanning each individually because the map
  only needs to be traversed once.
  
- If any duplicate labels are detected, this returns an error.
+ If any duplicate labels are detected, this returns \ref QCBOR_ERR_DUPLICATE_LABEL.
  
  This will return maps and arrays that are in the map, but
- provides no way to descend into and decode them.
+ provides no way to descend into and decode them. Use
+ QCBORDecode_EnterMapinMapN(), QCBORDecode_EnterArrayInMapN()
+ and such to descend into and process maps and arrays.
  
  */
 QCBORError QCBORDecode_GetItemsInMap(QCBORDecodeContext *pCtx, QCBORItem *pItemList);

diff --git a/src/qcbor_decode.c b/src/qcbor_decode.c
index 749327b..3d7fd4b 100644
--- a/src/qcbor_decode.c
+++ b/src/qcbor_decode.c

@@ -2124,11 +2124,16 @@
 }
 
 
-QCBORError QCBORDecode_GetItemInMapSZ(QCBORDecodeContext *pMe,
-                                      const char         *szLabel,
-                                      uint8_t            uQcborType,
-                                      QCBORItem         *pItem)
+void QCBORDecode_GetItemInMapSZ(QCBORDecodeContext *pMe,
+                                const char         *szLabel,
+                                uint8_t            uQcborType,
+                                QCBORItem         *pItem)
 {
+   if(pMe->uLastError != QCBOR_SUCCESS) {
+      return;
+   }
+
+
    QCBORItem OneItemSeach[2];
 
    OneItemSeach[0].uLabelType   = QCBOR_TYPE_TEXT_STRING;
@@ -2138,16 +2143,15 @@
 
    QCBORError nReturn = MapSearch(pMe, OneItemSeach, NULL, NULL);
    if(nReturn) {
-     return nReturn;
+      pMe->uLastError = (uint8_t)nReturn;
    }
 
+
    if(OneItemSeach[0].uDataType == QCBOR_TYPE_NONE) {
-      return QCBOR_ERR_NOT_FOUND;
+      pMe->uLastError = QCBOR_ERR_NOT_FOUND;
    }
 
    *pItem = OneItemSeach[0];
-
-   return QCBOR_SUCCESS;
 }
 
 
@@ -2696,12 +2700,22 @@
 static inline QCBORError ConvertNegativeBigNumToSigned(const UsefulBufC BigNum, int64_t *pResult)
 {
    uint64_t uResult;
-   QCBORError uError = ConvertBigNumToUnsigned(BigNum, INT64_MAX-1, &uResult);
+   /* negative int furthest from zero is INT64_MIN
+      which is expressed as -INT64_MAX-1. The value of
+    a negative bignum is -n-1, one further from zero
+    than the positive bignum */
+
+   /* say INT64_MIN is -2; then INT64_MAX is 1.
+    To fit, -n-1 >= INT64_MIN.
+    Then -n-1 >= -INT64_MAX - 1.
+    Then n <= INT64_MAX. */
+   QCBORError uError = ConvertBigNumToUnsigned(BigNum, INT64_MAX, &uResult);
    if(uError) {
       return uError;
    }
    /* Cast is safe because ConvertBigNum is told to limit to INT64_MAX */
    // TODO: this code is incorrect. See RFC 7049
+   uResult++; // this is the -1 in -n-1
    *pResult = -(int64_t)uResult;
    return QCBOR_SUCCESS;
 }
@@ -3444,7 +3458,7 @@
 
       case QCBOR_TYPE_NEGBIGNUM:
          if(uOptions & QCBOR_CONVERT_TYPE_BIG_NUM) {
-            *pdValue = -ConvertBigNumToDouble(pItem->val.bigNum);
+            *pdValue = -1-ConvertBigNumToDouble(pItem->val.bigNum);
          } else {
             return QCBOR_ERR_CONVERSION_NOT_REQUESTED;
          }
@@ -3479,7 +3493,7 @@
 
       case QCBOR_TYPE_BIGFLOAT_NEG_BIGNUM:
         if(uOptions & QCBOR_CONVERT_TYPE_BIGFLOAT) {
-         double dMantissa = -ConvertBigNumToDouble(pItem->val.expAndMantissa.Mantissa.bigNum);
+         double dMantissa = -1-ConvertBigNumToDouble(pItem->val.expAndMantissa.Mantissa.bigNum);
          *pdValue = dMantissa * exp2((double)pItem->val.expAndMantissa.nExponent);
          } else {
             return QCBOR_ERR_CONVERSION_NOT_REQUESTED;

diff --git a/test/qcbor_decode_tests.c b/test/qcbor_decode_tests.c
index fca76e9..11e47f2 100644
--- a/test/qcbor_decode_tests.c
+++ b/test/qcbor_decode_tests.c

@@ -4011,6 +4011,16 @@
 
 static const struct NumberConversion NumberConversions[] = {
    {
+      "negative bignum -1",
+      {(uint8_t[]){0xc3, 0x41, 0x00}, 3},
+      -1,
+      QCBOR_SUCCESS,
+      0,
+      QCBOR_ERR_NUMBER_SIGN_CONVERSION,
+      -1.0,
+      QCBOR_SUCCESS
+   },
+   {
       "Decimal Fraction with positive bignum 257 * 10e3",
       {(uint8_t[]){0xC4, 0x82, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
                                0xC2, 0x42, 0x01, 0x01}, 15},
@@ -4022,14 +4032,14 @@
       QCBOR_SUCCESS
    },
    {
-      "bigfloat with negative bignum -257 * 2e3",
+      "bigfloat with negative bignum -258 * 2e3",
       {(uint8_t[]){0xC5, 0x82, 0x1B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
                                0xC3, 0x42, 0x01, 0x01}, 15},
-      -2056,
+      -2064,
       QCBOR_SUCCESS,
       0,
       QCBOR_ERR_NUMBER_SIGN_CONVERSION,
-      -2056.0,
+      -2064.0,
       QCBOR_SUCCESS
    },
    {
@@ -4044,7 +4054,7 @@
       QCBOR_SUCCESS
    },
    {
-      "negative bignum 0xc349010000000000000000",
+      "negative bignum 0xc349010000000000000000 -18446744073709551617",
       {(uint8_t[]){0xc3, 0x49, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, 11},
       0,
       QCBOR_ERR_CONVERSION_UNDER_OVER_FLOW,
commit	d02ea8ed7e3b75fa0a1437659b52a5a48ad6dbee	[log] [tgz]
author	Laurence Lundblade <lgl@securitytheory.com>	Sat Jun 06 18:38:19 2020 -0700
committer	Laurence Lundblade <lgl@securitytheory.com>	Sat Jun 06 18:38:19 2020 -0700
tree	7d36ede717d2fe7805efca1720de4ff19b077765
parent	da09597e2408047abda5594f5e41b2a783e02968 [diff]
parent	830fbf95e3b165fbd1767dedda32590d6bb23c26 [diff]