/* ==========================================================================
 * ieee754.h -- Conversion between half, double & single-precision floats
 *
 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * See BSD-3-Clause license in file named "LICENSE"
 *
 * Created on 7/23/18
 * ========================================================================== */
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 12 | |
Laurence Lundblade | 9682a53 | 2020-06-06 18:33:04 -0700 | [diff] [blame] | 13 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 14 | #ifndef ieee754_h |
| 15 | #define ieee754_h |
| 16 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 17 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 18 | #include <stdint.h> |
| 19 | |
| 20 | |
/** @file ieee754.h
 *
 * This implements floating-point conversion between half, single and
 * double precision floating-point numbers, in particular conversion to
 * smaller representation (e.g., double to single) that does not lose
 * precision for CBOR preferred serialization.
 *
 * This also implements conversion of floats to whole numbers as
 * is required for dCBOR.
 *
 * This implementation works entirely with shifts and masks and does
 * not require any floating-point HW or library.
 *
 * This conforms to IEEE 754-2008, but note that it doesn't specify
 * conversions, just the encodings.
 *
 * This is complete, supporting +/- infinity, +/- zero, subnormals and
 * NaN payloads. NaN payloads are converted to smaller by dropping the
 * rightmost bits if they are zero and shifting to the right. If the
 * rightmost bits are not zero, the conversion is not performed. When
 * converting from smaller to larger, the payload is shifted left and
 * zero-padded. This is what is specified by CBOR preferred
 * serialization and what modern HW conversion instructions do.
 *
 * There is no special handling of signaling and quiet NaNs. It probably
 * isn't necessary to transmit these special NaNs as their purpose is
 * more for propagating errors up through some calculation. In many
 * cases the handling of the NaN payload will work for signaling and
 * quiet NaNs.
 *
 * A previous version of this was usable as a general library for
 * conversion. This version is reduced to what is needed for CBOR.
 */
| 54 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 55 | #ifndef QCBOR_DISABLE_PREFERRED_FLOAT |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 56 | |
/**
 * @brief Convert half-precision float to double-precision float.
 *
 * @param[in] uHalfPrecision  Half-precision number to convert, passed
 *                            as its raw IEEE 754 bit pattern.
 *
 * @returns double-precision value.
 *
 * This is a lossless conversion because every half-precision value
 * can be represented as a double. There is no error condition.
 *
 * There is no half-precision type in C, so it is represented here as
 * a @c uint16_t. The bits of @c uHalfPrecision are as described for
 * half-precision by IEEE 754.
 */
double
IEEE754_HalfToDouble(uint16_t uHalfPrecision);
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 73 | |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 74 | |
/**
 * Holds a floating-point value that could be half, single or
 * double-precision. The value is in a @c uint64_t that may be copied
 * to a float or double. Simply casting uValue will usually work but
 * may generate compiler or static analyzer warnings. Using
 * UsefulBufUtil_CopyUint64ToDouble() or
 * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
 * any extra code).
 */
typedef struct {
   /* Size of uValue. The enumerator values (2, 4, 8) match the width
    * in bytes of the half, single and double formats respectively. */
   enum {
      IEEE754_UNION_IS_HALF   = 2,
      IEEE754_UNION_IS_SINGLE = 4,
      IEEE754_UNION_IS_DOUBLE = 8,
   } uSize;

   /* The floating-point value, in the format indicated by uSize. */
   uint64_t uValue;
} IEEE754_union;
| 90 | |
| 91 | |
/**
 * Holds result of an attempt to convert a floating-point
 * number to an int64_t or uint64_t.
 *
 * @c type reports the outcome of the attempt and @c integer carries
 * the converted value when there is one.
 *
 * NOTE(review): this header does not state which member of @c integer
 * goes with which @c type value, nor what is stored for
 * IEEE754_ToInt_IS_65BIT_NEG. Presumably IS_INT pairs with
 * @c is_signed and IS_UINT with @c un_signed -- confirm against the
 * implementation.
 */
struct IEEE754_ToInt {
   enum {
      IEEE754_ToInt_IS_INT,        /* Converted to a signed integer */
      IEEE754_ToInt_IS_UINT,       /* Converted to an unsigned integer */
      IEEE754_ToInt_IS_65BIT_NEG,  /* NOTE(review): name suggests a negative
                                    * value too large in magnitude for
                                    * int64_t -- confirm */
      IEEE754_ToInt_NO_CONVERSION, /* The value could not be converted */
      IEEE754_ToInt_NaN            /* The input was a NaN */
   } type;

   union {
      uint64_t un_signed;
      int64_t  is_signed;
   } integer;
};
| 107 | |
| 108 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 109 | /** |
| 110 | * @brief Convert a double to either single or half-precision. |
| 111 | * |
| 112 | * @param[in] d The value to convert. |
| 113 | * @param[in] bAllowHalfPrecision If true, convert to either half or |
| 114 | * single precision. |
| 115 | * |
| 116 | * @returns Unconverted value, or value converted to single or half-precision. |
| 117 | * |
| 118 | * This always succeeds. If the value cannot be converted without the |
| 119 | * loss of precision, it is not converted. |
| 120 | * |
| 121 | * This handles all subnormals and NaN payloads. |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 122 | */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 123 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 124 | IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision, int bNoNaNPayload); |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 125 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 126 | |
| 127 | /** |
| 128 | * @brief Convert a single-precision float to half-precision. |
| 129 | * |
| 130 | * @param[in] f The value to convert. |
| 131 | * |
| 132 | * @returns Either unconverted value or value converted to half-precision. |
| 133 | * |
| 134 | * This always succeeds. If the value cannot be converted without the |
| 135 | * loss of precision, it is not converted. |
| 136 | * |
| 137 | * This handles all subnormals and NaN payloads. |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 138 | */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 139 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 140 | IEEE754_SingleToHalf(float f, int bNoNanPayloads); |
| 141 | |
| 142 | |
/**
 * @brief Convert a double-precision float to an integer if whole number.
 *
 * @param[in] d  The value to convert.
 *
 * @returns Either converted number or conversion status.
 *
 * If the value is a whole number that will fit either in a uint64_t
 * or an int64_t, it is converted. If it is a NaN, then there is no
 * conversion and the fact that it is a NaN is indicated in the
 * returned structure. If it can't be converted, then that is
 * indicated in the returned structure.
 *
 * This always returns positive numbers as a uint64_t even if they will
 * fit in an int64_t.
 *
 * This never fails because of precision, but may fail because of range.
 */
struct IEEE754_ToInt
IEEE754_DoubleToInt(double d);
| 163 | |
| 164 | |
/**
 * @brief Convert a single-precision float to an integer if whole number.
 *
 * @param[in] f  The value to convert.
 *
 * @returns Either converted number or conversion status.
 *
 * If the value is a whole number that will fit either in a uint64_t
 * or an int64_t, it is converted. If it is a NaN, then there is no
 * conversion and the fact that it is a NaN is indicated in the
 * returned structure. If it can't be converted, then that is
 * indicated in the returned structure.
 *
 * This always returns positive numbers as a uint64_t even if they will
 * fit in an int64_t.
 *
 * This never fails because of precision, but may fail because of range.
 */
struct IEEE754_ToInt
IEEE754_SingleToInt(float f);
| 185 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 186 | |
/**
 * Returned by IEEE754_UintToDouble() to indicate no conversion. It is
 * out-of-band from non-error results, because all non-error results
 * are whole integers.
 */
#define IEEE754_UINT_TO_DOUBLE_OOB 0.5

/**
 * @brief Convert an unsigned integer to a double with no precision loss.
 *
 * @param[in] uInt         The value to convert.
 * @param[in] uIsNegative  0 if positive, 1 if negative.
 *
 * @returns Either the converted number or 0.5 if no conversion.
 *
 * The conversion will fail if the input can not be represented in the
 * 52 bits of precision that a double has. 0.5
 * (@ref IEEE754_UINT_TO_DOUBLE_OOB) is returned to indicate no
 * conversion. It is out-of-band from non-error results, because all
 * non-error results are whole integers.
 */
double
IEEE754_UintToDouble(uint64_t uInt, int uIsNegative);
| 203 | |
| 204 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 205 | #endif /* ! QCBOR_DISABLE_PREFERRED_FLOAT */ |
| 206 | |
| 207 | |
/**
 * @brief Tests whether NaN is "quiet" vs having a payload.
 *
 * @param[in] dNum  Double number to test.
 *
 * @returns 0 if a quiet NaN, 1 if it has a payload.
 *
 * A quiet NaN is usually represented as 0x7ff8000000000000. That is,
 * the significand bits are 0x8000000000000. If the significand bits
 * are other than 0x8000000000000 it is considered to have a NaN
 * payload.
 *
 * Note that 0x7ff8000000000000 is not specified in a standard, but it
 * is commonly implemented and chosen by CBOR as the best way to
 * represent a NaN.
 */
int
IEEE754_DoubleHasNaNPayload(double dNum);
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 226 | |
| 227 | |
| 228 | |
/**
 * @brief Tests whether a single-precision NaN is "quiet" vs having a
 *        payload.
 *
 * @param[in] fNum  Float number to test.
 *
 * @returns 0 if a quiet NaN, 1 if it has a payload.
 *
 * See IEEE754_DoubleHasNaNPayload(). A single precision quiet NaN
 * is 0x7fc00000.
 */
int
IEEE754_SingleHasNaNPayload(float fNum);
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 241 | |
| 242 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 243 | #endif /* ieee754_h */ |
| 244 | |