Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 1 | /* ========================================================================== |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 2 | * ieee754.c -- floating-point conversion for half, double & single-precision |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 3 | * |
| 4 | * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved. |
| 5 | * Copyright (c) 2021, Arm Limited. All rights reserved. |
| 6 | * |
| 7 | * SPDX-License-Identifier: BSD-3-Clause |
| 8 | * |
| 9 | * See BSD-3-Clause license in README.md |
| 10 | * |
| 11 | * Created on 7/23/18 |
| 12 | * ========================================================================== */ |
Laurence Lundblade | cc2ed34 | 2018-09-22 17:29:55 -0700 | [diff] [blame] | 13 | |
Máté Tóth-Pál | ef5f07a | 2021-09-17 19:31:37 +0200 | [diff] [blame] | 14 | #include "qcbor/qcbor_common.h" |
| 15 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 16 | #include "ieee754.h" |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 17 | #include <string.h> /* For memcpy() */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 18 | |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 19 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 20 | /* |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 21 | * This has long lines and is easier to read because of |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 22 | * them. Some coding guidelines prefer 80 column lines (can they not |
| 23 | * afford big displays?). |
| 24 | * |
| 25 | * This code works solely using shifts and masks and thus has no |
| 26 | * dependency on any math libraries. It can even work if the CPU |
| 27 | * doesn't have any floating-point support, though that isn't the most |
| 28 | * useful thing to do. |
| 29 | * |
| 30 | * The memcpy() dependency is only for CopyFloatToUint32() and friends |
| 31 | * which only is needed to avoid type punning when converting the |
| 32 | * actual float bits to an unsigned value so the bit shifts and masks |
| 33 | * can work. |
| 34 | * |
| 35 | * The references used to write this code: |
| 36 | * |
| 37 | * IEEE 754-2008, particularly section 3.6 and 6.2.1 |
| 38 | * |
| 39 | * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages |
| 40 | * |
| 41 | * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values |
| 42 | * |
| 43 | * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules |
| 44 | * |
| 45 | * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be |
| 46 | * |
| 47 | * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not |
| 48 | * needed. It can be retrieved from github history if needed. |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 49 | */ |
| 50 | |
| 51 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 52 | |
| 53 | |
| 54 | /* ----- Half Precsion ----------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 55 | #define HALF_NUM_SIGNIFICAND_BITS (10) |
| 56 | #define HALF_NUM_EXPONENT_BITS (5) |
| 57 | #define HALF_NUM_SIGN_BITS (1) |
| 58 | |
| 59 | #define HALF_SIGNIFICAND_SHIFT (0) |
| 60 | #define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS) |
| 61 | #define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS) |
| 62 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 63 | #define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 64 | #define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 65 | #define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 66 | #define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200 |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 67 | |
| 68 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 69 | * 0x00 0 -15 0 and subnormal |
| 70 | * 0x01 1 -14 Smallest normal exponent |
| 71 | * 0x1e 30 15 Largest normal exponent |
| 72 | * 0x1F 31 16 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 73 | #define HALF_EXPONENT_BIAS (15) |
| 74 | #define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased |
| 75 | #define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased |
| 76 | #define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased |
| 77 | #define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased |
| 78 | |
| 79 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 80 | /* ------ Single-Precision -------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 81 | #define SINGLE_NUM_SIGNIFICAND_BITS (23) |
| 82 | #define SINGLE_NUM_EXPONENT_BITS (8) |
| 83 | #define SINGLE_NUM_SIGN_BITS (1) |
| 84 | |
| 85 | #define SINGLE_SIGNIFICAND_SHIFT (0) |
| 86 | #define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS) |
| 87 | #define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS) |
| 88 | |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 89 | #define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits |
| 90 | #define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent |
| 91 | #define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign |
| 92 | #define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1)) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 93 | |
| 94 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 95 | * 0x0000 0 -127 0 and subnormal |
| 96 | * 0x0001 1 -126 Smallest normal exponent |
| 97 | * 0x7f 127 0 1 |
| 98 | * 0xfe 254 127 Largest normal exponent |
| 99 | * 0xff 255 128 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 100 | #define SINGLE_EXPONENT_BIAS (127) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 101 | #define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) |
| 102 | #define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) |
| 103 | #define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) |
| 104 | #define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 105 | |
| 106 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 107 | /* --------- Double-Precision ---------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 108 | #define DOUBLE_NUM_SIGNIFICAND_BITS (52) |
| 109 | #define DOUBLE_NUM_EXPONENT_BITS (11) |
| 110 | #define DOUBLE_NUM_SIGN_BITS (1) |
| 111 | |
| 112 | #define DOUBLE_SIGNIFICAND_SHIFT (0) |
| 113 | #define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS) |
| 114 | #define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS) |
| 115 | |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 116 | #define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits |
| 117 | #define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent |
| 118 | #define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign |
| 119 | #define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) |
| 120 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 121 | |
| 122 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 123 | * 0x00000000 0 -1023 0 and subnormal |
| 124 | * 0x00000001 1 -1022 Smallest normal exponent |
| 125 | * 0x000007fe 2046 1023 Largest normal exponent |
| 126 | * 0x000007ff 2047 1024 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 127 | #define DOUBLE_EXPONENT_BIAS (1023) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 128 | #define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) |
| 129 | #define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) |
| 130 | #define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) |
| 131 | #define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) |
| 132 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 133 | |
| 134 | |
| 135 | |
| 136 | /* |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 137 | * Convenient functions to avoid type punning, compiler warnings and |
| 138 | * such. The optimizer reduces them to a simple assignment. This is a |
| 139 | * crusty corner of C. It shouldn't be this hard. |
| 140 | * |
| 141 | * These are also in UsefulBuf.h under a different name. They are copied |
| 142 | * here to avoid a dependency on UsefulBuf.h. There is no object code |
| 143 | * size impact because these always optimze down to a simple assignment. |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 144 | */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 145 | static inline uint32_t |
| 146 | CopyFloatToUint32(float f) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 147 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 148 | uint32_t u32; |
| 149 | memcpy(&u32, &f, sizeof(uint32_t)); |
| 150 | return u32; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 151 | } |
| 152 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 153 | static inline uint64_t |
| 154 | CopyDoubleToUint64(double d) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 155 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 156 | uint64_t u64; |
| 157 | memcpy(&u64, &d, sizeof(uint64_t)); |
| 158 | return u64; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 159 | } |
| 160 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 161 | |
| 162 | #ifndef QCBOR_DISABLE_PREFERRED_FLOAT |
| 163 | |
| 164 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 165 | static inline double |
| 166 | CopyUint64ToDouble(uint64_t u64) |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 167 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 168 | double d; |
| 169 | memcpy(&d, &u64, sizeof(uint64_t)); |
| 170 | return d; |
| 171 | } |
| 172 | |
| 173 | static inline float |
| 174 | CopyUint32ToSingle(uint32_t u32) |
| 175 | { |
| 176 | float f; |
| 177 | memcpy(&f, &u32, sizeof(uint32_t)); |
| 178 | return f; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 179 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 180 | |
| 181 | |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 182 | |
| 183 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 184 | /** |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 185 | * @brief Assemble sign, significand and exponent into double precision float. |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 186 | * |
| 187 | * @param[in] uDoubleSign 0 if positive, 1 if negative |
| 188 | * @pararm[in] uDoubleSignificand Bits of the significand |
| 189 | * @param[in] nDoubleUnBiasedExponent Exponent |
| 190 | * |
| 191 | * This returns the bits for a single-precision float, a binary64 |
| 192 | * as specified in IEEE754. |
Laurence Lundblade | fe09bbf | 2020-07-16 12:14:51 -0700 | [diff] [blame] | 193 | */ |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 194 | // TODO: make the sign and exponent type int? |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 195 | static double |
| 196 | IEEE754_AssembleDouble(uint64_t uDoubleSign, |
| 197 | uint64_t uDoubleSignificand, |
| 198 | int64_t nDoubleUnBiasedExponent) |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 199 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 200 | uint64_t uDoubleBiasedExponent; |
| 201 | |
| 202 | uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS); |
| 203 | |
| 204 | return CopyUint64ToDouble(uDoubleSignificand | |
| 205 | (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) | |
| 206 | (uDoubleSign << DOUBLE_SIGN_SHIFT)); |
| 207 | } |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 208 | |
| 209 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 210 | /* Public function; see ieee754.h */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 211 | double |
| 212 | IEEE754_HalfToDouble(uint16_t uHalfPrecision) |
| 213 | { |
| 214 | uint64_t uDoubleSignificand; |
| 215 | int64_t nDoubleUnBiasedExponent; |
| 216 | double dResult; |
| 217 | |
| 218 | /* Pull out the three parts of the half-precision float. Do all |
| 219 | * the work in 64 bits because that is what the end result is. It |
| 220 | * may give smaller code size and will keep static analyzers |
| 221 | * happier. |
| 222 | */ |
| 223 | const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK; |
| 224 | const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT; |
| 225 | const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS; |
| 226 | const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT; |
| 227 | |
| 228 | if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) { |
| 229 | /* 0 or subnormal */ |
| 230 | if(uHalfSignificand) { |
| 231 | /* --- SUBNORMAL --- */ |
| 232 | /* A half-precision subnormal can always be converted to a |
| 233 | * normal double-precision float because the ranges line up. |
| 234 | * The exponent of a subnormal starts out at the min exponent |
| 235 | * for a normal. As the sub normal significand bits are |
| 236 | * shifted, left to normalize, the exponent is |
| 237 | * decremented. Shifting continues until fully normalized. |
| 238 | */ |
| 239 | nDoubleUnBiasedExponent = HALF_EXPONENT_MIN; |
| 240 | uDoubleSignificand = uHalfSignificand; |
| 241 | do { |
| 242 | uDoubleSignificand <<= 1; |
| 243 | nDoubleUnBiasedExponent--; |
| 244 | } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0); |
| 245 | /* A normal has an implied 1 in the most significant |
| 246 | * position that a subnormal doesn't. */ |
| 247 | uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS; |
| 248 | /* Must shift into place for a double significand */ |
| 249 | uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS; |
| 250 | |
| 251 | dResult = IEEE754_AssembleDouble(uHalfSign, |
| 252 | uDoubleSignificand, |
| 253 | nDoubleUnBiasedExponent); |
| 254 | } else { |
| 255 | /* --- ZERO --- */ |
| 256 | dResult = IEEE754_AssembleDouble(uHalfSign, |
| 257 | 0, |
| 258 | DOUBLE_EXPONENT_ZERO); |
| 259 | } |
| 260 | } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) { |
| 261 | /* NaN or Inifinity */ |
| 262 | if(uHalfSignificand) { |
| 263 | /* --- NaN --- */ |
| 264 | /* Half-precision payloads always fit into double precision |
| 265 | * payloads. They are shifted left the same as a normal |
| 266 | * number significand. |
| 267 | */ |
| 268 | uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 269 | dResult = IEEE754_AssembleDouble(uHalfSign, |
| 270 | uDoubleSignificand, |
| 271 | DOUBLE_EXPONENT_INF_OR_NAN); |
| 272 | } else { |
| 273 | /* --- INFINITY --- */ |
| 274 | dResult = IEEE754_AssembleDouble(uHalfSign, |
| 275 | 0, |
| 276 | DOUBLE_EXPONENT_INF_OR_NAN); |
| 277 | } |
| 278 | } else { |
| 279 | /* --- NORMAL NUMBER --- */ |
| 280 | uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 281 | dResult = IEEE754_AssembleDouble(uHalfSign, |
| 282 | uDoubleSignificand, |
| 283 | nHalfUnBiasedExponent); |
| 284 | } |
| 285 | |
| 286 | return dResult; |
| 287 | } |
| 288 | |
| 289 | |
| 290 | /** |
| 291 | * @brief Assemble sign, significand and exponent into single precision float. |
| 292 | * |
| 293 | * @param[in] uHalfSign 0 if positive, 1 if negative |
| 294 | * @pararm[in] uHalfSignificand Bits of the significand |
| 295 | * @param[in] nHalfUnBiasedExponent Exponent |
| 296 | * |
| 297 | * This returns the bits for a single-precision float, a binary32 as |
| 298 | * specified in IEEE754. It is returned as a uint64_t rather than a |
| 299 | * uint32_t or a float for convenience of usage. |
| 300 | */ |
| 301 | static uint32_t |
| 302 | IEEE754_AssembleHalf(uint32_t uHalfSign, |
| 303 | uint32_t uHalfSignificand, |
| 304 | int32_t nHalfUnBiasedExponent) |
| 305 | { |
| 306 | uint32_t uHalfUnbiasedExponent; |
| 307 | |
| 308 | uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS); |
| 309 | |
| 310 | return uHalfSignificand | |
| 311 | (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) | |
| 312 | (uHalfSign << HALF_SIGN_SHIFT); |
| 313 | } |
| 314 | |
| 315 | |
| 316 | /* Public function; see ieee754.h */ |
| 317 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 318 | IEEE754_SingleToHalf(const float f, const int bNoNaNPayload) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 319 | { |
| 320 | IEEE754_union result; |
| 321 | uint32_t uDroppedBits; |
| 322 | int32_t nExponentDifference; |
| 323 | int32_t nShiftAmount; |
| 324 | uint32_t uHalfSignificand; |
| 325 | |
| 326 | /* Pull the three parts out of the double-precision float Most work |
| 327 | * is done with uint32_t which helps avoid integer promotions and |
| 328 | * static analyzer complaints. |
| 329 | */ |
| 330 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 331 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
| 332 | const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
| 333 | const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
| 334 | const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT; |
| 335 | |
| 336 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) { |
| 337 | if(uSingleSignificand == 0) { |
| 338 | /* --- IS ZERO --- */ |
| 339 | result.uSize = IEEE754_UNION_IS_HALF; |
| 340 | result.uValue = IEEE754_AssembleHalf(uSingleSign, |
| 341 | 0, |
| 342 | HALF_EXPONENT_ZERO); |
| 343 | } else { |
| 344 | /* --- IS SINGLE SUBNORMAL --- */ |
| 345 | /* The largest single subnormal is slightly less than the |
| 346 | * largest single normal which is 2^-149 or |
| 347 | * 2.2040517676619426e-38. The smallest half subnormal is |
| 348 | * 2^-14 or 5.9604644775390625E-8. There is no overlap so |
| 349 | * single subnormals can't be converted to halfs of any sort. |
| 350 | */ |
| 351 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 352 | result.uValue = uSingle; |
| 353 | } |
| 354 | } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) { |
| 355 | if(uSingleSignificand == 0) { |
| 356 | /* ---- IS INFINITY ---- */ |
| 357 | result.uSize = IEEE754_UNION_IS_HALF; |
| 358 | result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN); |
| 359 | } else { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 360 | if(bNoNaNPayload) { |
| 361 | /* --- REQUIRE CANNONICAL NAN --- */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 362 | result.uSize = IEEE754_UNION_IS_HALF; |
| 363 | result.uValue = IEEE754_AssembleHalf(uSingleSign, |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 364 | HALF_QUIET_NAN_BIT, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 365 | HALF_EXPONENT_INF_OR_NAN); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 366 | } else { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 367 | /* The NaN can only be converted if no payload bits are lost |
| 368 | * per RFC 8949 section 4.1 that defines Preferred |
| 369 | * Serializaton. Note that Deterministically Encode CBOR in |
| 370 | * section 4.2 allows for some variation of this rule, but at |
| 371 | * the moment this implementation is of Preferred |
| 372 | * Serialization, not CDE. As of December 2023, we are also |
| 373 | * expecting an update to CDE. This code may need to be |
| 374 | * updated for CDE. |
| 375 | */ |
| 376 | uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS); |
| 377 | if(uDroppedBits == 0) { |
| 378 | /* --- IS CONVERTABLE NAN --- */ |
| 379 | uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 380 | result.uSize = IEEE754_UNION_IS_HALF; |
| 381 | result.uValue = IEEE754_AssembleHalf(uSingleSign, |
| 382 | uHalfSignificand, |
| 383 | HALF_EXPONENT_INF_OR_NAN); |
| 384 | |
| 385 | } else { |
| 386 | /* --- IS UNCONVERTABLE NAN --- */ |
| 387 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 388 | result.uValue = uSingle; |
| 389 | } |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 390 | } |
| 391 | } |
| 392 | } else { |
| 393 | /* ---- REGULAR NUMBER ---- */ |
| 394 | /* A regular single can be converted to a regular half if the |
| 395 | * single's exponent is in the smaller range of a half and if no |
| 396 | * precision is lost in the significand. |
| 397 | */ |
| 398 | if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN && |
| 399 | nSingleUnbiasedExponent <= HALF_EXPONENT_MAX && |
| 400 | (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) { |
| 401 | uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 402 | |
| 403 | /* --- CONVERT TO HALF NORMAL --- */ |
| 404 | result.uSize = IEEE754_UNION_IS_HALF; |
| 405 | result.uValue = IEEE754_AssembleHalf(uSingleSign, |
| 406 | uHalfSignificand, |
| 407 | nSingleUnbiasedExponent); |
| 408 | } else { |
| 409 | /* Unable to convert to a half normal. See if it can be |
| 410 | * converted to a half subnormal. To do that, the exponent |
| 411 | * must be in range and no precision can be lost in the |
| 412 | * signficand. |
| 413 | * |
| 414 | * This is more complicated because the number is not |
| 415 | * normalized. The signficand must be shifted proprotionally |
| 416 | * to the exponent and 1 must be added in. See |
| 417 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 418 | * |
| 419 | * Exponents -14 to -24 map to a shift of 0 to 10 of the |
| 420 | * significand. The largest value of a half subnormal has an |
| 421 | * exponent of -14. Subnormals are not normalized like |
| 422 | * normals meaning they lose precision as the numbers get |
| 423 | * smaller. Normals don't lose precision because the exponent |
| 424 | * allows all the bits of the significand to be significant. |
| 425 | */ |
| 426 | /* The exponent of the largest possible half-precision |
| 427 | * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger |
| 428 | * than this are normal and handled above. We're going to |
| 429 | * shift the significand right by at least this amount. |
| 430 | */ |
| 431 | nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN); |
| 432 | |
| 433 | /* In addition to the shift based on the exponent's value, |
| 434 | * the single significand has to be shifted right to fit into |
| 435 | * a half-precision significand */ |
| 436 | nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 437 | |
| 438 | /* Must add 1 in to the possible significand because there is |
| 439 | * an implied 1 for normal values and not for subnormal |
| 440 | * values. See equations here: |
| 441 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 442 | */ |
| 443 | uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount; |
| 444 | |
| 445 | /* If only zero bits get shifted out, this can be converted |
| 446 | * to subnormal */ |
| 447 | if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN && |
| 448 | nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS && |
| 449 | uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) { |
| 450 | /* --- CONVERTABLE TO HALF SUBNORMAL --- */ |
| 451 | result.uSize = IEEE754_UNION_IS_HALF; |
| 452 | result.uValue = IEEE754_AssembleHalf(uSingleSign, |
| 453 | uHalfSignificand, |
| 454 | HALF_EXPONENT_ZERO); |
| 455 | } else { |
| 456 | /* --- DO NOT CONVERT --- */ |
| 457 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 458 | result.uValue = uSingle; |
| 459 | } |
| 460 | } |
| 461 | } |
| 462 | |
| 463 | return result; |
| 464 | } |
| 465 | |
| 466 | |
| 467 | /** |
| 468 | * @brief Assemble sign, significand and exponent into single precision float. |
| 469 | * |
| 470 | * @param[in] uSingleSign 0 if positive, 1 if negative |
| 471 | * @pararm[in] uSingleSignificand Bits of the significand |
| 472 | * @param[in] nSingleUnBiasedExponent Exponent |
| 473 | * |
| 474 | * This returns the bits for a single-precision float, a binary32 as |
| 475 | * specified in IEEE754. It is returned as a uint64_t rather than a |
| 476 | * uint32_t or a float for convenience of usage. |
| 477 | */ |
| 478 | static uint64_t |
| 479 | IEEE754_AssembleSingle(uint64_t uSingleSign, |
| 480 | uint64_t uSingleSignificand, |
| 481 | int64_t nSingleUnBiasedExponent) |
| 482 | { |
| 483 | uint64_t uSingleBiasedExponent; |
| 484 | |
| 485 | uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS); |
| 486 | |
| 487 | return uSingleSignificand | |
| 488 | (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) | |
| 489 | (uSingleSign << SINGLE_SIGN_SHIFT); |
| 490 | } |
| 491 | |
| 492 | |
| 493 | /** |
| 494 | * @brief Convert a double-precision float to single-precision. |
| 495 | * |
| 496 | * @param[in] d The value to convert. |
| 497 | * |
| 498 | * @returns Either unconverted value or value converted to single-precision. |
| 499 | * |
| 500 | * This always succeeds. If the value cannot be converted without the |
| 501 | * loss of precision, it is not converted. |
| 502 | * |
| 503 | * This handles all subnormals and NaN payloads. |
| 504 | */ |
| 505 | static IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 506 | IEEE754_DoubleToSingle(const double d) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 507 | { |
| 508 | IEEE754_union Result; |
| 509 | int64_t nExponentDifference; |
| 510 | int64_t nShiftAmount; |
| 511 | uint64_t uSingleSignificand; |
| 512 | uint64_t uDroppedBits; |
| 513 | |
| 514 | |
| 515 | /* Pull the three parts out of the double-precision float. Most |
| 516 | * work is done with uint64_t which helps avoid integer promotions |
| 517 | * and static analyzer complaints. |
| 518 | */ |
| 519 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 520 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
| 521 | const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
| 522 | const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT; |
| 523 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
| 524 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 525 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) { |
| 526 | if(uDoubleSignificand == 0) { |
| 527 | /* --- IS ZERO --- */ |
| 528 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
| 529 | Result.uValue = IEEE754_AssembleSingle(uDoubleSign, |
| 530 | 0, |
| 531 | SINGLE_EXPONENT_ZERO); |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 532 | } else { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 533 | /* --- IS DOUBLE SUBNORMAL --- */ |
| 534 | /* The largest double subnormal is slightly less than the |
| 535 | * largest double normal which is 2^-1022 or |
| 536 | * 2.2250738585072014e-308. The smallest single subnormal |
| 537 | * is 2^-149 or 1.401298464324817e-45. There is no |
| 538 | * overlap so double subnormals can't be converted to |
| 539 | * singles of any sort. |
| 540 | */ |
| 541 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 542 | Result.uValue = uDouble; |
| 543 | } |
| 544 | } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) { |
| 545 | if(uDoubleSignificand == 0) { |
| 546 | /* ---- IS INFINITY ---- */ |
| 547 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
| 548 | Result.uValue = IEEE754_AssembleSingle(uDoubleSign, |
| 549 | 0, |
| 550 | SINGLE_EXPONENT_INF_OR_NAN); |
| 551 | } else { |
| 552 | /* The NaN can only be converted if no payload bits are |
| 553 | * lost per RFC 8949 section 4.1 that defines Preferred |
| 554 | * Serializaton. Note that Deterministically Encode CBOR |
| 555 | * in section 4.2 allows for some variation of this rule, |
| 556 | * but at the moment this implementation is of Preferred |
| 557 | * Serialization, not CDE. As of December 2023, we are |
| 558 | * also expecting an update to CDE. This code may need to |
| 559 | * be updated for CDE. |
| 560 | */ |
| 561 | uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS); |
| 562 | if(uDroppedBits == 0) { |
| 563 | /* --- IS CONVERTABLE NAN --- */ |
| 564 | uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 565 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
| 566 | Result.uValue = IEEE754_AssembleSingle(uDoubleSign, |
| 567 | uSingleSignificand, |
| 568 | SINGLE_EXPONENT_INF_OR_NAN); |
| 569 | } else { |
| 570 | /* --- IS UNCONVERTABLE NAN --- */ |
| 571 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 572 | Result.uValue = uDouble; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 573 | } |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 574 | } |
| 575 | } else { |
| 576 | /* ---- REGULAR NUMBER ---- */ |
| 577 | /* A regular double can be converted to a regular single if |
| 578 | * the double's exponent is in the smaller range of a single |
| 579 | * and if no precision is lost in the significand. |
| 580 | */ |
| 581 | uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS); |
| 582 | if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN && |
| 583 | nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX && |
| 584 | uDroppedBits == 0) { |
| 585 | /* --- IS CONVERTABLE TO SINGLE --- */ |
| 586 | uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 587 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
| 588 | Result.uValue = IEEE754_AssembleSingle(uDoubleSign, |
| 589 | uSingleSignificand, |
| 590 | nDoubleUnbiasedExponent); |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 591 | } else { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 592 | /* Unable to convert to a single normal. See if it can be |
| 593 | * converted to a single subnormal. To do that, the |
| 594 | * exponent must be in range and no precision can be lost |
| 595 | * in the signficand. |
| 596 | * |
| 597 | * This is more complicated because the number is not |
| 598 | * normalized. The signficand must be shifted |
| 599 | * proprotionally to the exponent and 1 must be added |
| 600 | * in. See |
| 601 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 602 | */ |
| 603 | nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN); |
| 604 | nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 605 | uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount; |
| 606 | |
| 607 | if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN && |
| 608 | nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS && |
| 609 | uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) { |
| 610 | /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */ |
| 611 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
| 612 | Result.uValue = IEEE754_AssembleSingle(uDoubleSign, |
| 613 | uSingleSignificand, |
| 614 | SINGLE_EXPONENT_ZERO); |
| 615 | } else { |
| 616 | /* --- CAN NOT BE CONVERTED --- */ |
| 617 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 618 | Result.uValue = uDouble; |
| 619 | } |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 620 | } |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 621 | } |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 622 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 623 | return Result; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 624 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 625 | |
| 626 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 627 | /* Public function; see ieee754.h */ |
| 628 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 629 | IEEE754_DoubleToSmaller(const double d, |
| 630 | const int bAllowHalfPrecision, |
| 631 | const int bNoNanPayload) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 632 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 633 | IEEE754_union result; |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 634 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 635 | result = IEEE754_DoubleToSingle(d); |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 636 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 637 | if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) { |
| 638 | /* Cast to uint32_t is OK, because value was just successfully |
| 639 | * converted to single. */ |
| 640 | float uSingle = CopyUint32ToSingle((uint32_t)result.uValue); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 641 | result = IEEE754_SingleToHalf(uSingle, bNoNanPayload); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 642 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 643 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 644 | return result; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 645 | } |
| 646 | |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 647 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 648 | |
| 649 | /* This returns 64 minus the number of zero bits on the right. It is |
| 650 | * is the amount of precision in the 64-bit significand passed in. |
| 651 | * When used for 52 and 23-bit significands, subtract 12 and 41 |
| 652 | * to get their precision. |
| 653 | * |
| 654 | * The value returned is for a *normalized* number like the |
| 655 | * significand of a double. When used for precision for a non-normalized |
| 656 | * number like a uint64_t, further computation is required. |
| 657 | * |
| 658 | * If the significand is 0, then 0 is returned as the precision.*/ |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 659 | static int |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 660 | IEEE754_Private_CountPrecisionBits(uint64_t uSignigicand) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 661 | { |
| 662 | int nNonZeroBitsCount; |
| 663 | uint64_t uMask; |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 664 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 665 | for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) { |
| 666 | uMask = 0x01UL << (64 - nNonZeroBitsCount); |
| 667 | if(uMask & uSignigicand) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 668 | break; |
| 669 | } |
| 670 | } |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 671 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 672 | return nNonZeroBitsCount; |
| 673 | } |
| 674 | |
| 675 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 676 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 677 | /* Public function; see ieee754.h */ |
| 678 | struct IEEE754_ToInt |
| 679 | IEEE754_DoubleToInt(const double d) |
| 680 | { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 681 | int64_t nPrecisionBits; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 682 | struct IEEE754_ToInt Result; |
| 683 | uint64_t uInteger; |
| 684 | |
| 685 | /* Pull the three parts out of the double-precision float. Most |
| 686 | * work is done with uint64_t which helps avoid integer promotions |
| 687 | * and static analyzer complaints. |
| 688 | */ |
| 689 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 690 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
| 691 | /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */ |
| 692 | const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
| 693 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 694 | const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 695 | |
| 696 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) { |
| 697 | if(uDoubleSignificand == 0) { |
| 698 | /* --- POSITIVE AND NEGATIVE ZERO --- */ |
| 699 | Result.integer.un_signed = 0; |
| 700 | Result.type = IEEE754_ToInt_IS_UINT; |
| 701 | } else { |
| 702 | /* --- SUBNORMAL --- */ |
| 703 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 704 | } |
| 705 | } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) { |
| 706 | if(uDoubleSignificand != 0) { |
| 707 | /* --- NAN --- */ |
| 708 | Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */ |
| 709 | } else { |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 710 | /* --- INFINITY --- */ |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 711 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 712 | } |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 713 | } else if(nDoubleUnbiasedExponent < 0) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 714 | /* --- Exponent out of range --- */ |
| 715 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 716 | } else if(nDoubleUnbiasedExponent >= 64) { |
| 717 | if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) { |
| 718 | /* Very special case for -18446744073709551616.0 */ |
| 719 | Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */ |
| 720 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 721 | } else { |
| 722 | /* --- Exponent out of range --- */ |
| 723 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 724 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 725 | } else { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 726 | /* Conversion only fails when the input is too large or is not a |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 727 | * whole number, never because of lack of precision because |
| 728 | * 64-bit integers always have more precision than the 52-bits |
| 729 | * of a double. |
| 730 | */ |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 731 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) - |
| 732 | (64-DOUBLE_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 733 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 734 | if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 735 | /* --- Not a whole number --- */ |
| 736 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 737 | } else { |
| 738 | /* --- CONVERTABLE WHOLE NUMBER --- */ |
| 739 | /* Add in the one that is implied in normal floats */ |
| 740 | uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS); |
| 741 | /* Factor in the exponent */ |
| 742 | if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) { |
| 743 | /* Numbers less than 2^52 with up to 52 significant bits */ |
| 744 | uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent; |
| 745 | } else { |
| 746 | /* Numbers greater than 2^52 with at most 52 significant bits */ |
| 747 | uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS; |
| 748 | } |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 749 | if(bIsNegative) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 750 | /* Cast safe because exponent range check above */ |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 751 | if(nDoubleUnbiasedExponent == 63) { |
| 752 | Result.integer.un_signed = uInteger; |
| 753 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 754 | } else { |
| 755 | Result.integer.is_signed = -((int64_t)uInteger); |
| 756 | Result.type = IEEE754_ToInt_IS_INT; |
| 757 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 758 | } else { |
| 759 | Result.integer.un_signed = uInteger; |
| 760 | Result.type = IEEE754_ToInt_IS_UINT; |
| 761 | } |
| 762 | } |
| 763 | } |
| 764 | |
| 765 | return Result; |
| 766 | } |
| 767 | |
| 768 | |
| 769 | /* Public function; see ieee754.h */ |
| 770 | struct IEEE754_ToInt |
| 771 | IEEE754_SingleToInt(const float f) |
| 772 | { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 773 | int32_t nPrecisionBits; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 774 | struct IEEE754_ToInt Result; |
| 775 | uint64_t uInteger; |
| 776 | |
| 777 | /* Pull the three parts out of the single-precision float. Most |
| 778 | * work is done with uint32_t which helps avoid integer promotions |
| 779 | * and static analyzer complaints. |
| 780 | */ |
| 781 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 782 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
| 783 | /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */ |
| 784 | const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
| 785 | const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 786 | const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK; |
| 787 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 788 | |
| 789 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) { |
| 790 | if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) { |
| 791 | /* --- POSITIVE AND NEGATIVE ZERO --- */ |
| 792 | Result.integer.un_signed = 0; |
| 793 | Result.type = IEEE754_ToInt_IS_UINT; |
| 794 | } else { |
| 795 | /* --- Subnormal --- */ |
| 796 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 797 | } |
| 798 | } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) { |
| 799 | /* --- NAN or INFINITY --- */ |
| 800 | if(uSingleleSignificand != 0) { |
| 801 | Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */ |
| 802 | } else { |
| 803 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 804 | } |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 805 | } else if(nSingleUnbiasedExponent < 0) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 806 | /* --- Exponent out of range --- */ |
| 807 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 808 | } else if(nSingleUnbiasedExponent >= 64) { |
| 809 | if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) { |
| 810 | /* Very special case for -18446744073709551616.0 */ |
| 811 | Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */ |
| 812 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 813 | } else { |
| 814 | /* --- Exponent out of range --- */ |
| 815 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 816 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 817 | } else { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 818 | /* Conversion only fails when the input is too large or is not a |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 819 | * whole number, never because of lack of precision because |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 820 | * 64-bit integers always have more precision than the 23 bits |
| 821 | * of a single. |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 822 | */ |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 823 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) - |
| 824 | (64 - SINGLE_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 825 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 826 | if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 827 | /* --- Not a whole number --- */ |
| 828 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 829 | } else { |
| 830 | /* --- CONVERTABLE WHOLE NUMBER --- */ |
| 831 | /* Add in the one that is implied in normal floats */ |
| 832 | uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS); |
| 833 | /* Factor in the exponent */ |
| 834 | if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) { |
| 835 | /* Numbers less than 2^23 with up to 23 significant bits */ |
| 836 | uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent; |
| 837 | } else { |
| 838 | /* Numbers greater than 2^23 with at most 23 significant bits*/ |
| 839 | uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS; |
| 840 | } |
Laurence Lundblade | 3d0785e | 2024-04-22 10:31:40 -0700 | [diff] [blame] | 841 | if(bIsNegative) { |
| 842 | /* Cast safe because exponent range check above */ |
| 843 | if(nSingleUnbiasedExponent == 63) { |
| 844 | Result.integer.un_signed = uInteger; |
| 845 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 846 | } else { |
| 847 | Result.integer.is_signed = -((int64_t)uInteger); |
| 848 | Result.type = IEEE754_ToInt_IS_INT; |
| 849 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 850 | } else { |
| 851 | Result.integer.un_signed = uInteger; |
| 852 | Result.type = IEEE754_ToInt_IS_UINT; |
| 853 | } |
| 854 | } |
| 855 | } |
| 856 | |
| 857 | return Result; |
| 858 | } |
Laurence Lundblade | fe09bbf | 2020-07-16 12:14:51 -0700 | [diff] [blame] | 859 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 860 | |
| 861 | |
| 862 | /* Public function; see ieee754.h */ |
| 863 | double |
| 864 | IEEE754_UintToDouble(const uint64_t uInt, const int uIsNegative) |
| 865 | { |
| 866 | int nDoubleUnbiasedExponent; |
| 867 | uint64_t uDoubleSignificand; |
| 868 | int nPrecisionBits; |
| 869 | |
Laurence Lundblade | 0bfa3cb | 2024-04-21 13:14:24 -0700 | [diff] [blame] | 870 | if(uInt == 0) { |
Laurence Lundblade | 9d04d2b | 2024-07-24 16:57:43 -0700 | [diff] [blame] | 871 | uDoubleSignificand = 0; |
| 872 | nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO; |
| 873 | |
Laurence Lundblade | 0bfa3cb | 2024-04-21 13:14:24 -0700 | [diff] [blame] | 874 | } else { |
| 875 | /* Figure out the exponent and normalize the significand. This is |
| 876 | * done by shifting out all leading zero bits and counting them. If |
| 877 | * none are shifted out, the exponent is 63. */ |
| 878 | uDoubleSignificand = uInt; |
| 879 | nDoubleUnbiasedExponent = 63; |
| 880 | while(1) { |
| 881 | if(uDoubleSignificand & 0x8000000000000000UL) { |
| 882 | break; |
| 883 | } |
| 884 | uDoubleSignificand <<= 1; |
| 885 | nDoubleUnbiasedExponent--; |
| 886 | }; |
| 887 | |
| 888 | /* Position significand correctly for a double. Only shift 63 bits |
| 889 | * because of the 1 that is present by implication in IEEE 754.*/ |
| 890 | uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS; |
| 891 | |
| 892 | /* Subtract 1 which is present by implication in IEEE 754 */ |
| 893 | uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS); |
| 894 | |
| 895 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent); |
| 896 | |
| 897 | if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) { |
| 898 | /* Will lose precision if converted */ |
| 899 | return IEEE754_UINT_TO_DOUBLE_OOB; |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 900 | } |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 901 | } |
| 902 | |
| 903 | return IEEE754_AssembleDouble((uint64_t)uIsNegative, |
| 904 | uDoubleSignificand, |
| 905 | nDoubleUnbiasedExponent); |
| 906 | } |
| 907 | |
Laurence Lundblade | b275cdc | 2020-07-12 12:34:38 -0700 | [diff] [blame] | 908 | #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */ |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 909 | |
| 910 | |
| 911 | |
| 912 | /* Public function; see ieee754.h */ |
| 913 | int |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 914 | IEEE754_DoubleHasNaNPayload(const double d) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 915 | { |
| 916 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 917 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
| 918 | /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */ |
| 919 | const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
| 920 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
| 921 | |
| 922 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN && |
| 923 | uDoubleSignificand != 0 && |
| 924 | uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) { |
| 925 | return 1; |
| 926 | } else { |
| 927 | return 0; |
| 928 | } |
| 929 | } |
| 930 | |
| 931 | |
| 932 | /* Public function; see ieee754.h */ |
| 933 | int |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 934 | IEEE754_SingleHasNaNPayload(const float f) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 935 | { |
| 936 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 937 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
| 938 | /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */ |
| 939 | const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 940 | const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 941 | |
| 942 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN && |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 943 | uSingleSignificand != 0 && |
| 944 | uSingleSignificand != SINGLE_QUIET_NAN_BIT) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 945 | return 1; |
| 946 | } else { |
| 947 | return 0; |
| 948 | } |
| 949 | } |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 950 | |