Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 1 | /* ========================================================================== |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 2 | * ieee754.c -- floating-point conversion for half, double & single-precision |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 3 | * |
| 4 | * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved. |
| 5 | * Copyright (c) 2021, Arm Limited. All rights reserved. |
| 6 | * |
| 7 | * SPDX-License-Identifier: BSD-3-Clause |
| 8 | * |
Laurence Lundblade | e8f5816 | 2024-08-22 10:30:08 -0700 | [diff] [blame] | 9 | * See BSD-3-Clause license in file named "LICENSE" |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 10 | * |
| 11 | * Created on 7/23/18 |
| 12 | * ========================================================================== */ |
Laurence Lundblade | cc2ed34 | 2018-09-22 17:29:55 -0700 | [diff] [blame] | 13 | |
Máté Tóth-Pál | ef5f07a | 2021-09-17 19:31:37 +0200 | [diff] [blame] | 14 | #include "qcbor/qcbor_common.h" |
| 15 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 16 | #include "ieee754.h" |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 17 | #include <string.h> /* For memcpy() */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 18 | |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 19 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 20 | /* |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 21 | * This code has long lines and is easier to read because of |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 22 | * them. Some coding guidelines prefer 80 column lines (can they not |
| 23 | * afford big displays?). |
| 24 | * |
| 25 | * This code works solely using shifts and masks and thus has no |
| 26 | * dependency on any math libraries. It can even work if the CPU |
| 27 | * doesn't have any floating-point support, though that isn't the most |
| 28 | * useful thing to do. |
| 29 | * |
| 30 | * The memcpy() dependency is only for CopyFloatToUint32() and friends |
| 31 | * which only is needed to avoid type punning when converting the |
| 32 | * actual float bits to an unsigned value so the bit shifts and masks |
| 33 | * can work. |
| 34 | * |
| 35 | * The references used to write this code: |
| 36 | * |
| 37 | * IEEE 754-2008, particularly section 3.6 and 6.2.1 |
| 38 | * |
| 39 | * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages |
| 40 | * |
| 41 | * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values |
| 42 | * |
| 43 | * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules |
| 44 | * |
| 45 | * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be |
| 46 | * |
| 47 | * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not |
| 48 | * needed. It can be retrieved from github history if needed. |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 49 | */ |
| 50 | |
| 51 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 52 | |
| 53 | |
| 54 | /* ----- Half Precsion ----------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 55 | #define HALF_NUM_SIGNIFICAND_BITS (10) |
| 56 | #define HALF_NUM_EXPONENT_BITS (5) |
| 57 | #define HALF_NUM_SIGN_BITS (1) |
| 58 | |
| 59 | #define HALF_SIGNIFICAND_SHIFT (0) |
| 60 | #define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS) |
| 61 | #define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS) |
| 62 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 63 | #define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 64 | #define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 65 | #define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 66 | #define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200 |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 67 | |
| 68 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 69 | * 0x00 0 -15 0 and subnormal |
| 70 | * 0x01 1 -14 Smallest normal exponent |
| 71 | * 0x1e 30 15 Largest normal exponent |
| 72 | * 0x1F 31 16 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 73 | #define HALF_EXPONENT_BIAS (15) |
| 74 | #define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased |
| 75 | #define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased |
| 76 | #define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased |
| 77 | #define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased |
| 78 | |
| 79 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 80 | /* ------ Single-Precision -------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 81 | #define SINGLE_NUM_SIGNIFICAND_BITS (23) |
| 82 | #define SINGLE_NUM_EXPONENT_BITS (8) |
| 83 | #define SINGLE_NUM_SIGN_BITS (1) |
| 84 | |
| 85 | #define SINGLE_SIGNIFICAND_SHIFT (0) |
| 86 | #define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS) |
| 87 | #define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS) |
| 88 | |
Laurence Lundblade | 06350ea | 2020-01-27 19:32:40 -0800 | [diff] [blame] | 89 | #define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits |
| 90 | #define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent |
| 91 | #define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign |
| 92 | #define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1)) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 93 | |
| 94 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 95 | * 0x0000 0 -127 0 and subnormal |
| 96 | * 0x0001 1 -126 Smallest normal exponent |
| 97 | * 0x7f 127 0 1 |
| 98 | * 0xfe 254 127 Largest normal exponent |
| 99 | * 0xff 255 128 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 100 | #define SINGLE_EXPONENT_BIAS (127) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 101 | #define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) |
| 102 | #define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) |
| 103 | #define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) |
| 104 | #define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 105 | |
| 106 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 107 | /* --------- Double-Precision ---------- */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 108 | #define DOUBLE_NUM_SIGNIFICAND_BITS (52) |
| 109 | #define DOUBLE_NUM_EXPONENT_BITS (11) |
| 110 | #define DOUBLE_NUM_SIGN_BITS (1) |
| 111 | |
| 112 | #define DOUBLE_SIGNIFICAND_SHIFT (0) |
| 113 | #define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS) |
| 114 | #define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS) |
| 115 | |
Laurence Lundblade | 8db3d3e | 2018-09-29 11:46:37 -0700 | [diff] [blame] | 116 | #define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits |
| 117 | #define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent |
| 118 | #define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign |
| 119 | #define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) |
| 120 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 121 | |
| 122 | /* Biased Biased Unbiased Use |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 123 | * 0x00000000 0 -1023 0 and subnormal |
| 124 | * 0x00000001 1 -1022 Smallest normal exponent |
| 125 | * 0x000007fe 2046 1023 Largest normal exponent |
| 126 | * 0x000007ff 2047 1024 NaN and Infinity */ |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 127 | #define DOUBLE_EXPONENT_BIAS (1023) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 128 | #define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) |
| 129 | #define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) |
| 130 | #define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) |
| 131 | #define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) |
| 132 | |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 133 | |
| 134 | |
| 135 | |
| 136 | /* |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 137 | * Convenient functions to avoid type punning, compiler warnings and |
| 138 | * such. The optimizer reduces them to a simple assignment. This is a |
| 139 | * crusty corner of C. It shouldn't be this hard. |
| 140 | * |
| 141 | * These are also in UsefulBuf.h under a different name. They are copied |
| 142 | * here to avoid a dependency on UsefulBuf.h. There is no object code |
| 143 | * size impact because these always optimze down to a simple assignment. |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 144 | */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 145 | static inline uint32_t |
| 146 | CopyFloatToUint32(float f) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 147 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 148 | uint32_t u32; |
| 149 | memcpy(&u32, &f, sizeof(uint32_t)); |
| 150 | return u32; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 151 | } |
| 152 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 153 | static inline uint64_t |
| 154 | CopyDoubleToUint64(double d) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 155 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 156 | uint64_t u64; |
| 157 | memcpy(&u64, &d, sizeof(uint64_t)); |
| 158 | return u64; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 159 | } |
| 160 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 161 | |
| 162 | #ifndef QCBOR_DISABLE_PREFERRED_FLOAT |
| 163 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 164 | static inline double |
| 165 | CopyUint64ToDouble(uint64_t u64) |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 166 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 167 | double d; |
| 168 | memcpy(&d, &u64, sizeof(uint64_t)); |
| 169 | return d; |
| 170 | } |
| 171 | |
| 172 | static inline float |
| 173 | CopyUint32ToSingle(uint32_t u32) |
| 174 | { |
| 175 | float f; |
| 176 | memcpy(&f, &u32, sizeof(uint32_t)); |
| 177 | return f; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 178 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 179 | |
| 180 | |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 181 | |
| 182 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 183 | /** |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 184 | * @brief Assemble sign, significand and exponent into double precision float. |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 185 | * |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 186 | * @param[in] nIsNegative 0 if positive, 1 if negative. |
| 187 | * @pararm[in] uDoubleSignificand Bits of the significand. |
| 188 | * @param[in] nDoubleUnBiasedExponent Exponent. |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 189 | * |
| 190 | * This returns the bits for a single-precision float, a binary64 |
| 191 | * as specified in IEEE754. |
Laurence Lundblade | fe09bbf | 2020-07-16 12:14:51 -0700 | [diff] [blame] | 192 | */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 193 | static double |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 194 | IEEE754_AssembleDouble(int nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 195 | uint64_t uDoubleSignificand, |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 196 | int nDoubleUnBiasedExponent) |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 197 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 198 | uint64_t uDoubleBiasedExponent; |
| 199 | |
| 200 | uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS); |
| 201 | |
| 202 | return CopyUint64ToDouble(uDoubleSignificand | |
| 203 | (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 204 | ((uint64_t)nIsNegative << DOUBLE_SIGN_SHIFT)); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 205 | } |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 206 | |
| 207 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 208 | /* Public function; see ieee754.h */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 209 | double |
| 210 | IEEE754_HalfToDouble(uint16_t uHalfPrecision) |
| 211 | { |
| 212 | uint64_t uDoubleSignificand; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 213 | int nDoubleUnBiasedExponent; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 214 | double dResult; |
| 215 | |
| 216 | /* Pull out the three parts of the half-precision float. Do all |
| 217 | * the work in 64 bits because that is what the end result is. It |
| 218 | * may give smaller code size and will keep static analyzers |
| 219 | * happier. |
| 220 | */ |
| 221 | const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK; |
| 222 | const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 223 | const int nHalfUnBiasedExponent = (int)uHalfBiasedExponent - HALF_EXPONENT_BIAS; |
| 224 | const int nIsNegative = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 225 | |
| 226 | if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) { |
| 227 | /* 0 or subnormal */ |
| 228 | if(uHalfSignificand) { |
| 229 | /* --- SUBNORMAL --- */ |
| 230 | /* A half-precision subnormal can always be converted to a |
| 231 | * normal double-precision float because the ranges line up. |
| 232 | * The exponent of a subnormal starts out at the min exponent |
| 233 | * for a normal. As the sub normal significand bits are |
| 234 | * shifted, left to normalize, the exponent is |
| 235 | * decremented. Shifting continues until fully normalized. |
| 236 | */ |
| 237 | nDoubleUnBiasedExponent = HALF_EXPONENT_MIN; |
| 238 | uDoubleSignificand = uHalfSignificand; |
| 239 | do { |
| 240 | uDoubleSignificand <<= 1; |
| 241 | nDoubleUnBiasedExponent--; |
| 242 | } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0); |
| 243 | /* A normal has an implied 1 in the most significant |
| 244 | * position that a subnormal doesn't. */ |
| 245 | uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS; |
| 246 | /* Must shift into place for a double significand */ |
| 247 | uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS; |
| 248 | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 249 | dResult = IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 250 | uDoubleSignificand, |
| 251 | nDoubleUnBiasedExponent); |
| 252 | } else { |
| 253 | /* --- ZERO --- */ |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 254 | dResult = IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 255 | 0, |
| 256 | DOUBLE_EXPONENT_ZERO); |
| 257 | } |
| 258 | } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) { |
| 259 | /* NaN or Inifinity */ |
| 260 | if(uHalfSignificand) { |
| 261 | /* --- NaN --- */ |
| 262 | /* Half-precision payloads always fit into double precision |
| 263 | * payloads. They are shifted left the same as a normal |
| 264 | * number significand. |
| 265 | */ |
| 266 | uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 267 | dResult = IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 268 | uDoubleSignificand, |
| 269 | DOUBLE_EXPONENT_INF_OR_NAN); |
| 270 | } else { |
| 271 | /* --- INFINITY --- */ |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 272 | dResult = IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 273 | 0, |
| 274 | DOUBLE_EXPONENT_INF_OR_NAN); |
| 275 | } |
| 276 | } else { |
| 277 | /* --- NORMAL NUMBER --- */ |
| 278 | uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 279 | dResult = IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 280 | uDoubleSignificand, |
| 281 | nHalfUnBiasedExponent); |
| 282 | } |
| 283 | |
| 284 | return dResult; |
| 285 | } |
| 286 | |
| 287 | |
| 288 | /** |
| 289 | * @brief Assemble sign, significand and exponent into single precision float. |
| 290 | * |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 291 | * @param[in] nIsNegative 0 if positive, 1 if negative |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 292 | * @pararm[in] uHalfSignificand Bits of the significand |
| 293 | * @param[in] nHalfUnBiasedExponent Exponent |
| 294 | * |
| 295 | * This returns the bits for a single-precision float, a binary32 as |
| 296 | * specified in IEEE754. It is returned as a uint64_t rather than a |
| 297 | * uint32_t or a float for convenience of usage. |
| 298 | */ |
| 299 | static uint32_t |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 300 | IEEE754_AssembleHalf(int nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 301 | uint32_t uHalfSignificand, |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 302 | int nHalfUnBiasedExponent) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 303 | { |
| 304 | uint32_t uHalfUnbiasedExponent; |
| 305 | |
| 306 | uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS); |
| 307 | |
| 308 | return uHalfSignificand | |
| 309 | (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 310 | ((uint32_t)nIsNegative << HALF_SIGN_SHIFT); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 311 | } |
| 312 | |
| 313 | |
| 314 | /* Public function; see ieee754.h */ |
| 315 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 316 | IEEE754_SingleToHalf(const float f, const int bNoNaNPayload) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 317 | { |
| 318 | IEEE754_union result; |
| 319 | uint32_t uDroppedBits; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 320 | int nExponentDifference; |
| 321 | int nShiftAmount; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 322 | uint32_t uHalfSignificand; |
| 323 | |
| 324 | /* Pull the three parts out of the double-precision float Most work |
| 325 | * is done with uint32_t which helps avoid integer promotions and |
| 326 | * static analyzer complaints. |
| 327 | */ |
| 328 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 329 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 330 | const int nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 331 | const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 332 | const int nIsNegative = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 333 | |
| 334 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) { |
| 335 | if(uSingleSignificand == 0) { |
| 336 | /* --- IS ZERO --- */ |
| 337 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 338 | result.uValue = IEEE754_AssembleHalf(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 339 | 0, |
| 340 | HALF_EXPONENT_ZERO); |
| 341 | } else { |
| 342 | /* --- IS SINGLE SUBNORMAL --- */ |
| 343 | /* The largest single subnormal is slightly less than the |
| 344 | * largest single normal which is 2^-149 or |
| 345 | * 2.2040517676619426e-38. The smallest half subnormal is |
| 346 | * 2^-14 or 5.9604644775390625E-8. There is no overlap so |
| 347 | * single subnormals can't be converted to halfs of any sort. |
| 348 | */ |
| 349 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 350 | result.uValue = uSingle; |
| 351 | } |
| 352 | } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) { |
| 353 | if(uSingleSignificand == 0) { |
| 354 | /* ---- IS INFINITY ---- */ |
| 355 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 356 | result.uValue = IEEE754_AssembleHalf(nIsNegative, 0, HALF_EXPONENT_INF_OR_NAN); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 357 | } else { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 358 | if(bNoNaNPayload) { |
| 359 | /* --- REQUIRE CANNONICAL NAN --- */ |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 360 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 361 | result.uValue = IEEE754_AssembleHalf(nIsNegative, |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 362 | HALF_QUIET_NAN_BIT, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 363 | HALF_EXPONENT_INF_OR_NAN); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 364 | } else { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 365 | /* The NaN can only be converted if no payload bits are lost |
| 366 | * per RFC 8949 section 4.1 that defines Preferred |
| 367 | * Serializaton. Note that Deterministically Encode CBOR in |
| 368 | * section 4.2 allows for some variation of this rule, but at |
| 369 | * the moment this implementation is of Preferred |
| 370 | * Serialization, not CDE. As of December 2023, we are also |
| 371 | * expecting an update to CDE. This code may need to be |
| 372 | * updated for CDE. |
| 373 | */ |
| 374 | uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS); |
| 375 | if(uDroppedBits == 0) { |
| 376 | /* --- IS CONVERTABLE NAN --- */ |
| 377 | uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 378 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 379 | result.uValue = IEEE754_AssembleHalf(nIsNegative, |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 380 | uHalfSignificand, |
| 381 | HALF_EXPONENT_INF_OR_NAN); |
| 382 | |
| 383 | } else { |
| 384 | /* --- IS UNCONVERTABLE NAN --- */ |
| 385 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 386 | result.uValue = uSingle; |
| 387 | } |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 388 | } |
| 389 | } |
| 390 | } else { |
| 391 | /* ---- REGULAR NUMBER ---- */ |
| 392 | /* A regular single can be converted to a regular half if the |
| 393 | * single's exponent is in the smaller range of a half and if no |
| 394 | * precision is lost in the significand. |
| 395 | */ |
| 396 | if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN && |
| 397 | nSingleUnbiasedExponent <= HALF_EXPONENT_MAX && |
| 398 | (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) { |
| 399 | uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 400 | |
| 401 | /* --- CONVERT TO HALF NORMAL --- */ |
| 402 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 403 | result.uValue = IEEE754_AssembleHalf(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 404 | uHalfSignificand, |
| 405 | nSingleUnbiasedExponent); |
| 406 | } else { |
| 407 | /* Unable to convert to a half normal. See if it can be |
| 408 | * converted to a half subnormal. To do that, the exponent |
| 409 | * must be in range and no precision can be lost in the |
| 410 | * signficand. |
| 411 | * |
| 412 | * This is more complicated because the number is not |
| 413 | * normalized. The signficand must be shifted proprotionally |
| 414 | * to the exponent and 1 must be added in. See |
| 415 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 416 | * |
| 417 | * Exponents -14 to -24 map to a shift of 0 to 10 of the |
| 418 | * significand. The largest value of a half subnormal has an |
| 419 | * exponent of -14. Subnormals are not normalized like |
| 420 | * normals meaning they lose precision as the numbers get |
| 421 | * smaller. Normals don't lose precision because the exponent |
| 422 | * allows all the bits of the significand to be significant. |
| 423 | */ |
| 424 | /* The exponent of the largest possible half-precision |
| 425 | * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger |
| 426 | * than this are normal and handled above. We're going to |
| 427 | * shift the significand right by at least this amount. |
| 428 | */ |
| 429 | nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN); |
| 430 | |
| 431 | /* In addition to the shift based on the exponent's value, |
| 432 | * the single significand has to be shifted right to fit into |
| 433 | * a half-precision significand */ |
| 434 | nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS); |
| 435 | |
| 436 | /* Must add 1 in to the possible significand because there is |
| 437 | * an implied 1 for normal values and not for subnormal |
| 438 | * values. See equations here: |
| 439 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 440 | */ |
| 441 | uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount; |
| 442 | |
| 443 | /* If only zero bits get shifted out, this can be converted |
| 444 | * to subnormal */ |
| 445 | if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN && |
| 446 | nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS && |
| 447 | uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) { |
| 448 | /* --- CONVERTABLE TO HALF SUBNORMAL --- */ |
| 449 | result.uSize = IEEE754_UNION_IS_HALF; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 450 | result.uValue = IEEE754_AssembleHalf(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 451 | uHalfSignificand, |
| 452 | HALF_EXPONENT_ZERO); |
| 453 | } else { |
| 454 | /* --- DO NOT CONVERT --- */ |
| 455 | result.uSize = IEEE754_UNION_IS_SINGLE; |
| 456 | result.uValue = uSingle; |
| 457 | } |
| 458 | } |
| 459 | } |
| 460 | |
| 461 | return result; |
| 462 | } |
| 463 | |
| 464 | |
| 465 | /** |
| 466 | * @brief Assemble sign, significand and exponent into single precision float. |
| 467 | * |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 468 | * @param[in] nIsNegative 0 if positive, 1 if negative. |
| 469 | * @pararm[in] uSingleSignificand Bits of the significand. |
| 470 | * @param[in] nSingleUnBiasedExponent Exponent. |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 471 | * |
| 472 | * This returns the bits for a single-precision float, a binary32 as |
| 473 | * specified in IEEE754. It is returned as a uint64_t rather than a |
| 474 | * uint32_t or a float for convenience of usage. |
| 475 | */ |
| 476 | static uint64_t |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 477 | IEEE754_AssembleSingle(int nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 478 | uint64_t uSingleSignificand, |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 479 | int nSingleUnBiasedExponent) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 480 | { |
| 481 | uint64_t uSingleBiasedExponent; |
| 482 | |
| 483 | uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS); |
| 484 | |
| 485 | return uSingleSignificand | |
| 486 | (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 487 | ((uint64_t)nIsNegative << SINGLE_SIGN_SHIFT); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 488 | } |
| 489 | |
| 490 | |
| 491 | /** |
| 492 | * @brief Convert a double-precision float to single-precision. |
| 493 | * |
| 494 | * @param[in] d The value to convert. |
| 495 | * |
| 496 | * @returns Either unconverted value or value converted to single-precision. |
| 497 | * |
| 498 | * This always succeeds. If the value cannot be converted without the |
| 499 | * loss of precision, it is not converted. |
| 500 | * |
| 501 | * This handles all subnormals and NaN payloads. |
| 502 | */ |
| 503 | static IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 504 | IEEE754_DoubleToSingle(const double d) |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 505 | { |
| 506 | IEEE754_union Result; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 507 | int nExponentDifference; |
| 508 | int nShiftAmount; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 509 | uint64_t uSingleSignificand; |
| 510 | uint64_t uDroppedBits; |
| 511 | |
| 512 | |
| 513 | /* Pull the three parts out of the double-precision float. Most |
| 514 | * work is done with uint64_t which helps avoid integer promotions |
| 515 | * and static analyzer complaints. |
| 516 | */ |
| 517 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 518 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 519 | const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
| 520 | const int nIsNegative = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT; |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 521 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
| 522 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 523 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) { |
| 524 | if(uDoubleSignificand == 0) { |
| 525 | /* --- IS ZERO --- */ |
| 526 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 527 | Result.uValue = IEEE754_AssembleSingle(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 528 | 0, |
| 529 | SINGLE_EXPONENT_ZERO); |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 530 | } else { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 531 | /* --- IS DOUBLE SUBNORMAL --- */ |
| 532 | /* The largest double subnormal is slightly less than the |
| 533 | * largest double normal which is 2^-1022 or |
| 534 | * 2.2250738585072014e-308. The smallest single subnormal |
| 535 | * is 2^-149 or 1.401298464324817e-45. There is no |
| 536 | * overlap so double subnormals can't be converted to |
| 537 | * singles of any sort. |
| 538 | */ |
| 539 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 540 | Result.uValue = uDouble; |
| 541 | } |
| 542 | } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) { |
| 543 | if(uDoubleSignificand == 0) { |
| 544 | /* ---- IS INFINITY ---- */ |
| 545 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 546 | Result.uValue = IEEE754_AssembleSingle(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 547 | 0, |
| 548 | SINGLE_EXPONENT_INF_OR_NAN); |
| 549 | } else { |
| 550 | /* The NaN can only be converted if no payload bits are |
| 551 | * lost per RFC 8949 section 4.1 that defines Preferred |
| 552 | * Serializaton. Note that Deterministically Encode CBOR |
| 553 | * in section 4.2 allows for some variation of this rule, |
| 554 | * but at the moment this implementation is of Preferred |
| 555 | * Serialization, not CDE. As of December 2023, we are |
| 556 | * also expecting an update to CDE. This code may need to |
| 557 | * be updated for CDE. |
| 558 | */ |
| 559 | uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS); |
| 560 | if(uDroppedBits == 0) { |
| 561 | /* --- IS CONVERTABLE NAN --- */ |
| 562 | uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 563 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 564 | Result.uValue = IEEE754_AssembleSingle(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 565 | uSingleSignificand, |
| 566 | SINGLE_EXPONENT_INF_OR_NAN); |
| 567 | } else { |
| 568 | /* --- IS UNCONVERTABLE NAN --- */ |
| 569 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 570 | Result.uValue = uDouble; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 571 | } |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 572 | } |
| 573 | } else { |
| 574 | /* ---- REGULAR NUMBER ---- */ |
| 575 | /* A regular double can be converted to a regular single if |
| 576 | * the double's exponent is in the smaller range of a single |
| 577 | * and if no precision is lost in the significand. |
| 578 | */ |
| 579 | uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS); |
| 580 | if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN && |
| 581 | nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX && |
| 582 | uDroppedBits == 0) { |
| 583 | /* --- IS CONVERTABLE TO SINGLE --- */ |
| 584 | uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 585 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 586 | Result.uValue = IEEE754_AssembleSingle(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 587 | uSingleSignificand, |
| 588 | nDoubleUnbiasedExponent); |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 589 | } else { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 590 | /* Unable to convert to a single normal. See if it can be |
| 591 | * converted to a single subnormal. To do that, the |
| 592 | * exponent must be in range and no precision can be lost |
| 593 | * in the signficand. |
| 594 | * |
| 595 | * This is more complicated because the number is not |
| 596 | * normalized. The signficand must be shifted |
| 597 | * proprotionally to the exponent and 1 must be added |
| 598 | * in. See |
| 599 | * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding |
| 600 | */ |
| 601 | nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN); |
| 602 | nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS); |
| 603 | uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount; |
| 604 | |
| 605 | if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN && |
| 606 | nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS && |
| 607 | uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) { |
| 608 | /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */ |
| 609 | Result.uSize = IEEE754_UNION_IS_SINGLE; |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 610 | Result.uValue = IEEE754_AssembleSingle(nIsNegative, |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 611 | uSingleSignificand, |
| 612 | SINGLE_EXPONENT_ZERO); |
| 613 | } else { |
| 614 | /* --- CAN NOT BE CONVERTED --- */ |
| 615 | Result.uSize = IEEE754_UNION_IS_DOUBLE; |
| 616 | Result.uValue = uDouble; |
| 617 | } |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 618 | } |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 619 | } |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 620 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 621 | return Result; |
Laurence Lundblade | 67bd551 | 2018-11-02 21:44:06 +0700 | [diff] [blame] | 622 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 623 | |
| 624 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 625 | /* Public function; see ieee754.h */ |
| 626 | IEEE754_union |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 627 | IEEE754_DoubleToSmaller(const double d, |
| 628 | const int bAllowHalfPrecision, |
| 629 | const int bNoNanPayload) |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 630 | { |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 631 | IEEE754_union result; |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 632 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 633 | result = IEEE754_DoubleToSingle(d); |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 634 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 635 | if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) { |
| 636 | /* Cast to uint32_t is OK, because value was just successfully |
| 637 | * converted to single. */ |
| 638 | float uSingle = CopyUint32ToSingle((uint32_t)result.uValue); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 639 | result = IEEE754_SingleToHalf(uSingle, bNoNanPayload); |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 640 | } |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 641 | |
Laurence Lundblade | 83dbf5c | 2024-01-07 19:17:52 -0700 | [diff] [blame] | 642 | return result; |
Laurence Lundblade | 12d32c5 | 2018-09-19 11:25:27 -0700 | [diff] [blame] | 643 | } |
| 644 | |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 645 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 646 | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 647 | |
| 648 | /** |
| 649 | * @brief Count the bits of preceision in a significand. |
| 650 | * |
| 651 | * @param[in] uSignificand The significand as uint64_t. |
| 652 | * |
| 653 | * @return The number of bits set. |
| 654 | |
| 655 | * This returns 64 minus the number of zero bits on the right. It is |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 656 | * is the amount of precision in the 64-bit significand passed in. |
| 657 | * When used for 52 and 23-bit significands, subtract 12 and 41 |
| 658 | * to get their precision. |
| 659 | * |
| 660 | * The value returned is for a *normalized* number like the |
| 661 | * significand of a double. When used for precision for a non-normalized |
| 662 | * number like a uint64_t, further computation is required. |
| 663 | * |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 664 | * If the significand is 0, then 0 is returned as the precision. |
| 665 | */ |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 666 | static int |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 667 | IEEE754_Private_CountPrecisionBits(uint64_t uSignificand) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 668 | { |
| 669 | int nNonZeroBitsCount; |
| 670 | uint64_t uMask; |
Laurence Lundblade | 3aee3a3 | 2018-12-17 16:17:45 -0800 | [diff] [blame] | 671 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 672 | for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) { |
| 673 | uMask = 0x01UL << (64 - nNonZeroBitsCount); |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 674 | if(uMask & uSignificand) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 675 | break; |
| 676 | } |
| 677 | } |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 678 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 679 | return nNonZeroBitsCount; |
| 680 | } |
| 681 | |
| 682 | |
| 683 | /* Public function; see ieee754.h */ |
| 684 | struct IEEE754_ToInt |
| 685 | IEEE754_DoubleToInt(const double d) |
| 686 | { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 687 | int64_t nPrecisionBits; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 688 | struct IEEE754_ToInt Result; |
| 689 | uint64_t uInteger; |
| 690 | |
| 691 | /* Pull the three parts out of the double-precision float. Most |
| 692 | * work is done with uint64_t which helps avoid integer promotions |
| 693 | * and static analyzer complaints. |
| 694 | */ |
| 695 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 696 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
| 697 | /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */ |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 698 | const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 699 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 700 | const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 701 | |
| 702 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) { |
| 703 | if(uDoubleSignificand == 0) { |
| 704 | /* --- POSITIVE AND NEGATIVE ZERO --- */ |
| 705 | Result.integer.un_signed = 0; |
| 706 | Result.type = IEEE754_ToInt_IS_UINT; |
| 707 | } else { |
| 708 | /* --- SUBNORMAL --- */ |
| 709 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 710 | } |
| 711 | } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) { |
| 712 | if(uDoubleSignificand != 0) { |
| 713 | /* --- NAN --- */ |
| 714 | Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */ |
| 715 | } else { |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 716 | /* --- INFINITY --- */ |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 717 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 718 | } |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 719 | } else if(nDoubleUnbiasedExponent < 0) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 720 | /* --- Exponent out of range --- */ |
| 721 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 722 | } else if(nDoubleUnbiasedExponent >= 64) { |
| 723 | if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) { |
| 724 | /* Very special case for -18446744073709551616.0 */ |
| 725 | Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */ |
| 726 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 727 | } else { |
| 728 | /* --- Exponent out of range --- */ |
| 729 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 730 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 731 | } else { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 732 | /* Conversion only fails when the input is too large or is not a |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 733 | * whole number, never because of lack of precision because |
| 734 | * 64-bit integers always have more precision than the 52-bits |
| 735 | * of a double. |
| 736 | */ |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 737 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) - |
| 738 | (64-DOUBLE_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 739 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 740 | if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 741 | /* --- Not a whole number --- */ |
| 742 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 743 | } else { |
| 744 | /* --- CONVERTABLE WHOLE NUMBER --- */ |
| 745 | /* Add in the one that is implied in normal floats */ |
| 746 | uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS); |
| 747 | /* Factor in the exponent */ |
| 748 | if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) { |
| 749 | /* Numbers less than 2^52 with up to 52 significant bits */ |
| 750 | uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent; |
| 751 | } else { |
| 752 | /* Numbers greater than 2^52 with at most 52 significant bits */ |
| 753 | uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS; |
| 754 | } |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 755 | if(bIsNegative) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 756 | /* Cast safe because exponent range check above */ |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 757 | if(nDoubleUnbiasedExponent == 63) { |
| 758 | Result.integer.un_signed = uInteger; |
| 759 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 760 | } else { |
| 761 | Result.integer.is_signed = -((int64_t)uInteger); |
| 762 | Result.type = IEEE754_ToInt_IS_INT; |
| 763 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 764 | } else { |
| 765 | Result.integer.un_signed = uInteger; |
| 766 | Result.type = IEEE754_ToInt_IS_UINT; |
| 767 | } |
| 768 | } |
| 769 | } |
| 770 | |
| 771 | return Result; |
| 772 | } |
| 773 | |
| 774 | |
| 775 | /* Public function; see ieee754.h */ |
| 776 | struct IEEE754_ToInt |
| 777 | IEEE754_SingleToInt(const float f) |
| 778 | { |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 779 | int nPrecisionBits; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 780 | struct IEEE754_ToInt Result; |
| 781 | uint64_t uInteger; |
| 782 | |
| 783 | /* Pull the three parts out of the single-precision float. Most |
| 784 | * work is done with uint32_t which helps avoid integer promotions |
| 785 | * and static analyzer complaints. |
| 786 | */ |
| 787 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 788 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
| 789 | /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */ |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 790 | const int nSingleUnbiasedExponent = (int)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 791 | const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 792 | const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK; |
| 793 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 794 | |
| 795 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) { |
| 796 | if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) { |
| 797 | /* --- POSITIVE AND NEGATIVE ZERO --- */ |
| 798 | Result.integer.un_signed = 0; |
| 799 | Result.type = IEEE754_ToInt_IS_UINT; |
| 800 | } else { |
| 801 | /* --- Subnormal --- */ |
| 802 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 803 | } |
| 804 | } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) { |
| 805 | /* --- NAN or INFINITY --- */ |
| 806 | if(uSingleleSignificand != 0) { |
| 807 | Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */ |
| 808 | } else { |
| 809 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 810 | } |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 811 | } else if(nSingleUnbiasedExponent < 0) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 812 | /* --- Exponent out of range --- */ |
| 813 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 814 | } else if(nSingleUnbiasedExponent >= 64) { |
| 815 | if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) { |
| 816 | /* Very special case for -18446744073709551616.0 */ |
| 817 | Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */ |
| 818 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 819 | } else { |
| 820 | /* --- Exponent out of range --- */ |
| 821 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 822 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 823 | } else { |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 824 | /* Conversion only fails when the input is too large or is not a |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 825 | * whole number, never because of lack of precision because |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 826 | * 64-bit integers always have more precision than the 23 bits |
| 827 | * of a single. |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 828 | */ |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 829 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) - |
| 830 | (64 - SINGLE_NUM_SIGNIFICAND_BITS); |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 831 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 832 | if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 833 | /* --- Not a whole number --- */ |
| 834 | Result.type = IEEE754_ToInt_NO_CONVERSION; |
| 835 | } else { |
| 836 | /* --- CONVERTABLE WHOLE NUMBER --- */ |
| 837 | /* Add in the one that is implied in normal floats */ |
| 838 | uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS); |
| 839 | /* Factor in the exponent */ |
| 840 | if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) { |
| 841 | /* Numbers less than 2^23 with up to 23 significant bits */ |
| 842 | uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent; |
| 843 | } else { |
| 844 | /* Numbers greater than 2^23 with at most 23 significant bits*/ |
| 845 | uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS; |
| 846 | } |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 847 | if(bIsNegative) { |
| 848 | /* Cast safe because exponent range check above */ |
| 849 | if(nSingleUnbiasedExponent == 63) { |
| 850 | Result.integer.un_signed = uInteger; |
| 851 | Result.type = IEEE754_ToInt_IS_65BIT_NEG; |
| 852 | } else { |
| 853 | Result.integer.is_signed = -((int64_t)uInteger); |
| 854 | Result.type = IEEE754_ToInt_IS_INT; |
| 855 | } |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 856 | } else { |
| 857 | Result.integer.un_signed = uInteger; |
| 858 | Result.type = IEEE754_ToInt_IS_UINT; |
| 859 | } |
| 860 | } |
| 861 | } |
| 862 | |
| 863 | return Result; |
| 864 | } |
Laurence Lundblade | fe09bbf | 2020-07-16 12:14:51 -0700 | [diff] [blame] | 865 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 866 | |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 867 | /* Public function; see ieee754.h */ |
| 868 | double |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 869 | IEEE754_UintToDouble(const uint64_t uInt, const int nIsNegative) |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 870 | { |
| 871 | int nDoubleUnbiasedExponent; |
| 872 | uint64_t uDoubleSignificand; |
| 873 | int nPrecisionBits; |
| 874 | |
Laurence Lundblade | 14ce228 | 2024-07-24 22:13:35 -0700 | [diff] [blame] | 875 | if(uInt == 0) { |
| 876 | uDoubleSignificand = 0; |
| 877 | nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO; |
| 878 | |
| 879 | } else { |
| 880 | /* Figure out the exponent and normalize the significand. This is |
| 881 | * done by shifting out all leading zero bits and counting them. If |
| 882 | * none are shifted out, the exponent is 63. */ |
| 883 | uDoubleSignificand = uInt; |
| 884 | nDoubleUnbiasedExponent = 63; |
| 885 | while(1) { |
| 886 | if(uDoubleSignificand & 0x8000000000000000UL) { |
| 887 | break; |
| 888 | } |
| 889 | uDoubleSignificand <<= 1; |
| 890 | nDoubleUnbiasedExponent--; |
| 891 | }; |
| 892 | |
| 893 | /* Position significand correctly for a double. Only shift 63 bits |
| 894 | * because of the 1 that is present by implication in IEEE 754.*/ |
| 895 | uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS; |
| 896 | |
| 897 | /* Subtract 1 which is present by implication in IEEE 754 */ |
| 898 | uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS); |
| 899 | |
| 900 | nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent); |
| 901 | |
| 902 | if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) { |
| 903 | /* Will lose precision if converted */ |
| 904 | return IEEE754_UINT_TO_DOUBLE_OOB; |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 905 | } |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 906 | } |
| 907 | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 908 | return IEEE754_AssembleDouble(nIsNegative, |
Laurence Lundblade | d883ad3 | 2024-03-23 22:37:37 -0700 | [diff] [blame] | 909 | uDoubleSignificand, |
| 910 | nDoubleUnbiasedExponent); |
| 911 | } |
| 912 | |
Laurence Lundblade | cb7282d | 2024-11-09 23:01:11 -0800 | [diff] [blame] | 913 | #endif /* ! QCBOR_DISABLE_PREFERRED_FLOAT */ |
| 914 | |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 915 | |
| 916 | |
| 917 | |
| 918 | /* Public function; see ieee754.h */ |
| 919 | int |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 920 | IEEE754_DoubleHasNaNPayload(const double d) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 921 | { |
| 922 | const uint64_t uDouble = CopyDoubleToUint64(d); |
| 923 | const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT; |
| 924 | /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */ |
| 925 | const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS; |
| 926 | const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK; |
| 927 | |
| 928 | if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN && |
| 929 | uDoubleSignificand != 0 && |
| 930 | uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) { |
| 931 | return 1; |
| 932 | } else { |
| 933 | return 0; |
| 934 | } |
| 935 | } |
| 936 | |
| 937 | |
| 938 | /* Public function; see ieee754.h */ |
| 939 | int |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 940 | IEEE754_SingleHasNaNPayload(const float f) |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 941 | { |
| 942 | const uint32_t uSingle = CopyFloatToUint32(f); |
| 943 | const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT; |
| 944 | /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */ |
| 945 | const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS; |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 946 | const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK; |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 947 | |
| 948 | if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN && |
Laurence Lundblade | 9b2ae8a | 2024-07-12 11:00:20 -0700 | [diff] [blame] | 949 | uSingleSignificand != 0 && |
| 950 | uSingleSignificand != SINGLE_QUIET_NAN_BIT) { |
Laurence Lundblade | eb3cdef | 2024-02-17 20:38:55 -0800 | [diff] [blame] | 951 | return 1; |
| 952 | } else { |
| 953 | return 0; |
| 954 | } |
| 955 | } |