blob: e41aef592921701ae4c8a2886cfea4332f78bf26 [file] [log] [blame]
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07001/* ==========================================================================
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -08002 * ieee754.c -- floating-point conversion for half, double & single-precision
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07003 *
4 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
5 * Copyright (c) 2021, Arm Limited. All rights reserved.
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
Laurence Lundbladee8f58162024-08-22 10:30:08 -07009 * See BSD-3-Clause license in file named "LICENSE"
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070010 *
11 * Created on 7/23/18
12 * ========================================================================== */
Laurence Lundbladecc2ed342018-09-22 17:29:55 -070013
Máté Tóth-Pálef5f07a2021-09-17 19:31:37 +020014#include "qcbor/qcbor_common.h"
15
Laurence Lundblade12d32c52018-09-19 11:25:27 -070016#include "ieee754.h"
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070017#include <string.h> /* For memcpy() */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070018
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070019
Laurence Lundblade12d32c52018-09-19 11:25:27 -070020/*
Laurence Lundbladecb7282d2024-11-09 23:01:11 -080021 * This code has long lines and is easier to read because of
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070022 * them. Some coding guidelines prefer 80 column lines (can they not
23 * afford big displays?).
24 *
25 * This code works solely using shifts and masks and thus has no
26 * dependency on any math libraries. It can even work if the CPU
27 * doesn't have any floating-point support, though that isn't the most
28 * useful thing to do.
29 *
30 * The memcpy() dependency is only for CopyFloatToUint32() and friends
31 * which only is needed to avoid type punning when converting the
32 * actual float bits to an unsigned value so the bit shifts and masks
33 * can work.
34 *
35 * The references used to write this code:
36 *
37 * IEEE 754-2008, particularly section 3.6 and 6.2.1
38 *
39 * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
40 *
41 * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
42 *
43 * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
44 *
45 * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
46 *
47 * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
48 * needed. It can be retrieved from github history if needed.
Laurence Lundblade12d32c52018-09-19 11:25:27 -070049 */
50
51
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070052
53
54/* ----- Half Precsion ----------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070055#define HALF_NUM_SIGNIFICAND_BITS (10)
56#define HALF_NUM_EXPONENT_BITS (5)
57#define HALF_NUM_SIGN_BITS (1)
58
59#define HALF_SIGNIFICAND_SHIFT (0)
60#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
61#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
62
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070063#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits
Laurence Lundblade06350ea2020-01-27 19:32:40 -080064#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070065#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
Laurence Lundblade06350ea2020-01-27 19:32:40 -080066#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
Laurence Lundblade12d32c52018-09-19 11:25:27 -070067
68/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070069 * 0x00 0 -15 0 and subnormal
70 * 0x01 1 -14 Smallest normal exponent
71 * 0x1e 30 15 Largest normal exponent
72 * 0x1F 31 16 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070073#define HALF_EXPONENT_BIAS (15)
74#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
75#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
76#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
77#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
78
79
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070080/* ------ Single-Precision -------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070081#define SINGLE_NUM_SIGNIFICAND_BITS (23)
82#define SINGLE_NUM_EXPONENT_BITS (8)
83#define SINGLE_NUM_SIGN_BITS (1)
84
85#define SINGLE_SIGNIFICAND_SHIFT (0)
86#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
87#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
88
Laurence Lundblade06350ea2020-01-27 19:32:40 -080089#define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits
90#define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
91#define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
92#define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
Laurence Lundblade12d32c52018-09-19 11:25:27 -070093
94/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070095 * 0x0000 0 -127 0 and subnormal
96 * 0x0001 1 -126 Smallest normal exponent
97 * 0x7f 127 0 1
98 * 0xfe 254 127 Largest normal exponent
99 * 0xff 255 128 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700100#define SINGLE_EXPONENT_BIAS (127)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700101#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)
102#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)
103#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)
104#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700105
106
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700107/* --------- Double-Precision ---------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700108#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
109#define DOUBLE_NUM_EXPONENT_BITS (11)
110#define DOUBLE_NUM_SIGN_BITS (1)
111
112#define DOUBLE_SIGNIFICAND_SHIFT (0)
113#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
114#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
115
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700116#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
117#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
118#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
119#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
120
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700121
122/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700123 * 0x00000000 0 -1023 0 and subnormal
124 * 0x00000001 1 -1022 Smallest normal exponent
125 * 0x000007fe 2046 1023 Largest normal exponent
126 * 0x000007ff 2047 1024 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700127#define DOUBLE_EXPONENT_BIAS (1023)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700128#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)
129#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)
130#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)
131#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)
132
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700133
134
135
136/*
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700137 * Convenient functions to avoid type punning, compiler warnings and
138 * such. The optimizer reduces them to a simple assignment. This is a
139 * crusty corner of C. It shouldn't be this hard.
140 *
141 * These are also in UsefulBuf.h under a different name. They are copied
142 * here to avoid a dependency on UsefulBuf.h. There is no object code
143 * size impact because these always optimze down to a simple assignment.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700144 */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700145static inline uint32_t
146CopyFloatToUint32(float f)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700147{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700148 uint32_t u32;
149 memcpy(&u32, &f, sizeof(uint32_t));
150 return u32;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700151}
152
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700153static inline uint64_t
154CopyDoubleToUint64(double d)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700155{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700156 uint64_t u64;
157 memcpy(&u64, &d, sizeof(uint64_t));
158 return u64;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700159}
160
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800161
162#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
163
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700164static inline double
165CopyUint64ToDouble(uint64_t u64)
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700166{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700167 double d;
168 memcpy(&d, &u64, sizeof(uint64_t));
169 return d;
170}
171
172static inline float
173CopyUint32ToSingle(uint32_t u32)
174{
175 float f;
176 memcpy(&f, &u32, sizeof(uint32_t));
177 return f;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700178}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700179
180
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800181
182
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700183/**
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800184 * @brief Assemble sign, significand and exponent into double precision float.
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700185 *
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800186 * @param[in] nIsNegative 0 if positive, 1 if negative.
187 * @pararm[in] uDoubleSignificand Bits of the significand.
188 * @param[in] nDoubleUnBiasedExponent Exponent.
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700189 *
190 * This returns the bits for a single-precision float, a binary64
191 * as specified in IEEE754.
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700192 */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700193static double
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800194IEEE754_AssembleDouble(int nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700195 uint64_t uDoubleSignificand,
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800196 int nDoubleUnBiasedExponent)
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700197{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700198 uint64_t uDoubleBiasedExponent;
199
200 uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
201
202 return CopyUint64ToDouble(uDoubleSignificand |
203 (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800204 ((uint64_t)nIsNegative << DOUBLE_SIGN_SHIFT));
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700205}
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800206
207
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800208/* Public function; see ieee754.h */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700209double
210IEEE754_HalfToDouble(uint16_t uHalfPrecision)
211{
212 uint64_t uDoubleSignificand;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800213 int nDoubleUnBiasedExponent;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700214 double dResult;
215
216 /* Pull out the three parts of the half-precision float. Do all
217 * the work in 64 bits because that is what the end result is. It
218 * may give smaller code size and will keep static analyzers
219 * happier.
220 */
221 const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
222 const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800223 const int nHalfUnBiasedExponent = (int)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
224 const int nIsNegative = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700225
226 if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
227 /* 0 or subnormal */
228 if(uHalfSignificand) {
229 /* --- SUBNORMAL --- */
230 /* A half-precision subnormal can always be converted to a
231 * normal double-precision float because the ranges line up.
232 * The exponent of a subnormal starts out at the min exponent
233 * for a normal. As the sub normal significand bits are
234 * shifted, left to normalize, the exponent is
235 * decremented. Shifting continues until fully normalized.
236 */
237 nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
238 uDoubleSignificand = uHalfSignificand;
239 do {
240 uDoubleSignificand <<= 1;
241 nDoubleUnBiasedExponent--;
242 } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
243 /* A normal has an implied 1 in the most significant
244 * position that a subnormal doesn't. */
245 uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
246 /* Must shift into place for a double significand */
247 uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
248
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800249 dResult = IEEE754_AssembleDouble(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700250 uDoubleSignificand,
251 nDoubleUnBiasedExponent);
252 } else {
253 /* --- ZERO --- */
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800254 dResult = IEEE754_AssembleDouble(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700255 0,
256 DOUBLE_EXPONENT_ZERO);
257 }
258 } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
259 /* NaN or Inifinity */
260 if(uHalfSignificand) {
261 /* --- NaN --- */
262 /* Half-precision payloads always fit into double precision
263 * payloads. They are shifted left the same as a normal
264 * number significand.
265 */
266 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800267 dResult = IEEE754_AssembleDouble(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700268 uDoubleSignificand,
269 DOUBLE_EXPONENT_INF_OR_NAN);
270 } else {
271 /* --- INFINITY --- */
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800272 dResult = IEEE754_AssembleDouble(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700273 0,
274 DOUBLE_EXPONENT_INF_OR_NAN);
275 }
276 } else {
277 /* --- NORMAL NUMBER --- */
278 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800279 dResult = IEEE754_AssembleDouble(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700280 uDoubleSignificand,
281 nHalfUnBiasedExponent);
282 }
283
284 return dResult;
285}
286
287
288/**
289 * @brief Assemble sign, significand and exponent into single precision float.
290 *
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800291 * @param[in] nIsNegative 0 if positive, 1 if negative
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700292 * @pararm[in] uHalfSignificand Bits of the significand
293 * @param[in] nHalfUnBiasedExponent Exponent
294 *
295 * This returns the bits for a single-precision float, a binary32 as
296 * specified in IEEE754. It is returned as a uint64_t rather than a
297 * uint32_t or a float for convenience of usage.
298 */
299static uint32_t
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800300IEEE754_AssembleHalf(int nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700301 uint32_t uHalfSignificand,
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800302 int nHalfUnBiasedExponent)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700303{
304 uint32_t uHalfUnbiasedExponent;
305
306 uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
307
308 return uHalfSignificand |
309 (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800310 ((uint32_t)nIsNegative << HALF_SIGN_SHIFT);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700311}
312
313
314/* Public function; see ieee754.h */
315IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800316IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700317{
318 IEEE754_union result;
319 uint32_t uDroppedBits;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800320 int nExponentDifference;
321 int nShiftAmount;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700322 uint32_t uHalfSignificand;
323
324 /* Pull the three parts out of the double-precision float Most work
325 * is done with uint32_t which helps avoid integer promotions and
326 * static analyzer complaints.
327 */
328 const uint32_t uSingle = CopyFloatToUint32(f);
329 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800330 const int nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700331 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800332 const int nIsNegative = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700333
334 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
335 if(uSingleSignificand == 0) {
336 /* --- IS ZERO --- */
337 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800338 result.uValue = IEEE754_AssembleHalf(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700339 0,
340 HALF_EXPONENT_ZERO);
341 } else {
342 /* --- IS SINGLE SUBNORMAL --- */
343 /* The largest single subnormal is slightly less than the
344 * largest single normal which is 2^-149 or
345 * 2.2040517676619426e-38. The smallest half subnormal is
346 * 2^-14 or 5.9604644775390625E-8. There is no overlap so
347 * single subnormals can't be converted to halfs of any sort.
348 */
349 result.uSize = IEEE754_UNION_IS_SINGLE;
350 result.uValue = uSingle;
351 }
352 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
353 if(uSingleSignificand == 0) {
354 /* ---- IS INFINITY ---- */
355 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800356 result.uValue = IEEE754_AssembleHalf(nIsNegative, 0, HALF_EXPONENT_INF_OR_NAN);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700357 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800358 if(bNoNaNPayload) {
359 /* --- REQUIRE CANNONICAL NAN --- */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700360 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800361 result.uValue = IEEE754_AssembleHalf(nIsNegative,
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800362 HALF_QUIET_NAN_BIT,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700363 HALF_EXPONENT_INF_OR_NAN);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700364 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800365 /* The NaN can only be converted if no payload bits are lost
366 * per RFC 8949 section 4.1 that defines Preferred
367 * Serializaton. Note that Deterministically Encode CBOR in
368 * section 4.2 allows for some variation of this rule, but at
369 * the moment this implementation is of Preferred
370 * Serialization, not CDE. As of December 2023, we are also
371 * expecting an update to CDE. This code may need to be
372 * updated for CDE.
373 */
374 uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
375 if(uDroppedBits == 0) {
376 /* --- IS CONVERTABLE NAN --- */
377 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
378 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800379 result.uValue = IEEE754_AssembleHalf(nIsNegative,
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800380 uHalfSignificand,
381 HALF_EXPONENT_INF_OR_NAN);
382
383 } else {
384 /* --- IS UNCONVERTABLE NAN --- */
385 result.uSize = IEEE754_UNION_IS_SINGLE;
386 result.uValue = uSingle;
387 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700388 }
389 }
390 } else {
391 /* ---- REGULAR NUMBER ---- */
392 /* A regular single can be converted to a regular half if the
393 * single's exponent is in the smaller range of a half and if no
394 * precision is lost in the significand.
395 */
396 if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
397 nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
398 (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
399 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
400
401 /* --- CONVERT TO HALF NORMAL --- */
402 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800403 result.uValue = IEEE754_AssembleHalf(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700404 uHalfSignificand,
405 nSingleUnbiasedExponent);
406 } else {
407 /* Unable to convert to a half normal. See if it can be
408 * converted to a half subnormal. To do that, the exponent
409 * must be in range and no precision can be lost in the
410 * signficand.
411 *
412 * This is more complicated because the number is not
413 * normalized. The signficand must be shifted proprotionally
414 * to the exponent and 1 must be added in. See
415 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
416 *
417 * Exponents -14 to -24 map to a shift of 0 to 10 of the
418 * significand. The largest value of a half subnormal has an
419 * exponent of -14. Subnormals are not normalized like
420 * normals meaning they lose precision as the numbers get
421 * smaller. Normals don't lose precision because the exponent
422 * allows all the bits of the significand to be significant.
423 */
424 /* The exponent of the largest possible half-precision
425 * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger
426 * than this are normal and handled above. We're going to
427 * shift the significand right by at least this amount.
428 */
429 nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
430
431 /* In addition to the shift based on the exponent's value,
432 * the single significand has to be shifted right to fit into
433 * a half-precision significand */
434 nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
435
436 /* Must add 1 in to the possible significand because there is
437 * an implied 1 for normal values and not for subnormal
438 * values. See equations here:
439 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
440 */
441 uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
442
443 /* If only zero bits get shifted out, this can be converted
444 * to subnormal */
445 if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
446 nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
447 uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
448 /* --- CONVERTABLE TO HALF SUBNORMAL --- */
449 result.uSize = IEEE754_UNION_IS_HALF;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800450 result.uValue = IEEE754_AssembleHalf(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700451 uHalfSignificand,
452 HALF_EXPONENT_ZERO);
453 } else {
454 /* --- DO NOT CONVERT --- */
455 result.uSize = IEEE754_UNION_IS_SINGLE;
456 result.uValue = uSingle;
457 }
458 }
459 }
460
461 return result;
462}
463
464
465/**
466 * @brief Assemble sign, significand and exponent into single precision float.
467 *
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800468 * @param[in] nIsNegative 0 if positive, 1 if negative.
469 * @pararm[in] uSingleSignificand Bits of the significand.
470 * @param[in] nSingleUnBiasedExponent Exponent.
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700471 *
472 * This returns the bits for a single-precision float, a binary32 as
473 * specified in IEEE754. It is returned as a uint64_t rather than a
474 * uint32_t or a float for convenience of usage.
475 */
476static uint64_t
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800477IEEE754_AssembleSingle(int nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700478 uint64_t uSingleSignificand,
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800479 int nSingleUnBiasedExponent)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700480{
481 uint64_t uSingleBiasedExponent;
482
483 uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
484
485 return uSingleSignificand |
486 (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800487 ((uint64_t)nIsNegative << SINGLE_SIGN_SHIFT);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700488}
489
490
491/**
492 * @brief Convert a double-precision float to single-precision.
493 *
494 * @param[in] d The value to convert.
495 *
496 * @returns Either unconverted value or value converted to single-precision.
497 *
498 * This always succeeds. If the value cannot be converted without the
499 * loss of precision, it is not converted.
500 *
501 * This handles all subnormals and NaN payloads.
502 */
503static IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800504IEEE754_DoubleToSingle(const double d)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700505{
506 IEEE754_union Result;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800507 int nExponentDifference;
508 int nShiftAmount;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700509 uint64_t uSingleSignificand;
510 uint64_t uDroppedBits;
511
512
513 /* Pull the three parts out of the double-precision float. Most
514 * work is done with uint64_t which helps avoid integer promotions
515 * and static analyzer complaints.
516 */
517 const uint64_t uDouble = CopyDoubleToUint64(d);
518 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800519 const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
520 const int nIsNegative = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700521 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
522
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700523 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
524 if(uDoubleSignificand == 0) {
525 /* --- IS ZERO --- */
526 Result.uSize = IEEE754_UNION_IS_SINGLE;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800527 Result.uValue = IEEE754_AssembleSingle(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700528 0,
529 SINGLE_EXPONENT_ZERO);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700530 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700531 /* --- IS DOUBLE SUBNORMAL --- */
532 /* The largest double subnormal is slightly less than the
533 * largest double normal which is 2^-1022 or
534 * 2.2250738585072014e-308. The smallest single subnormal
535 * is 2^-149 or 1.401298464324817e-45. There is no
536 * overlap so double subnormals can't be converted to
537 * singles of any sort.
538 */
539 Result.uSize = IEEE754_UNION_IS_DOUBLE;
540 Result.uValue = uDouble;
541 }
542 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
543 if(uDoubleSignificand == 0) {
544 /* ---- IS INFINITY ---- */
545 Result.uSize = IEEE754_UNION_IS_SINGLE;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800546 Result.uValue = IEEE754_AssembleSingle(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700547 0,
548 SINGLE_EXPONENT_INF_OR_NAN);
549 } else {
550 /* The NaN can only be converted if no payload bits are
551 * lost per RFC 8949 section 4.1 that defines Preferred
552 * Serializaton. Note that Deterministically Encode CBOR
553 * in section 4.2 allows for some variation of this rule,
554 * but at the moment this implementation is of Preferred
555 * Serialization, not CDE. As of December 2023, we are
556 * also expecting an update to CDE. This code may need to
557 * be updated for CDE.
558 */
559 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
560 if(uDroppedBits == 0) {
561 /* --- IS CONVERTABLE NAN --- */
562 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
563 Result.uSize = IEEE754_UNION_IS_SINGLE;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800564 Result.uValue = IEEE754_AssembleSingle(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700565 uSingleSignificand,
566 SINGLE_EXPONENT_INF_OR_NAN);
567 } else {
568 /* --- IS UNCONVERTABLE NAN --- */
569 Result.uSize = IEEE754_UNION_IS_DOUBLE;
570 Result.uValue = uDouble;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700571 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700572 }
573 } else {
574 /* ---- REGULAR NUMBER ---- */
575 /* A regular double can be converted to a regular single if
576 * the double's exponent is in the smaller range of a single
577 * and if no precision is lost in the significand.
578 */
579 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
580 if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
581 nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
582 uDroppedBits == 0) {
583 /* --- IS CONVERTABLE TO SINGLE --- */
584 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
585 Result.uSize = IEEE754_UNION_IS_SINGLE;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800586 Result.uValue = IEEE754_AssembleSingle(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700587 uSingleSignificand,
588 nDoubleUnbiasedExponent);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700589 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700590 /* Unable to convert to a single normal. See if it can be
591 * converted to a single subnormal. To do that, the
592 * exponent must be in range and no precision can be lost
593 * in the signficand.
594 *
595 * This is more complicated because the number is not
596 * normalized. The signficand must be shifted
597 * proprotionally to the exponent and 1 must be added
598 * in. See
599 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
600 */
601 nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
602 nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
603 uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
604
605 if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
606 nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
607 uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
608 /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
609 Result.uSize = IEEE754_UNION_IS_SINGLE;
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800610 Result.uValue = IEEE754_AssembleSingle(nIsNegative,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700611 uSingleSignificand,
612 SINGLE_EXPONENT_ZERO);
613 } else {
614 /* --- CAN NOT BE CONVERTED --- */
615 Result.uSize = IEEE754_UNION_IS_DOUBLE;
616 Result.uValue = uDouble;
617 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700618 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700619 }
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800620
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700621 return Result;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700622}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700623
624
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700625/* Public function; see ieee754.h */
626IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800627IEEE754_DoubleToSmaller(const double d,
628 const int bAllowHalfPrecision,
629 const int bNoNanPayload)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700630{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700631 IEEE754_union result;
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800632
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700633 result = IEEE754_DoubleToSingle(d);
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800634
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700635 if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
636 /* Cast to uint32_t is OK, because value was just successfully
637 * converted to single. */
638 float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800639 result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700640 }
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700641
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700642 return result;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700643}
644
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800645
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700646
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800647
648/**
649 * @brief Count the bits of preceision in a significand.
650 *
651 * @param[in] uSignificand The significand as uint64_t.
652 *
653 * @return The number of bits set.
654
655 * This returns 64 minus the number of zero bits on the right. It is
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700656 * is the amount of precision in the 64-bit significand passed in.
657 * When used for 52 and 23-bit significands, subtract 12 and 41
658 * to get their precision.
659 *
660 * The value returned is for a *normalized* number like the
661 * significand of a double. When used for precision for a non-normalized
662 * number like a uint64_t, further computation is required.
663 *
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800664 * If the significand is 0, then 0 is returned as the precision.
665 */
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800666static int
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800667IEEE754_Private_CountPrecisionBits(uint64_t uSignificand)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800668{
669 int nNonZeroBitsCount;
670 uint64_t uMask;
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800671
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700672 for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {
673 uMask = 0x01UL << (64 - nNonZeroBitsCount);
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800674 if(uMask & uSignificand) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800675 break;
676 }
677 }
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700678
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800679 return nNonZeroBitsCount;
680}
681
682
683/* Public function; see ieee754.h */
684struct IEEE754_ToInt
685IEEE754_DoubleToInt(const double d)
686{
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700687 int64_t nPrecisionBits;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800688 struct IEEE754_ToInt Result;
689 uint64_t uInteger;
690
691 /* Pull the three parts out of the double-precision float. Most
692 * work is done with uint64_t which helps avoid integer promotions
693 * and static analyzer complaints.
694 */
695 const uint64_t uDouble = CopyDoubleToUint64(d);
696 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
697 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800698 const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800699 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700700 const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800701
702 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
703 if(uDoubleSignificand == 0) {
704 /* --- POSITIVE AND NEGATIVE ZERO --- */
705 Result.integer.un_signed = 0;
706 Result.type = IEEE754_ToInt_IS_UINT;
707 } else {
708 /* --- SUBNORMAL --- */
709 Result.type = IEEE754_ToInt_NO_CONVERSION;
710 }
711 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
712 if(uDoubleSignificand != 0) {
713 /* --- NAN --- */
714 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
715 } else {
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700716 /* --- INFINITY --- */
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800717 Result.type = IEEE754_ToInt_NO_CONVERSION;
718 }
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700719 } else if(nDoubleUnbiasedExponent < 0) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800720 /* --- Exponent out of range --- */
721 Result.type = IEEE754_ToInt_NO_CONVERSION;
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700722 } else if(nDoubleUnbiasedExponent >= 64) {
723 if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) {
724 /* Very special case for -18446744073709551616.0 */
725 Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */
726 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
727 } else {
728 /* --- Exponent out of range --- */
729 Result.type = IEEE754_ToInt_NO_CONVERSION;
730 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800731 } else {
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700732 /* Conversion only fails when the input is too large or is not a
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800733 * whole number, never because of lack of precision because
734 * 64-bit integers always have more precision than the 52-bits
735 * of a double.
736 */
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700737 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) -
738 (64-DOUBLE_NUM_SIGNIFICAND_BITS);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800739
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700740 if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800741 /* --- Not a whole number --- */
742 Result.type = IEEE754_ToInt_NO_CONVERSION;
743 } else {
744 /* --- CONVERTABLE WHOLE NUMBER --- */
745 /* Add in the one that is implied in normal floats */
746 uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
747 /* Factor in the exponent */
748 if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {
749 /* Numbers less than 2^52 with up to 52 significant bits */
750 uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;
751 } else {
752 /* Numbers greater than 2^52 with at most 52 significant bits */
753 uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;
754 }
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700755 if(bIsNegative) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800756 /* Cast safe because exponent range check above */
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700757 if(nDoubleUnbiasedExponent == 63) {
758 Result.integer.un_signed = uInteger;
759 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
760 } else {
761 Result.integer.is_signed = -((int64_t)uInteger);
762 Result.type = IEEE754_ToInt_IS_INT;
763 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800764 } else {
765 Result.integer.un_signed = uInteger;
766 Result.type = IEEE754_ToInt_IS_UINT;
767 }
768 }
769 }
770
771 return Result;
772}
773
774
775/* Public function; see ieee754.h */
776struct IEEE754_ToInt
777IEEE754_SingleToInt(const float f)
778{
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800779 int nPrecisionBits;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800780 struct IEEE754_ToInt Result;
781 uint64_t uInteger;
782
783 /* Pull the three parts out of the single-precision float. Most
784 * work is done with uint32_t which helps avoid integer promotions
785 * and static analyzer complaints.
786 */
787 const uint32_t uSingle = CopyFloatToUint32(f);
788 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
789 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800790 const int nSingleUnbiasedExponent = (int)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800791 const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700792 const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK;
793
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800794
795 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
796 if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {
797 /* --- POSITIVE AND NEGATIVE ZERO --- */
798 Result.integer.un_signed = 0;
799 Result.type = IEEE754_ToInt_IS_UINT;
800 } else {
801 /* --- Subnormal --- */
802 Result.type = IEEE754_ToInt_NO_CONVERSION;
803 }
804 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
805 /* --- NAN or INFINITY --- */
806 if(uSingleleSignificand != 0) {
807 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
808 } else {
809 Result.type = IEEE754_ToInt_NO_CONVERSION;
810 }
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700811 } else if(nSingleUnbiasedExponent < 0) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800812 /* --- Exponent out of range --- */
813 Result.type = IEEE754_ToInt_NO_CONVERSION;
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700814 } else if(nSingleUnbiasedExponent >= 64) {
815 if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) {
816 /* Very special case for -18446744073709551616.0 */
817 Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */
818 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
819 } else {
820 /* --- Exponent out of range --- */
821 Result.type = IEEE754_ToInt_NO_CONVERSION;
822 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800823 } else {
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700824 /* Conversion only fails when the input is too large or is not a
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800825 * whole number, never because of lack of precision because
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700826 * 64-bit integers always have more precision than the 23 bits
827 * of a single.
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800828 */
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700829 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) -
830 (64 - SINGLE_NUM_SIGNIFICAND_BITS);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800831
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700832 if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800833 /* --- Not a whole number --- */
834 Result.type = IEEE754_ToInt_NO_CONVERSION;
835 } else {
836 /* --- CONVERTABLE WHOLE NUMBER --- */
837 /* Add in the one that is implied in normal floats */
838 uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);
839 /* Factor in the exponent */
840 if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {
841 /* Numbers less than 2^23 with up to 23 significant bits */
842 uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;
843 } else {
844 /* Numbers greater than 2^23 with at most 23 significant bits*/
845 uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;
846 }
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700847 if(bIsNegative) {
848 /* Cast safe because exponent range check above */
849 if(nSingleUnbiasedExponent == 63) {
850 Result.integer.un_signed = uInteger;
851 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
852 } else {
853 Result.integer.is_signed = -((int64_t)uInteger);
854 Result.type = IEEE754_ToInt_IS_INT;
855 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800856 } else {
857 Result.integer.un_signed = uInteger;
858 Result.type = IEEE754_ToInt_IS_UINT;
859 }
860 }
861 }
862
863 return Result;
864}
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700865
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700866
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700867/* Public function; see ieee754.h */
868double
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800869IEEE754_UintToDouble(const uint64_t uInt, const int nIsNegative)
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700870{
871 int nDoubleUnbiasedExponent;
872 uint64_t uDoubleSignificand;
873 int nPrecisionBits;
874
Laurence Lundblade14ce2282024-07-24 22:13:35 -0700875 if(uInt == 0) {
876 uDoubleSignificand = 0;
877 nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO;
878
879 } else {
880 /* Figure out the exponent and normalize the significand. This is
881 * done by shifting out all leading zero bits and counting them. If
882 * none are shifted out, the exponent is 63. */
883 uDoubleSignificand = uInt;
884 nDoubleUnbiasedExponent = 63;
885 while(1) {
886 if(uDoubleSignificand & 0x8000000000000000UL) {
887 break;
888 }
889 uDoubleSignificand <<= 1;
890 nDoubleUnbiasedExponent--;
891 };
892
893 /* Position significand correctly for a double. Only shift 63 bits
894 * because of the 1 that is present by implication in IEEE 754.*/
895 uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS;
896
897 /* Subtract 1 which is present by implication in IEEE 754 */
898 uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS);
899
900 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent);
901
902 if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) {
903 /* Will lose precision if converted */
904 return IEEE754_UINT_TO_DOUBLE_OOB;
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700905 }
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700906 }
907
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800908 return IEEE754_AssembleDouble(nIsNegative,
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700909 uDoubleSignificand,
910 nDoubleUnbiasedExponent);
911}
912
Laurence Lundbladecb7282d2024-11-09 23:01:11 -0800913#endif /* ! QCBOR_DISABLE_PREFERRED_FLOAT */
914
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800915
916
917
918/* Public function; see ieee754.h */
919int
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700920IEEE754_DoubleHasNaNPayload(const double d)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800921{
922 const uint64_t uDouble = CopyDoubleToUint64(d);
923 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
924 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
925 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
926 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
927
928 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN &&
929 uDoubleSignificand != 0 &&
930 uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) {
931 return 1;
932 } else {
933 return 0;
934 }
935}
936
937
938/* Public function; see ieee754.h */
939int
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700940IEEE754_SingleHasNaNPayload(const float f)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800941{
942 const uint32_t uSingle = CopyFloatToUint32(f);
943 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
944 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
945 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700946 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800947
948 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN &&
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700949 uSingleSignificand != 0 &&
950 uSingleSignificand != SINGLE_QUIET_NAN_BIT) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800951 return 1;
952 } else {
953 return 0;
954 }
955}