blob: 40421adc015376864d57864ac6737a1ec3959f29 [file] [log] [blame]
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07001/* ==========================================================================
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -08002 * ieee754.c -- floating-point conversion for half, double & single-precision
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07003 *
4 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
5 * Copyright (c) 2021, Arm Limited. All rights reserved.
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
9 * See BSD-3-Clause license in README.md
10 *
11 * Created on 7/23/18
12 * ========================================================================== */
Laurence Lundbladecc2ed342018-09-22 17:29:55 -070013
Máté Tóth-Pálef5f07a2021-09-17 19:31:37 +020014#include "qcbor/qcbor_common.h"
15
Laurence Lundblade12d32c52018-09-19 11:25:27 -070016#include "ieee754.h"
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070017#include <string.h> /* For memcpy() */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070018
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070019
Laurence Lundblade12d32c52018-09-19 11:25:27 -070020/*
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -080021 * This has long lines and is easier to read because of
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070022 * them. Some coding guidelines prefer 80 column lines (can they not
23 * afford big displays?).
24 *
25 * This code works solely using shifts and masks and thus has no
26 * dependency on any math libraries. It can even work if the CPU
27 * doesn't have any floating-point support, though that isn't the most
28 * useful thing to do.
29 *
30 * The memcpy() dependency is only for CopyFloatToUint32() and friends
31 * which only is needed to avoid type punning when converting the
32 * actual float bits to an unsigned value so the bit shifts and masks
33 * can work.
34 *
35 * The references used to write this code:
36 *
37 * IEEE 754-2008, particularly section 3.6 and 6.2.1
38 *
39 * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
40 *
41 * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
42 *
43 * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
44 *
45 * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
46 *
47 * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
48 * needed. It can be retrieved from github history if needed.
Laurence Lundblade12d32c52018-09-19 11:25:27 -070049 */
50
51
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070052
53
54/* ----- Half Precsion ----------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070055#define HALF_NUM_SIGNIFICAND_BITS (10)
56#define HALF_NUM_EXPONENT_BITS (5)
57#define HALF_NUM_SIGN_BITS (1)
58
59#define HALF_SIGNIFICAND_SHIFT (0)
60#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
61#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
62
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070063#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits
Laurence Lundblade06350ea2020-01-27 19:32:40 -080064#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070065#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
Laurence Lundblade06350ea2020-01-27 19:32:40 -080066#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
Laurence Lundblade12d32c52018-09-19 11:25:27 -070067
68/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070069 * 0x00 0 -15 0 and subnormal
70 * 0x01 1 -14 Smallest normal exponent
71 * 0x1e 30 15 Largest normal exponent
72 * 0x1F 31 16 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070073#define HALF_EXPONENT_BIAS (15)
74#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
75#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
76#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
77#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
78
79
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070080/* ------ Single-Precision -------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070081#define SINGLE_NUM_SIGNIFICAND_BITS (23)
82#define SINGLE_NUM_EXPONENT_BITS (8)
83#define SINGLE_NUM_SIGN_BITS (1)
84
85#define SINGLE_SIGNIFICAND_SHIFT (0)
86#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
87#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
88
Laurence Lundblade06350ea2020-01-27 19:32:40 -080089#define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits
90#define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
91#define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
92#define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
Laurence Lundblade12d32c52018-09-19 11:25:27 -070093
94/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070095 * 0x0000 0 -127 0 and subnormal
96 * 0x0001 1 -126 Smallest normal exponent
97 * 0x7f 127 0 1
98 * 0xfe 254 127 Largest normal exponent
99 * 0xff 255 128 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700100#define SINGLE_EXPONENT_BIAS (127)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700101#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)
102#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)
103#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)
104#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700105
106
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700107/* --------- Double-Precision ---------- */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700108#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
109#define DOUBLE_NUM_EXPONENT_BITS (11)
110#define DOUBLE_NUM_SIGN_BITS (1)
111
112#define DOUBLE_SIGNIFICAND_SHIFT (0)
113#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
114#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
115
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700116#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
117#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
118#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
119#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
120
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700121
122/* Biased Biased Unbiased Use
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700123 * 0x00000000 0 -1023 0 and subnormal
124 * 0x00000001 1 -1022 Smallest normal exponent
125 * 0x000007fe 2046 1023 Largest normal exponent
126 * 0x000007ff 2047 1024 NaN and Infinity */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700127#define DOUBLE_EXPONENT_BIAS (1023)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700128#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)
129#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)
130#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)
131#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)
132
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700133
134
135
136/*
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700137 * Convenient functions to avoid type punning, compiler warnings and
138 * such. The optimizer reduces them to a simple assignment. This is a
139 * crusty corner of C. It shouldn't be this hard.
140 *
141 * These are also in UsefulBuf.h under a different name. They are copied
142 * here to avoid a dependency on UsefulBuf.h. There is no object code
143 * size impact because these always optimze down to a simple assignment.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700144 */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700145static inline uint32_t
146CopyFloatToUint32(float f)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700147{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700148 uint32_t u32;
149 memcpy(&u32, &f, sizeof(uint32_t));
150 return u32;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700151}
152
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700153static inline uint64_t
154CopyDoubleToUint64(double d)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700155{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700156 uint64_t u64;
157 memcpy(&u64, &d, sizeof(uint64_t));
158 return u64;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700159}
160
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800161
162#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
163
164
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700165static inline double
166CopyUint64ToDouble(uint64_t u64)
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700167{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700168 double d;
169 memcpy(&d, &u64, sizeof(uint64_t));
170 return d;
171}
172
173static inline float
174CopyUint32ToSingle(uint32_t u32)
175{
176 float f;
177 memcpy(&f, &u32, sizeof(uint32_t));
178 return f;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700179}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700180
181
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800182
183
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700184/**
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800185 * @brief Assemble sign, significand and exponent into double precision float.
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700186 *
187 * @param[in] uDoubleSign 0 if positive, 1 if negative
188 * @pararm[in] uDoubleSignificand Bits of the significand
189 * @param[in] nDoubleUnBiasedExponent Exponent
190 *
191 * This returns the bits for a single-precision float, a binary64
192 * as specified in IEEE754.
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700193 */
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700194// TODO: make the sign and exponent type int?
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700195static double
196IEEE754_AssembleDouble(uint64_t uDoubleSign,
197 uint64_t uDoubleSignificand,
198 int64_t nDoubleUnBiasedExponent)
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700199{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700200 uint64_t uDoubleBiasedExponent;
201
202 uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
203
204 return CopyUint64ToDouble(uDoubleSignificand |
205 (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
206 (uDoubleSign << DOUBLE_SIGN_SHIFT));
207}
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800208
209
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800210/* Public function; see ieee754.h */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700211double
212IEEE754_HalfToDouble(uint16_t uHalfPrecision)
213{
214 uint64_t uDoubleSignificand;
215 int64_t nDoubleUnBiasedExponent;
216 double dResult;
217
218 /* Pull out the three parts of the half-precision float. Do all
219 * the work in 64 bits because that is what the end result is. It
220 * may give smaller code size and will keep static analyzers
221 * happier.
222 */
223 const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
224 const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
225 const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
226 const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
227
228 if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
229 /* 0 or subnormal */
230 if(uHalfSignificand) {
231 /* --- SUBNORMAL --- */
232 /* A half-precision subnormal can always be converted to a
233 * normal double-precision float because the ranges line up.
234 * The exponent of a subnormal starts out at the min exponent
235 * for a normal. As the sub normal significand bits are
236 * shifted, left to normalize, the exponent is
237 * decremented. Shifting continues until fully normalized.
238 */
239 nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
240 uDoubleSignificand = uHalfSignificand;
241 do {
242 uDoubleSignificand <<= 1;
243 nDoubleUnBiasedExponent--;
244 } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
245 /* A normal has an implied 1 in the most significant
246 * position that a subnormal doesn't. */
247 uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
248 /* Must shift into place for a double significand */
249 uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
250
251 dResult = IEEE754_AssembleDouble(uHalfSign,
252 uDoubleSignificand,
253 nDoubleUnBiasedExponent);
254 } else {
255 /* --- ZERO --- */
256 dResult = IEEE754_AssembleDouble(uHalfSign,
257 0,
258 DOUBLE_EXPONENT_ZERO);
259 }
260 } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
261 /* NaN or Inifinity */
262 if(uHalfSignificand) {
263 /* --- NaN --- */
264 /* Half-precision payloads always fit into double precision
265 * payloads. They are shifted left the same as a normal
266 * number significand.
267 */
268 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
269 dResult = IEEE754_AssembleDouble(uHalfSign,
270 uDoubleSignificand,
271 DOUBLE_EXPONENT_INF_OR_NAN);
272 } else {
273 /* --- INFINITY --- */
274 dResult = IEEE754_AssembleDouble(uHalfSign,
275 0,
276 DOUBLE_EXPONENT_INF_OR_NAN);
277 }
278 } else {
279 /* --- NORMAL NUMBER --- */
280 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
281 dResult = IEEE754_AssembleDouble(uHalfSign,
282 uDoubleSignificand,
283 nHalfUnBiasedExponent);
284 }
285
286 return dResult;
287}
288
289
290/**
291 * @brief Assemble sign, significand and exponent into single precision float.
292 *
293 * @param[in] uHalfSign 0 if positive, 1 if negative
294 * @pararm[in] uHalfSignificand Bits of the significand
295 * @param[in] nHalfUnBiasedExponent Exponent
296 *
297 * This returns the bits for a single-precision float, a binary32 as
298 * specified in IEEE754. It is returned as a uint64_t rather than a
299 * uint32_t or a float for convenience of usage.
300 */
301static uint32_t
302IEEE754_AssembleHalf(uint32_t uHalfSign,
303 uint32_t uHalfSignificand,
304 int32_t nHalfUnBiasedExponent)
305{
306 uint32_t uHalfUnbiasedExponent;
307
308 uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
309
310 return uHalfSignificand |
311 (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
312 (uHalfSign << HALF_SIGN_SHIFT);
313}
314
315
316/* Public function; see ieee754.h */
317IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800318IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700319{
320 IEEE754_union result;
321 uint32_t uDroppedBits;
322 int32_t nExponentDifference;
323 int32_t nShiftAmount;
324 uint32_t uHalfSignificand;
325
326 /* Pull the three parts out of the double-precision float Most work
327 * is done with uint32_t which helps avoid integer promotions and
328 * static analyzer complaints.
329 */
330 const uint32_t uSingle = CopyFloatToUint32(f);
331 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
332 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
333 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
334 const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
335
336 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
337 if(uSingleSignificand == 0) {
338 /* --- IS ZERO --- */
339 result.uSize = IEEE754_UNION_IS_HALF;
340 result.uValue = IEEE754_AssembleHalf(uSingleSign,
341 0,
342 HALF_EXPONENT_ZERO);
343 } else {
344 /* --- IS SINGLE SUBNORMAL --- */
345 /* The largest single subnormal is slightly less than the
346 * largest single normal which is 2^-149 or
347 * 2.2040517676619426e-38. The smallest half subnormal is
348 * 2^-14 or 5.9604644775390625E-8. There is no overlap so
349 * single subnormals can't be converted to halfs of any sort.
350 */
351 result.uSize = IEEE754_UNION_IS_SINGLE;
352 result.uValue = uSingle;
353 }
354 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
355 if(uSingleSignificand == 0) {
356 /* ---- IS INFINITY ---- */
357 result.uSize = IEEE754_UNION_IS_HALF;
358 result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
359 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800360 if(bNoNaNPayload) {
361 /* --- REQUIRE CANNONICAL NAN --- */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700362 result.uSize = IEEE754_UNION_IS_HALF;
363 result.uValue = IEEE754_AssembleHalf(uSingleSign,
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800364 HALF_QUIET_NAN_BIT,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700365 HALF_EXPONENT_INF_OR_NAN);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700366 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800367 /* The NaN can only be converted if no payload bits are lost
368 * per RFC 8949 section 4.1 that defines Preferred
369 * Serializaton. Note that Deterministically Encode CBOR in
370 * section 4.2 allows for some variation of this rule, but at
371 * the moment this implementation is of Preferred
372 * Serialization, not CDE. As of December 2023, we are also
373 * expecting an update to CDE. This code may need to be
374 * updated for CDE.
375 */
376 uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
377 if(uDroppedBits == 0) {
378 /* --- IS CONVERTABLE NAN --- */
379 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
380 result.uSize = IEEE754_UNION_IS_HALF;
381 result.uValue = IEEE754_AssembleHalf(uSingleSign,
382 uHalfSignificand,
383 HALF_EXPONENT_INF_OR_NAN);
384
385 } else {
386 /* --- IS UNCONVERTABLE NAN --- */
387 result.uSize = IEEE754_UNION_IS_SINGLE;
388 result.uValue = uSingle;
389 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700390 }
391 }
392 } else {
393 /* ---- REGULAR NUMBER ---- */
394 /* A regular single can be converted to a regular half if the
395 * single's exponent is in the smaller range of a half and if no
396 * precision is lost in the significand.
397 */
398 if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
399 nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
400 (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
401 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
402
403 /* --- CONVERT TO HALF NORMAL --- */
404 result.uSize = IEEE754_UNION_IS_HALF;
405 result.uValue = IEEE754_AssembleHalf(uSingleSign,
406 uHalfSignificand,
407 nSingleUnbiasedExponent);
408 } else {
409 /* Unable to convert to a half normal. See if it can be
410 * converted to a half subnormal. To do that, the exponent
411 * must be in range and no precision can be lost in the
412 * signficand.
413 *
414 * This is more complicated because the number is not
415 * normalized. The signficand must be shifted proprotionally
416 * to the exponent and 1 must be added in. See
417 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
418 *
419 * Exponents -14 to -24 map to a shift of 0 to 10 of the
420 * significand. The largest value of a half subnormal has an
421 * exponent of -14. Subnormals are not normalized like
422 * normals meaning they lose precision as the numbers get
423 * smaller. Normals don't lose precision because the exponent
424 * allows all the bits of the significand to be significant.
425 */
426 /* The exponent of the largest possible half-precision
427 * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger
428 * than this are normal and handled above. We're going to
429 * shift the significand right by at least this amount.
430 */
431 nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
432
433 /* In addition to the shift based on the exponent's value,
434 * the single significand has to be shifted right to fit into
435 * a half-precision significand */
436 nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
437
438 /* Must add 1 in to the possible significand because there is
439 * an implied 1 for normal values and not for subnormal
440 * values. See equations here:
441 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
442 */
443 uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
444
445 /* If only zero bits get shifted out, this can be converted
446 * to subnormal */
447 if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
448 nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
449 uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
450 /* --- CONVERTABLE TO HALF SUBNORMAL --- */
451 result.uSize = IEEE754_UNION_IS_HALF;
452 result.uValue = IEEE754_AssembleHalf(uSingleSign,
453 uHalfSignificand,
454 HALF_EXPONENT_ZERO);
455 } else {
456 /* --- DO NOT CONVERT --- */
457 result.uSize = IEEE754_UNION_IS_SINGLE;
458 result.uValue = uSingle;
459 }
460 }
461 }
462
463 return result;
464}
465
466
467/**
468 * @brief Assemble sign, significand and exponent into single precision float.
469 *
470 * @param[in] uSingleSign 0 if positive, 1 if negative
471 * @pararm[in] uSingleSignificand Bits of the significand
472 * @param[in] nSingleUnBiasedExponent Exponent
473 *
474 * This returns the bits for a single-precision float, a binary32 as
475 * specified in IEEE754. It is returned as a uint64_t rather than a
476 * uint32_t or a float for convenience of usage.
477 */
478static uint64_t
479IEEE754_AssembleSingle(uint64_t uSingleSign,
480 uint64_t uSingleSignificand,
481 int64_t nSingleUnBiasedExponent)
482{
483 uint64_t uSingleBiasedExponent;
484
485 uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
486
487 return uSingleSignificand |
488 (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
489 (uSingleSign << SINGLE_SIGN_SHIFT);
490}
491
492
493/**
494 * @brief Convert a double-precision float to single-precision.
495 *
496 * @param[in] d The value to convert.
497 *
498 * @returns Either unconverted value or value converted to single-precision.
499 *
500 * This always succeeds. If the value cannot be converted without the
501 * loss of precision, it is not converted.
502 *
503 * This handles all subnormals and NaN payloads.
504 */
505static IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800506IEEE754_DoubleToSingle(const double d)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700507{
508 IEEE754_union Result;
509 int64_t nExponentDifference;
510 int64_t nShiftAmount;
511 uint64_t uSingleSignificand;
512 uint64_t uDroppedBits;
513
514
515 /* Pull the three parts out of the double-precision float. Most
516 * work is done with uint64_t which helps avoid integer promotions
517 * and static analyzer complaints.
518 */
519 const uint64_t uDouble = CopyDoubleToUint64(d);
520 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
521 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
522 const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
523 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
524
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700525 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
526 if(uDoubleSignificand == 0) {
527 /* --- IS ZERO --- */
528 Result.uSize = IEEE754_UNION_IS_SINGLE;
529 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
530 0,
531 SINGLE_EXPONENT_ZERO);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700532 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700533 /* --- IS DOUBLE SUBNORMAL --- */
534 /* The largest double subnormal is slightly less than the
535 * largest double normal which is 2^-1022 or
536 * 2.2250738585072014e-308. The smallest single subnormal
537 * is 2^-149 or 1.401298464324817e-45. There is no
538 * overlap so double subnormals can't be converted to
539 * singles of any sort.
540 */
541 Result.uSize = IEEE754_UNION_IS_DOUBLE;
542 Result.uValue = uDouble;
543 }
544 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
545 if(uDoubleSignificand == 0) {
546 /* ---- IS INFINITY ---- */
547 Result.uSize = IEEE754_UNION_IS_SINGLE;
548 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
549 0,
550 SINGLE_EXPONENT_INF_OR_NAN);
551 } else {
552 /* The NaN can only be converted if no payload bits are
553 * lost per RFC 8949 section 4.1 that defines Preferred
554 * Serializaton. Note that Deterministically Encode CBOR
555 * in section 4.2 allows for some variation of this rule,
556 * but at the moment this implementation is of Preferred
557 * Serialization, not CDE. As of December 2023, we are
558 * also expecting an update to CDE. This code may need to
559 * be updated for CDE.
560 */
561 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
562 if(uDroppedBits == 0) {
563 /* --- IS CONVERTABLE NAN --- */
564 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
565 Result.uSize = IEEE754_UNION_IS_SINGLE;
566 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
567 uSingleSignificand,
568 SINGLE_EXPONENT_INF_OR_NAN);
569 } else {
570 /* --- IS UNCONVERTABLE NAN --- */
571 Result.uSize = IEEE754_UNION_IS_DOUBLE;
572 Result.uValue = uDouble;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700573 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700574 }
575 } else {
576 /* ---- REGULAR NUMBER ---- */
577 /* A regular double can be converted to a regular single if
578 * the double's exponent is in the smaller range of a single
579 * and if no precision is lost in the significand.
580 */
581 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
582 if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
583 nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
584 uDroppedBits == 0) {
585 /* --- IS CONVERTABLE TO SINGLE --- */
586 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
587 Result.uSize = IEEE754_UNION_IS_SINGLE;
588 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
589 uSingleSignificand,
590 nDoubleUnbiasedExponent);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700591 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700592 /* Unable to convert to a single normal. See if it can be
593 * converted to a single subnormal. To do that, the
594 * exponent must be in range and no precision can be lost
595 * in the signficand.
596 *
597 * This is more complicated because the number is not
598 * normalized. The signficand must be shifted
599 * proprotionally to the exponent and 1 must be added
600 * in. See
601 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
602 */
603 nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
604 nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
605 uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
606
607 if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
608 nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
609 uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
610 /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
611 Result.uSize = IEEE754_UNION_IS_SINGLE;
612 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
613 uSingleSignificand,
614 SINGLE_EXPONENT_ZERO);
615 } else {
616 /* --- CAN NOT BE CONVERTED --- */
617 Result.uSize = IEEE754_UNION_IS_DOUBLE;
618 Result.uValue = uDouble;
619 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700620 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700621 }
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800622
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700623 return Result;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700624}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700625
626
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700627/* Public function; see ieee754.h */
628IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800629IEEE754_DoubleToSmaller(const double d,
630 const int bAllowHalfPrecision,
631 const int bNoNanPayload)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700632{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700633 IEEE754_union result;
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800634
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700635 result = IEEE754_DoubleToSingle(d);
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800636
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700637 if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
638 /* Cast to uint32_t is OK, because value was just successfully
639 * converted to single. */
640 float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800641 result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700642 }
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700643
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700644 return result;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700645}
646
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800647
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700648
649/* This returns 64 minus the number of zero bits on the right. It is
650 * is the amount of precision in the 64-bit significand passed in.
651 * When used for 52 and 23-bit significands, subtract 12 and 41
652 * to get their precision.
653 *
654 * The value returned is for a *normalized* number like the
655 * significand of a double. When used for precision for a non-normalized
656 * number like a uint64_t, further computation is required.
657 *
658 * If the significand is 0, then 0 is returned as the precision.*/
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800659static int
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700660IEEE754_Private_CountPrecisionBits(uint64_t uSignigicand)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800661{
662 int nNonZeroBitsCount;
663 uint64_t uMask;
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800664
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700665 for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {
666 uMask = 0x01UL << (64 - nNonZeroBitsCount);
667 if(uMask & uSignigicand) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800668 break;
669 }
670 }
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700671
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800672 return nNonZeroBitsCount;
673}
674
675
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700676
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800677/* Public function; see ieee754.h */
678struct IEEE754_ToInt
679IEEE754_DoubleToInt(const double d)
680{
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700681 int64_t nPrecisionBits;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800682 struct IEEE754_ToInt Result;
683 uint64_t uInteger;
684
685 /* Pull the three parts out of the double-precision float. Most
686 * work is done with uint64_t which helps avoid integer promotions
687 * and static analyzer complaints.
688 */
689 const uint64_t uDouble = CopyDoubleToUint64(d);
690 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
691 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
692 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
693 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700694 const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800695
696 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
697 if(uDoubleSignificand == 0) {
698 /* --- POSITIVE AND NEGATIVE ZERO --- */
699 Result.integer.un_signed = 0;
700 Result.type = IEEE754_ToInt_IS_UINT;
701 } else {
702 /* --- SUBNORMAL --- */
703 Result.type = IEEE754_ToInt_NO_CONVERSION;
704 }
705 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
706 if(uDoubleSignificand != 0) {
707 /* --- NAN --- */
708 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
709 } else {
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700710 /* --- INFINITY --- */
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800711 Result.type = IEEE754_ToInt_NO_CONVERSION;
712 }
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700713 } else if(nDoubleUnbiasedExponent < 0) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800714 /* --- Exponent out of range --- */
715 Result.type = IEEE754_ToInt_NO_CONVERSION;
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700716 } else if(nDoubleUnbiasedExponent >= 64) {
717 if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) {
718 /* Very special case for -18446744073709551616.0 */
719 Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */
720 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
721 } else {
722 /* --- Exponent out of range --- */
723 Result.type = IEEE754_ToInt_NO_CONVERSION;
724 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800725 } else {
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700726 /* Conversion only fails when the input is too large or is not a
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800727 * whole number, never because of lack of precision because
728 * 64-bit integers always have more precision than the 52-bits
729 * of a double.
730 */
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700731 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) -
732 (64-DOUBLE_NUM_SIGNIFICAND_BITS);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800733
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700734 if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800735 /* --- Not a whole number --- */
736 Result.type = IEEE754_ToInt_NO_CONVERSION;
737 } else {
738 /* --- CONVERTABLE WHOLE NUMBER --- */
739 /* Add in the one that is implied in normal floats */
740 uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
741 /* Factor in the exponent */
742 if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {
743 /* Numbers less than 2^52 with up to 52 significant bits */
744 uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;
745 } else {
746 /* Numbers greater than 2^52 with at most 52 significant bits */
747 uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;
748 }
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700749 if(bIsNegative) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800750 /* Cast safe because exponent range check above */
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700751 if(nDoubleUnbiasedExponent == 63) {
752 Result.integer.un_signed = uInteger;
753 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
754 } else {
755 Result.integer.is_signed = -((int64_t)uInteger);
756 Result.type = IEEE754_ToInt_IS_INT;
757 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800758 } else {
759 Result.integer.un_signed = uInteger;
760 Result.type = IEEE754_ToInt_IS_UINT;
761 }
762 }
763 }
764
765 return Result;
766}
767
768
769/* Public function; see ieee754.h */
770struct IEEE754_ToInt
771IEEE754_SingleToInt(const float f)
772{
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700773 int32_t nPrecisionBits;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800774 struct IEEE754_ToInt Result;
775 uint64_t uInteger;
776
777 /* Pull the three parts out of the single-precision float. Most
778 * work is done with uint32_t which helps avoid integer promotions
779 * and static analyzer complaints.
780 */
781 const uint32_t uSingle = CopyFloatToUint32(f);
782 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
783 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
784 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
785 const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700786 const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK;
787
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800788
789 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
790 if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {
791 /* --- POSITIVE AND NEGATIVE ZERO --- */
792 Result.integer.un_signed = 0;
793 Result.type = IEEE754_ToInt_IS_UINT;
794 } else {
795 /* --- Subnormal --- */
796 Result.type = IEEE754_ToInt_NO_CONVERSION;
797 }
798 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
799 /* --- NAN or INFINITY --- */
800 if(uSingleleSignificand != 0) {
801 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
802 } else {
803 Result.type = IEEE754_ToInt_NO_CONVERSION;
804 }
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700805 } else if(nSingleUnbiasedExponent < 0) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800806 /* --- Exponent out of range --- */
807 Result.type = IEEE754_ToInt_NO_CONVERSION;
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700808 } else if(nSingleUnbiasedExponent >= 64) {
809 if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) {
810 /* Very special case for -18446744073709551616.0 */
811 Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */
812 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
813 } else {
814 /* --- Exponent out of range --- */
815 Result.type = IEEE754_ToInt_NO_CONVERSION;
816 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800817 } else {
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700818 /* Conversion only fails when the input is too large or is not a
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800819 * whole number, never because of lack of precision because
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700820 * 64-bit integers always have more precision than the 23 bits
821 * of a single.
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800822 */
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700823 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) -
824 (64 - SINGLE_NUM_SIGNIFICAND_BITS);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800825
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700826 if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800827 /* --- Not a whole number --- */
828 Result.type = IEEE754_ToInt_NO_CONVERSION;
829 } else {
830 /* --- CONVERTABLE WHOLE NUMBER --- */
831 /* Add in the one that is implied in normal floats */
832 uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);
833 /* Factor in the exponent */
834 if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {
835 /* Numbers less than 2^23 with up to 23 significant bits */
836 uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;
837 } else {
838 /* Numbers greater than 2^23 with at most 23 significant bits*/
839 uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;
840 }
Laurence Lundblade3d0785e2024-04-22 10:31:40 -0700841 if(bIsNegative) {
842 /* Cast safe because exponent range check above */
843 if(nSingleUnbiasedExponent == 63) {
844 Result.integer.un_signed = uInteger;
845 Result.type = IEEE754_ToInt_IS_65BIT_NEG;
846 } else {
847 Result.integer.is_signed = -((int64_t)uInteger);
848 Result.type = IEEE754_ToInt_IS_INT;
849 }
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800850 } else {
851 Result.integer.un_signed = uInteger;
852 Result.type = IEEE754_ToInt_IS_UINT;
853 }
854 }
855 }
856
857 return Result;
858}
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700859
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700860
861
862/* Public function; see ieee754.h */
863double
864IEEE754_UintToDouble(const uint64_t uInt, const int uIsNegative)
865{
866 int nDoubleUnbiasedExponent;
867 uint64_t uDoubleSignificand;
868 int nPrecisionBits;
869
Laurence Lundblade0bfa3cb2024-04-21 13:14:24 -0700870 if(uInt == 0) {
Laurence Lundblade9d04d2b2024-07-24 16:57:43 -0700871 uDoubleSignificand = 0;
872 nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO;
873
Laurence Lundblade0bfa3cb2024-04-21 13:14:24 -0700874 } else {
875 /* Figure out the exponent and normalize the significand. This is
876 * done by shifting out all leading zero bits and counting them. If
877 * none are shifted out, the exponent is 63. */
878 uDoubleSignificand = uInt;
879 nDoubleUnbiasedExponent = 63;
880 while(1) {
881 if(uDoubleSignificand & 0x8000000000000000UL) {
882 break;
883 }
884 uDoubleSignificand <<= 1;
885 nDoubleUnbiasedExponent--;
886 };
887
888 /* Position significand correctly for a double. Only shift 63 bits
889 * because of the 1 that is present by implication in IEEE 754.*/
890 uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS;
891
892 /* Subtract 1 which is present by implication in IEEE 754 */
893 uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS);
894
895 nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent);
896
897 if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) {
898 /* Will lose precision if converted */
899 return IEEE754_UINT_TO_DOUBLE_OOB;
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700900 }
Laurence Lundbladed883ad32024-03-23 22:37:37 -0700901 }
902
903 return IEEE754_AssembleDouble((uint64_t)uIsNegative,
904 uDoubleSignificand,
905 nDoubleUnbiasedExponent);
906}
907
Laurence Lundbladeb275cdc2020-07-12 12:34:38 -0700908#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800909
910
911
912/* Public function; see ieee754.h */
913int
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700914IEEE754_DoubleHasNaNPayload(const double d)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800915{
916 const uint64_t uDouble = CopyDoubleToUint64(d);
917 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
918 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
919 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
920 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
921
922 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN &&
923 uDoubleSignificand != 0 &&
924 uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) {
925 return 1;
926 } else {
927 return 0;
928 }
929}
930
931
932/* Public function; see ieee754.h */
933int
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700934IEEE754_SingleHasNaNPayload(const float f)
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800935{
936 const uint32_t uSingle = CopyFloatToUint32(f);
937 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
938 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
939 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700940 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800941
942 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN &&
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700943 uSingleSignificand != 0 &&
944 uSingleSignificand != SINGLE_QUIET_NAN_BIT) {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800945 return 1;
946 } else {
947 return 0;
948 }
949}
Laurence Lundblade9b2ae8a2024-07-12 11:00:20 -0700950