blob: 002ca40cfff21095e4cd2fd966cf450fed30091c [file] [log] [blame]
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07001/* ==========================================================================
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -08002 * ieee754.c -- floating-point conversion for half, double & single-precision
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -07003 *
4 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
5 * Copyright (c) 2021, Arm Limited. All rights reserved.
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
9 * See BSD-3-Clause license in README.md
10 *
11 * Created on 7/23/18
12 * ========================================================================== */
Laurence Lundbladecc2ed342018-09-22 17:29:55 -070013
Máté Tóth-Pálef5f07a2021-09-17 19:31:37 +020014#include "qcbor/qcbor_common.h"
15
Laurence Lundblade12d32c52018-09-19 11:25:27 -070016#include "ieee754.h"
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070017#include <string.h> /* For memcpy() */
Laurence Lundblade12d32c52018-09-19 11:25:27 -070018
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070019
Laurence Lundblade12d32c52018-09-19 11:25:27 -070020/*
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -080021 * This has long lines and is easier to read because of
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070022 * them. Some coding guidelines prefer 80 column lines (can they not
23 * afford big displays?).
24 *
25 * This code works solely using shifts and masks and thus has no
26 * dependency on any math libraries. It can even work if the CPU
27 * doesn't have any floating-point support, though that isn't the most
28 * useful thing to do.
29 *
30 * The memcpy() dependency is only for CopyFloatToUint32() and friends
31 * which only is needed to avoid type punning when converting the
32 * actual float bits to an unsigned value so the bit shifts and masks
33 * can work.
34 *
35 * The references used to write this code:
36 *
37 * IEEE 754-2008, particularly section 3.6 and 6.2.1
38 *
39 * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
40 *
41 * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
42 *
43 * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
44 *
45 * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
46 *
47 * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
48 * needed. It can be retrieved from github history if needed.
Laurence Lundblade12d32c52018-09-19 11:25:27 -070049 */
50
51
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -070052
53
/* ----- Half-Precision ----------- */

/* Field widths of an IEEE 754 binary16 */
#define HALF_NUM_SIGNIFICAND_BITS (10)
#define HALF_NUM_EXPONENT_BITS    (5)
#define HALF_NUM_SIGN_BITS        (1)

/* Bit position of the least significant bit of each field */
#define HALF_SIGNIFICAND_SHIFT    (0)
#define HALF_EXPONENT_SHIFT       (HALF_NUM_SIGNIFICAND_BITS)
#define HALF_SIGN_SHIFT           (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

/* Masks that select each field in place */
#define HALF_SIGNIFICAND_MASK     (0x3ffU) /* The lower 10 bits */
#define HALF_EXPONENT_MASK        (0x1fU << HALF_EXPONENT_SHIFT) /* 0x7c00, 5 bits of exponent */
#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) /* 0x8000, 1 bit of sign */
#define HALF_QUIET_NAN_BIT        (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) /* 0x0200 */

/*
 * Biased   Biased   Unbiased   Use
 * (hex)    (dec)
 * 0x00      0       -15        0 and subnormal
 * 0x01      1       -14        Smallest normal exponent
 * 0x1e     30        15        Largest normal exponent
 * 0x1F     31        16        NaN and Infinity
 */
#define HALF_EXPONENT_BIAS        (15)
#define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)    /*  15 Unbiased */
#define HALF_EXPONENT_MIN         (-HALF_EXPONENT_BIAS+1) /* -14 Unbiased */
#define HALF_EXPONENT_ZERO        (-HALF_EXPONENT_BIAS)   /* -15 Unbiased */
#define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS+1)  /*  16 Unbiased */
78
79
/* ------ Single-Precision -------- */

/* Field widths of an IEEE 754 binary32 */
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

/* Bit position of the least significant bit of each field */
#define SINGLE_SIGNIFICAND_SHIFT    (0)
#define SINGLE_EXPONENT_SHIFT       (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT           (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

/* Masks that select each field in place */
#define SINGLE_SIGNIFICAND_MASK     (0x7fffffU) /* The lower 23 bits */
#define SINGLE_EXPONENT_MASK        (0xffU << SINGLE_EXPONENT_SHIFT) /* 8 bits of exponent */
#define SINGLE_SIGN_MASK            (0x01U << SINGLE_SIGN_SHIFT) /* 1 bit of sign */
#define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))

/*
 * Biased   Biased   Unbiased   Use
 * (hex)    (dec)
 * 0x0000     0      -127       0 and subnormal
 * 0x0001     1      -126       Smallest normal exponent
 * 0x7f     127         0       1
 * 0xfe     254       127       Largest normal exponent
 * 0xff     255       128       NaN and Infinity
 */
#define SINGLE_EXPONENT_BIAS        (127)
#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)
#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1)
#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)
#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700105
106
/* --------- Double-Precision ---------- */

/* Field widths of an IEEE 754 binary64 */
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

/* Bit position of the least significant bit of each field */
#define DOUBLE_SIGNIFICAND_SHIFT    (0)
#define DOUBLE_EXPONENT_SHIFT       (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT           (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

/* Masks that select each field in place */
#define DOUBLE_SIGNIFICAND_MASK     (0xfffffffffffffULL) /* The lower 52 bits */
#define DOUBLE_EXPONENT_MASK        (0x7ffULL << DOUBLE_EXPONENT_SHIFT) /* 11 bits of exponent */
#define DOUBLE_SIGN_MASK            (0x01ULL << DOUBLE_SIGN_SHIFT) /* 1 bit of sign */
#define DOUBLE_QUIET_NAN_BIT        (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))

/*
 * Biased       Biased   Unbiased   Use
 * (hex)        (dec)
 * 0x00000000      0     -1023      0 and subnormal
 * 0x00000001      1     -1022      Smallest normal exponent
 * 0x000007fe   2046      1023      Largest normal exponent
 * 0x000007ff   2047      1024      NaN and Infinity
 */
#define DOUBLE_EXPONENT_BIAS        (1023)
#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)
#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1)
#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)
#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)
132
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700133
134
135
/*
 * Convenient functions to avoid type punning, compiler warnings and
 * such. The optimizer reduces them to a simple assignment. This is a
 * crusty corner of C. It shouldn't be this hard.
 *
 * These are also in UsefulBuf.h under a different name. They are copied
 * here to avoid a dependency on UsefulBuf.h. There is no object code
 * size impact because these always optimize down to a simple assignment.
 */
/* Return the raw bits of a float as a uint32_t. The bytes are copied
 * with memcpy() rather than read through a cast pointer so that no
 * type punning is involved. */
static inline uint32_t
CopyFloatToUint32(float f)
{
   uint32_t uBits;

   memcpy(&uBits, &f, sizeof(uBits));

   return uBits;
}
152
/* Return the raw bits of a double as a uint64_t. The bytes are copied
 * with memcpy() rather than read through a cast pointer so that no
 * type punning is involved. */
static inline uint64_t
CopyDoubleToUint64(double d)
{
   uint64_t uBits;

   memcpy(&uBits, &d, sizeof(uBits));

   return uBits;
}
160
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800161
162#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
163
164
/* Return the double whose raw bits are those of the given uint64_t.
 * The bytes are copied with memcpy() so that no type punning is
 * involved. */
static inline double
CopyUint64ToDouble(uint64_t u64)
{
   double dValue;

   memcpy(&dValue, &u64, sizeof(u64));

   return dValue;
}
172
/* Return the float whose raw bits are those of the given uint32_t.
 * The bytes are copied with memcpy() so that no type punning is
 * involved. */
static inline float
CopyUint32ToSingle(uint32_t u32)
{
   float fValue;

   memcpy(&fValue, &u32, sizeof(u32));

   return fValue;
}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700180
181
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800182
183
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700184/**
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800185 * @brief Assemble sign, significand and exponent into double precision float.
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700186 *
187 * @param[in] uDoubleSign 0 if positive, 1 if negative
188 * @pararm[in] uDoubleSignificand Bits of the significand
189 * @param[in] nDoubleUnBiasedExponent Exponent
190 *
191 * This returns the bits for a single-precision float, a binary64
192 * as specified in IEEE754.
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700193 */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700194static double
195IEEE754_AssembleDouble(uint64_t uDoubleSign,
196 uint64_t uDoubleSignificand,
197 int64_t nDoubleUnBiasedExponent)
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700198{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700199 uint64_t uDoubleBiasedExponent;
200
201 uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
202
203 return CopyUint64ToDouble(uDoubleSignificand |
204 (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
205 (uDoubleSign << DOUBLE_SIGN_SHIFT));
206}
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800207
208
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800209/* Public function; see ieee754.h */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700210double
211IEEE754_HalfToDouble(uint16_t uHalfPrecision)
212{
213 uint64_t uDoubleSignificand;
214 int64_t nDoubleUnBiasedExponent;
215 double dResult;
216
217 /* Pull out the three parts of the half-precision float. Do all
218 * the work in 64 bits because that is what the end result is. It
219 * may give smaller code size and will keep static analyzers
220 * happier.
221 */
222 const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
223 const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
224 const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
225 const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
226
227 if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
228 /* 0 or subnormal */
229 if(uHalfSignificand) {
230 /* --- SUBNORMAL --- */
231 /* A half-precision subnormal can always be converted to a
232 * normal double-precision float because the ranges line up.
233 * The exponent of a subnormal starts out at the min exponent
234 * for a normal. As the sub normal significand bits are
235 * shifted, left to normalize, the exponent is
236 * decremented. Shifting continues until fully normalized.
237 */
238 nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
239 uDoubleSignificand = uHalfSignificand;
240 do {
241 uDoubleSignificand <<= 1;
242 nDoubleUnBiasedExponent--;
243 } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
244 /* A normal has an implied 1 in the most significant
245 * position that a subnormal doesn't. */
246 uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
247 /* Must shift into place for a double significand */
248 uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
249
250 dResult = IEEE754_AssembleDouble(uHalfSign,
251 uDoubleSignificand,
252 nDoubleUnBiasedExponent);
253 } else {
254 /* --- ZERO --- */
255 dResult = IEEE754_AssembleDouble(uHalfSign,
256 0,
257 DOUBLE_EXPONENT_ZERO);
258 }
259 } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
260 /* NaN or Inifinity */
261 if(uHalfSignificand) {
262 /* --- NaN --- */
263 /* Half-precision payloads always fit into double precision
264 * payloads. They are shifted left the same as a normal
265 * number significand.
266 */
267 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
268 dResult = IEEE754_AssembleDouble(uHalfSign,
269 uDoubleSignificand,
270 DOUBLE_EXPONENT_INF_OR_NAN);
271 } else {
272 /* --- INFINITY --- */
273 dResult = IEEE754_AssembleDouble(uHalfSign,
274 0,
275 DOUBLE_EXPONENT_INF_OR_NAN);
276 }
277 } else {
278 /* --- NORMAL NUMBER --- */
279 uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
280 dResult = IEEE754_AssembleDouble(uHalfSign,
281 uDoubleSignificand,
282 nHalfUnBiasedExponent);
283 }
284
285 return dResult;
286}
287
288
289/**
290 * @brief Assemble sign, significand and exponent into single precision float.
291 *
292 * @param[in] uHalfSign 0 if positive, 1 if negative
293 * @pararm[in] uHalfSignificand Bits of the significand
294 * @param[in] nHalfUnBiasedExponent Exponent
295 *
296 * This returns the bits for a single-precision float, a binary32 as
297 * specified in IEEE754. It is returned as a uint64_t rather than a
298 * uint32_t or a float for convenience of usage.
299 */
300static uint32_t
301IEEE754_AssembleHalf(uint32_t uHalfSign,
302 uint32_t uHalfSignificand,
303 int32_t nHalfUnBiasedExponent)
304{
305 uint32_t uHalfUnbiasedExponent;
306
307 uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
308
309 return uHalfSignificand |
310 (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
311 (uHalfSign << HALF_SIGN_SHIFT);
312}
313
314
315/* Public function; see ieee754.h */
316IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800317IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700318{
319 IEEE754_union result;
320 uint32_t uDroppedBits;
321 int32_t nExponentDifference;
322 int32_t nShiftAmount;
323 uint32_t uHalfSignificand;
324
325 /* Pull the three parts out of the double-precision float Most work
326 * is done with uint32_t which helps avoid integer promotions and
327 * static analyzer complaints.
328 */
329 const uint32_t uSingle = CopyFloatToUint32(f);
330 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
331 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
332 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
333 const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
334
335 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
336 if(uSingleSignificand == 0) {
337 /* --- IS ZERO --- */
338 result.uSize = IEEE754_UNION_IS_HALF;
339 result.uValue = IEEE754_AssembleHalf(uSingleSign,
340 0,
341 HALF_EXPONENT_ZERO);
342 } else {
343 /* --- IS SINGLE SUBNORMAL --- */
344 /* The largest single subnormal is slightly less than the
345 * largest single normal which is 2^-149 or
346 * 2.2040517676619426e-38. The smallest half subnormal is
347 * 2^-14 or 5.9604644775390625E-8. There is no overlap so
348 * single subnormals can't be converted to halfs of any sort.
349 */
350 result.uSize = IEEE754_UNION_IS_SINGLE;
351 result.uValue = uSingle;
352 }
353 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
354 if(uSingleSignificand == 0) {
355 /* ---- IS INFINITY ---- */
356 result.uSize = IEEE754_UNION_IS_HALF;
357 result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
358 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800359 if(bNoNaNPayload) {
360 /* --- REQUIRE CANNONICAL NAN --- */
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700361 result.uSize = IEEE754_UNION_IS_HALF;
362 result.uValue = IEEE754_AssembleHalf(uSingleSign,
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800363 HALF_QUIET_NAN_BIT,
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700364 HALF_EXPONENT_INF_OR_NAN);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700365 } else {
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800366 /* The NaN can only be converted if no payload bits are lost
367 * per RFC 8949 section 4.1 that defines Preferred
368 * Serializaton. Note that Deterministically Encode CBOR in
369 * section 4.2 allows for some variation of this rule, but at
370 * the moment this implementation is of Preferred
371 * Serialization, not CDE. As of December 2023, we are also
372 * expecting an update to CDE. This code may need to be
373 * updated for CDE.
374 */
375 uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
376 if(uDroppedBits == 0) {
377 /* --- IS CONVERTABLE NAN --- */
378 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
379 result.uSize = IEEE754_UNION_IS_HALF;
380 result.uValue = IEEE754_AssembleHalf(uSingleSign,
381 uHalfSignificand,
382 HALF_EXPONENT_INF_OR_NAN);
383
384 } else {
385 /* --- IS UNCONVERTABLE NAN --- */
386 result.uSize = IEEE754_UNION_IS_SINGLE;
387 result.uValue = uSingle;
388 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700389 }
390 }
391 } else {
392 /* ---- REGULAR NUMBER ---- */
393 /* A regular single can be converted to a regular half if the
394 * single's exponent is in the smaller range of a half and if no
395 * precision is lost in the significand.
396 */
397 if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
398 nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
399 (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
400 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
401
402 /* --- CONVERT TO HALF NORMAL --- */
403 result.uSize = IEEE754_UNION_IS_HALF;
404 result.uValue = IEEE754_AssembleHalf(uSingleSign,
405 uHalfSignificand,
406 nSingleUnbiasedExponent);
407 } else {
408 /* Unable to convert to a half normal. See if it can be
409 * converted to a half subnormal. To do that, the exponent
410 * must be in range and no precision can be lost in the
411 * signficand.
412 *
413 * This is more complicated because the number is not
414 * normalized. The signficand must be shifted proprotionally
415 * to the exponent and 1 must be added in. See
416 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
417 *
418 * Exponents -14 to -24 map to a shift of 0 to 10 of the
419 * significand. The largest value of a half subnormal has an
420 * exponent of -14. Subnormals are not normalized like
421 * normals meaning they lose precision as the numbers get
422 * smaller. Normals don't lose precision because the exponent
423 * allows all the bits of the significand to be significant.
424 */
425 /* The exponent of the largest possible half-precision
426 * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger
427 * than this are normal and handled above. We're going to
428 * shift the significand right by at least this amount.
429 */
430 nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
431
432 /* In addition to the shift based on the exponent's value,
433 * the single significand has to be shifted right to fit into
434 * a half-precision significand */
435 nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
436
437 /* Must add 1 in to the possible significand because there is
438 * an implied 1 for normal values and not for subnormal
439 * values. See equations here:
440 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
441 */
442 uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
443
444 /* If only zero bits get shifted out, this can be converted
445 * to subnormal */
446 if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
447 nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
448 uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
449 /* --- CONVERTABLE TO HALF SUBNORMAL --- */
450 result.uSize = IEEE754_UNION_IS_HALF;
451 result.uValue = IEEE754_AssembleHalf(uSingleSign,
452 uHalfSignificand,
453 HALF_EXPONENT_ZERO);
454 } else {
455 /* --- DO NOT CONVERT --- */
456 result.uSize = IEEE754_UNION_IS_SINGLE;
457 result.uValue = uSingle;
458 }
459 }
460 }
461
462 return result;
463}
464
465
466/**
467 * @brief Assemble sign, significand and exponent into single precision float.
468 *
469 * @param[in] uSingleSign 0 if positive, 1 if negative
470 * @pararm[in] uSingleSignificand Bits of the significand
471 * @param[in] nSingleUnBiasedExponent Exponent
472 *
473 * This returns the bits for a single-precision float, a binary32 as
474 * specified in IEEE754. It is returned as a uint64_t rather than a
475 * uint32_t or a float for convenience of usage.
476 */
477static uint64_t
478IEEE754_AssembleSingle(uint64_t uSingleSign,
479 uint64_t uSingleSignificand,
480 int64_t nSingleUnBiasedExponent)
481{
482 uint64_t uSingleBiasedExponent;
483
484 uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
485
486 return uSingleSignificand |
487 (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
488 (uSingleSign << SINGLE_SIGN_SHIFT);
489}
490
491
492/**
493 * @brief Convert a double-precision float to single-precision.
494 *
495 * @param[in] d The value to convert.
496 *
497 * @returns Either unconverted value or value converted to single-precision.
498 *
499 * This always succeeds. If the value cannot be converted without the
500 * loss of precision, it is not converted.
501 *
502 * This handles all subnormals and NaN payloads.
503 */
504static IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800505IEEE754_DoubleToSingle(const double d)
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700506{
507 IEEE754_union Result;
508 int64_t nExponentDifference;
509 int64_t nShiftAmount;
510 uint64_t uSingleSignificand;
511 uint64_t uDroppedBits;
512
513
514 /* Pull the three parts out of the double-precision float. Most
515 * work is done with uint64_t which helps avoid integer promotions
516 * and static analyzer complaints.
517 */
518 const uint64_t uDouble = CopyDoubleToUint64(d);
519 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
520 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
521 const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
522 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
523
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700524 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
525 if(uDoubleSignificand == 0) {
526 /* --- IS ZERO --- */
527 Result.uSize = IEEE754_UNION_IS_SINGLE;
528 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
529 0,
530 SINGLE_EXPONENT_ZERO);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700531 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700532 /* --- IS DOUBLE SUBNORMAL --- */
533 /* The largest double subnormal is slightly less than the
534 * largest double normal which is 2^-1022 or
535 * 2.2250738585072014e-308. The smallest single subnormal
536 * is 2^-149 or 1.401298464324817e-45. There is no
537 * overlap so double subnormals can't be converted to
538 * singles of any sort.
539 */
540 Result.uSize = IEEE754_UNION_IS_DOUBLE;
541 Result.uValue = uDouble;
542 }
543 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
544 if(uDoubleSignificand == 0) {
545 /* ---- IS INFINITY ---- */
546 Result.uSize = IEEE754_UNION_IS_SINGLE;
547 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
548 0,
549 SINGLE_EXPONENT_INF_OR_NAN);
550 } else {
551 /* The NaN can only be converted if no payload bits are
552 * lost per RFC 8949 section 4.1 that defines Preferred
553 * Serializaton. Note that Deterministically Encode CBOR
554 * in section 4.2 allows for some variation of this rule,
555 * but at the moment this implementation is of Preferred
556 * Serialization, not CDE. As of December 2023, we are
557 * also expecting an update to CDE. This code may need to
558 * be updated for CDE.
559 */
560 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
561 if(uDroppedBits == 0) {
562 /* --- IS CONVERTABLE NAN --- */
563 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
564 Result.uSize = IEEE754_UNION_IS_SINGLE;
565 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
566 uSingleSignificand,
567 SINGLE_EXPONENT_INF_OR_NAN);
568 } else {
569 /* --- IS UNCONVERTABLE NAN --- */
570 Result.uSize = IEEE754_UNION_IS_DOUBLE;
571 Result.uValue = uDouble;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700572 }
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700573 }
574 } else {
575 /* ---- REGULAR NUMBER ---- */
576 /* A regular double can be converted to a regular single if
577 * the double's exponent is in the smaller range of a single
578 * and if no precision is lost in the significand.
579 */
580 uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
581 if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
582 nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
583 uDroppedBits == 0) {
584 /* --- IS CONVERTABLE TO SINGLE --- */
585 uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
586 Result.uSize = IEEE754_UNION_IS_SINGLE;
587 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
588 uSingleSignificand,
589 nDoubleUnbiasedExponent);
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700590 } else {
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700591 /* Unable to convert to a single normal. See if it can be
592 * converted to a single subnormal. To do that, the
593 * exponent must be in range and no precision can be lost
594 * in the signficand.
595 *
596 * This is more complicated because the number is not
597 * normalized. The signficand must be shifted
598 * proprotionally to the exponent and 1 must be added
599 * in. See
600 * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
601 */
602 nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
603 nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
604 uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
605
606 if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
607 nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
608 uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
609 /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
610 Result.uSize = IEEE754_UNION_IS_SINGLE;
611 Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
612 uSingleSignificand,
613 SINGLE_EXPONENT_ZERO);
614 } else {
615 /* --- CAN NOT BE CONVERTED --- */
616 Result.uSize = IEEE754_UNION_IS_DOUBLE;
617 Result.uValue = uDouble;
618 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700619 }
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700620 }
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800621
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700622 return Result;
Laurence Lundblade67bd5512018-11-02 21:44:06 +0700623}
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700624
625
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700626/* Public function; see ieee754.h */
627IEEE754_union
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800628IEEE754_DoubleToSmaller(const double d,
629 const int bAllowHalfPrecision,
630 const int bNoNanPayload)
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700631{
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700632 IEEE754_union result;
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800633
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700634 result = IEEE754_DoubleToSingle(d);
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800635
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700636 if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
637 /* Cast to uint32_t is OK, because value was just successfully
638 * converted to single. */
639 float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800640 result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700641 }
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700642
Laurence Lundblade83dbf5c2024-01-07 19:17:52 -0700643 return result;
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700644}
645
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800646
/**
 * @brief Count the significand bits in use, from the top down to the lowest set bit.
 *
 * @param[in] nMax     Number of bits in the significand. Must be in
 *                     the range 0 to 63.
 * @param[in] uTarget  The significand to examine. Only the low
 *                     @c nMax bits are looked at.
 *
 * @returns The position of the least significant set bit counted from
 *          the most significant of the @c nMax bits, with that top
 *          bit counting as 1. Returns 0 if none of the bits are set.
 *
 * For example, with @c nMax of 52 a value with only bit 51 set gives
 * 1 and a value with bit 0 set gives 52.
 */
static int
IEEE754_Private_CountNonZeroBits(int nMax, uint64_t uTarget)
{
   int      nNonZeroBitsCount;
   uint64_t uMask;

   for(nNonZeroBitsCount = nMax; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {
      /* The 1 that is shifted must be 64 bits wide. nMax is 52 for a
       * double and shifting an unsigned long that far is undefined
       * behavior where unsigned long is only 32 bits. */
      uMask = ((uint64_t)0x01 << nMax) >> nNonZeroBitsCount;
      if(uMask & uTarget) {
         break;
      }
   }
   return nNonZeroBitsCount;
}
661
662
663/* Public function; see ieee754.h */
664struct IEEE754_ToInt
665IEEE754_DoubleToInt(const double d)
666{
667 int64_t nNonZeroBitsCount;
668 struct IEEE754_ToInt Result;
669 uint64_t uInteger;
670
671 /* Pull the three parts out of the double-precision float. Most
672 * work is done with uint64_t which helps avoid integer promotions
673 * and static analyzer complaints.
674 */
675 const uint64_t uDouble = CopyDoubleToUint64(d);
676 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
677 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
678 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
679 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
680
681 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
682 if(uDoubleSignificand == 0) {
683 /* --- POSITIVE AND NEGATIVE ZERO --- */
684 Result.integer.un_signed = 0;
685 Result.type = IEEE754_ToInt_IS_UINT;
686 } else {
687 /* --- SUBNORMAL --- */
688 Result.type = IEEE754_ToInt_NO_CONVERSION;
689 }
690 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
691 if(uDoubleSignificand != 0) {
692 /* --- NAN --- */
693 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
694 } else {
695 /* --- INIFINITY --- */
696 Result.type = IEEE754_ToInt_NO_CONVERSION;
697 }
698 } else if(nDoubleUnbiasedExponent < 0 ||
699 (nDoubleUnbiasedExponent >= ((uDouble & DOUBLE_SIGN_MASK) ? 63 : 64))) {
700 /* --- Exponent out of range --- */
701 Result.type = IEEE754_ToInt_NO_CONVERSION;
702 } else {
703 /* Count down from 52 to the number of bits that are not zero in
704 * the significand. This counts from the least significant bit
705 * until a non-zero bit is found to know if it is a whole
706 * number.
707 *
708 * Conversion only fails when the input is too large or is not a
709 * whole number, never because of lack of precision because
710 * 64-bit integers always have more precision than the 52-bits
711 * of a double.
712 */
713 nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(DOUBLE_NUM_SIGNIFICAND_BITS, uDoubleSignificand);
714
715 if(nNonZeroBitsCount && nNonZeroBitsCount > nDoubleUnbiasedExponent) {
716 /* --- Not a whole number --- */
717 Result.type = IEEE754_ToInt_NO_CONVERSION;
718 } else {
719 /* --- CONVERTABLE WHOLE NUMBER --- */
720 /* Add in the one that is implied in normal floats */
721 uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
722 /* Factor in the exponent */
723 if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {
724 /* Numbers less than 2^52 with up to 52 significant bits */
725 uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;
726 } else {
727 /* Numbers greater than 2^52 with at most 52 significant bits */
728 uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;
729 }
730 if(uDouble & DOUBLE_SIGN_MASK) {
731 /* Cast safe because exponent range check above */
732 Result.integer.is_signed = -((int64_t)uInteger);
733 Result.type = IEEE754_ToInt_IS_INT;
734 } else {
735 Result.integer.un_signed = uInteger;
736 Result.type = IEEE754_ToInt_IS_UINT;
737 }
738 }
739 }
740
741 return Result;
742}
743
744
745/* Public function; see ieee754.h */
746struct IEEE754_ToInt
747IEEE754_SingleToInt(const float f)
748{
749 int32_t nNonZeroBitsCount;
750 struct IEEE754_ToInt Result;
751 uint64_t uInteger;
752
753 /* Pull the three parts out of the single-precision float. Most
754 * work is done with uint32_t which helps avoid integer promotions
755 * and static analyzer complaints.
756 */
757 const uint32_t uSingle = CopyFloatToUint32(f);
758 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
759 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
760 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
761 const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
762
763 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
764 if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {
765 /* --- POSITIVE AND NEGATIVE ZERO --- */
766 Result.integer.un_signed = 0;
767 Result.type = IEEE754_ToInt_IS_UINT;
768 } else {
769 /* --- Subnormal --- */
770 Result.type = IEEE754_ToInt_NO_CONVERSION;
771 }
772 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
773 /* --- NAN or INFINITY --- */
774 if(uSingleleSignificand != 0) {
775 Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
776 } else {
777 Result.type = IEEE754_ToInt_NO_CONVERSION;
778 }
779 } else if(nSingleUnbiasedExponent < 0 ||
780 (nSingleUnbiasedExponent >= ((uSingle & SINGLE_SIGN_MASK) ? 63 : 64))) {
781 /* --- Exponent out of range --- */
782 Result.type = IEEE754_ToInt_NO_CONVERSION;
783 } else {
784 /* Count down from 23 to the number of bits that are not zero in
785 * the significand. This counts from the least significant bit
786 * until a non-zero bit is found.
787 *
788 * Conversion only fails when the input is too large or is not a
789 * whole number, never because of lack of precision because
790 * 64-bit integers always have more precision than the 52-bits
791 * of a double.
792 */
793 nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(SINGLE_NUM_SIGNIFICAND_BITS, uSingleleSignificand);
794
795 if(nNonZeroBitsCount && nNonZeroBitsCount > nSingleUnbiasedExponent) {
796 /* --- Not a whole number --- */
797 Result.type = IEEE754_ToInt_NO_CONVERSION;
798 } else {
799 /* --- CONVERTABLE WHOLE NUMBER --- */
800 /* Add in the one that is implied in normal floats */
801 uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);
802 /* Factor in the exponent */
803 if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {
804 /* Numbers less than 2^23 with up to 23 significant bits */
805 uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;
806 } else {
807 /* Numbers greater than 2^23 with at most 23 significant bits*/
808 uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;
809 }
810 if(uSingle & SINGLE_SIGN_MASK) {
811 Result.integer.is_signed = -((int64_t)uInteger);
812 Result.type = IEEE754_ToInt_IS_INT;
813 } else {
814 Result.integer.un_signed = uInteger;
815 Result.type = IEEE754_ToInt_IS_UINT;
816 }
817 }
818 }
819
820 return Result;
821}
Laurence Lundbladefe09bbf2020-07-16 12:14:51 -0700822
Laurence Lundbladeb275cdc2020-07-12 12:34:38 -0700823#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
Laurence Lundbladeeb3cdef2024-02-17 20:38:55 -0800824
825
826
827/* Public function; see ieee754.h */
828int
829IEEE754_IsNotStandardDoubleNaN(const double d)
830{
831 const uint64_t uDouble = CopyDoubleToUint64(d);
832 const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
833 /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
834 const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
835 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
836
837 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN &&
838 uDoubleSignificand != 0 &&
839 uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) {
840 return 1;
841 } else {
842 return 0;
843 }
844}
845
846
847/* Public function; see ieee754.h */
848int
849IEEE754_IsNotStandardSingleNaN(const float f)
850{
851 const uint32_t uSingle = CopyFloatToUint32(f);
852 const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
853 /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
854 const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
855 const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
856
857 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN &&
858 uSingleleSignificand != 0 &&
859 uSingleleSignificand != SINGLE_QUIET_NAN_BIT) {
860 return 1;
861 } else {
862 return 0;
863 }
864}