//
//  ieee754.c
//  Indefinite
//
//  Created by Laurence Lundblade on 7/23/18.
//  Copyright © 2018 Laurence Lundblade. All rights reserved.
//
8
9#include "ieee754.h"
10#include <string.h> // For memcpy()
11
12/*
13
14 https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
15
16 These values come from IEEE 754-2008 section 3.6
17
 This code is written for clarity and verifiability, not for size, on the assumption
 that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
 job and the resulting object code is smaller from combining code for the many different
 cases (normal, subnormal, infinity, zero...) for the conversions.
22
23 Dead stripping is also really helpful to get code size down.
24
25 */
26
27
// ----- Half Precision -----------
// Width of each field, in bits
#define HALF_NUM_SIGNIFICAND_BITS (10)
#define HALF_NUM_EXPONENT_BITS    (5)
#define HALF_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define HALF_SIGNIFICAND_SHIFT (0)
#define HALF_EXPONENT_SHIFT    (HALF_NUM_SIGNIFICAND_BITS)
#define HALF_SIGN_SHIFT        (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

// Field masks, derived from the widths and shifts above
#define HALF_SIGNIFICAND_MASK (((1 << HALF_NUM_SIGNIFICAND_BITS) - 1) << HALF_SIGNIFICAND_SHIFT) // 0x03ff, the lower 10 bits
#define HALF_EXPONENT_MASK    (((1 << HALF_NUM_EXPONENT_BITS) - 1) << HALF_EXPONENT_SHIFT)       // 0x7c00, 5 bits of exponent
#define HALF_SIGN_MASK        (((1 << HALF_NUM_SIGN_BITS) - 1) << HALF_SIGN_SHIFT)               // 0x8000, 1 bit of sign
#define HALF_QUIET_NAN_BIT    (1 << (HALF_EXPONENT_SHIFT - 1))                                   // 0x0200, top bit of the significand

/*  Biased (hex)   Biased (dec)   Unbiased   Use
    0x00             0            -15        0 and subnormal
    0x01             1            -14        Smallest normal exponent
    0x1e            30             15        Largest normal exponent
    0x1f            31             16        NaN and Infinity  */
#define HALF_EXPONENT_BIAS       (15)
#define HALF_EXPONENT_MAX        (HALF_EXPONENT_BIAS)     //  15 Unbiased
#define HALF_EXPONENT_MIN        (1 - HALF_EXPONENT_BIAS) // -14 Unbiased
#define HALF_EXPONENT_ZERO       (0 - HALF_EXPONENT_BIAS) // -15 Unbiased
#define HALF_EXPONENT_INF_OR_NAN (1 + HALF_EXPONENT_BIAS) //  16 Unbiased
52
53
// ------ Single Precision --------
// Width of each field, in bits
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define SINGLE_SIGNIFICAND_SHIFT (0)
#define SINGLE_EXPONENT_SHIFT    (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT        (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

// The masks are unsigned constants. Building the sign mask with a signed
// constant (0x01 << 31) shifts a 1 into the sign bit of an int, which is
// undefined behavior in C. This assumes unsigned int is at least 32 bits wide.
#define SINGLE_SIGNIFICAND_MASK (0x7fffffU)                      // 0x007fffff, the lower 23 bits
#define SINGLE_EXPONENT_MASK    (0xffU << SINGLE_EXPONENT_SHIFT) // 0x7f800000, 8 bits of exponent
#define SINGLE_SIGN_MASK        (0x01U << SINGLE_SIGN_SHIFT)     // 0x80000000, 1 bit of sign

/*  Biased (hex)   Biased (dec)   Unbiased   Use
    0x00              0           -127       0 and subnormal
    0x01              1           -126       Smallest normal exponent
    0x7f            127              0       1
    0xfe            254            127       Largest normal exponent
    0xff            255            128       NaN and Infinity  */
#define SINGLE_EXPONENT_BIAS       (127)
#define SINGLE_EXPONENT_MAX        (SINGLE_EXPONENT_BIAS)    //  127 unbiased
#define SINGLE_EXPONENT_MIN        (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
#define SINGLE_EXPONENT_ZERO       (-SINGLE_EXPONENT_BIAS)   // -127 unbiased
#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)  //  128 unbiased
78
79
// --------- Double Precision ----------
// Width of each field, in bits
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define DOUBLE_SIGNIFICAND_SHIFT (0)
#define DOUBLE_EXPONENT_SHIFT    (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT        (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

// The masks are unsigned constants. Building the sign mask with a signed
// constant (0x01LL << 63) shifts a 1 into the sign bit of a long long,
// which is undefined behavior in C.
#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL)               // The lower 52 bits
#define DOUBLE_EXPONENT_MASK    (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
#define DOUBLE_SIGN_MASK        (0x01ULL << DOUBLE_SIGN_SHIFT)      // 1 bit of sign

/*  Biased (hex)   Biased (dec)   Unbiased   Use
    0x000              0          -1023      0 and subnormal
    0x001              1          -1022      Smallest normal exponent
    0x7fe           2046           1023      Largest normal exponent
    0x7ff           2047           1024      NaN and Infinity  */
#define DOUBLE_EXPONENT_BIAS       (1023)
#define DOUBLE_EXPONENT_MAX        (DOUBLE_EXPONENT_BIAS)    //  1023 unbiased
#define DOUBLE_EXPONENT_MIN        (-DOUBLE_EXPONENT_BIAS+1) // -1022 unbiased
#define DOUBLE_EXPONENT_ZERO       (-DOUBLE_EXPONENT_BIAS)   // -1023 unbiased
#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)  //  1024 unbiased
103
104
105
106/*
107 Convenient functions to avoid type punning, compiler warnings and such
108 The optimizer reduces them to a simple assignment
109 This is a crusty corner of C. It shouldn't be this hard.
110 */
// Copies the bytes of a float into a uint32_t and returns it.
// memcpy() is used rather than a pointer cast so no aliasing rule is broken.
static inline uint32_t CopyFloatToUint32(float f)
{
    uint32_t uBits;

    memcpy(&uBits, &f, sizeof(uBits));

    return uBits;
}
117
// Copies the bytes of a double into a uint64_t and returns it.
// memcpy() is used rather than a pointer cast so no aliasing rule is broken.
static inline uint64_t CopyDoubleToUint64(double d)
{
    uint64_t uBits;

    memcpy(&uBits, &d, sizeof(uBits));

    return uBits;
}
124
// Copies the bytes of a uint64_t into a double and returns it.
// memcpy() is used rather than a pointer cast so no aliasing rule is broken.
static inline double CopyUint64ToDouble(uint64_t u64)
{
    double dValue;

    memcpy(&dValue, &u64, sizeof(u64));

    return dValue;
}
131
// Copies the bytes of a uint32_t into a float and returns it.
// memcpy() is used rather than a pointer cast so no aliasing rule is broken.
static inline float CopyUint32ToFloat(uint32_t u32)
{
    float fValue;

    memcpy(&fValue, &u32, sizeof(u32));

    return fValue;
}
138
139
140
141// Public function; see ieee754.h
142int16_t IEEE754_FloatToHalf(float f)
143{
144 // Pull the three parts out of the single-precision float
145 const uint32_t uSingle = CopyFloatToUint32(f);
146 const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
147 const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
148 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
149
150
151 // Now convert the three parts to half-precision.
152 uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
153 if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
154 // +/- Infinity and NaNs -- single biased exponent is 0xff
155 uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
156 if(!uSingleSignificand) {
157 // Infinity
158 uHalfSignificand = 0;
159 } else {
160 // NaN; significand has to be non-zero
161 if(!(uSingleSignificand & HALF_SIGNIFICAND_MASK)) {
162 // NaN payload bits that can't be carried; convert to a quite NaN
163 // since this has to be non-zero to still be a NaN
164 uHalfSignificand = HALF_QUIET_NAN_BIT; // standard qNaN;
165 } else {
166 // The LSBs are preserved, but not the MSBs
167 // This preservation allows some limited form of NaN payloads / boxing
168 // Would be good to find out what other implementations do for
169 // this kind of conversion of NaN
170 uHalfSignificand = uSingleSignificand & HALF_SIGNIFICAND_MASK;
171 }
172 }
173 } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
174 // 0 or a subnormal number -- singled biased exponent is 0
175 uHalfBiasedExponent = 0;
176 uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
177 } else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
178 // Exponent is too large to express in half-precision; round up to infinity
179 uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
180 uHalfSignificand = 0;
181 } else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
182 // Exponent is too small to express in half-precision normal; make it a half-precision subnormal
183 uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
184 // Difference between single normal exponent and the base exponent of a half subnormal
185 const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
186 // Also have to shift the significand by the difference in number of bits between a single and a half significand
187 const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
188 // Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
189 const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
190 uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
191 } else {
192 // The normal case
193 uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
194 uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
195 }
196 uHalfSign = uSingleSign;
197
198 // Put the 3 values in the right place for a half precision
199 const uint16_t uHalfPrecision = uHalfSignificand |
200 (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
201 (uHalfSign << HALF_SIGN_SHIFT);
202 return uHalfPrecision;
203}
204
205
206// Public function; see ieee754.h
207int16_t IEEE754_DoubleToHalf(double d)
208{
209 // Pull the three parts out of the double-precision float
210 const uint64_t uDouble = CopyDoubleToUint64(d);
211 const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
212 const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
213 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
214
215
216 // Now convert the three parts to half-precision.
217 uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
218 if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
219 // +/- Infinity and NaNs -- single biased exponent is 0xff
220 uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
221 if(!uDoubleSignificand) {
222 // Infinity
223 uHalfSignificand = 0;
224 } else {
225 // NaN; significand has to be non-zero
226 if(!(uDoubleSignificand & HALF_SIGNIFICAND_MASK)) {
227 // NaN payload bits that can't be carried; convert to a quite NaN
228 // since this has to be non-zero to still be a NaN
229 uHalfSignificand = HALF_QUIET_NAN_BIT; // standard qNaN;
230 } else {
231 // The LSBs are preserved, but not the MSBs
232 // This preservation allows some limited form of NaN payloads / boxing
233 // Would be good to find out what other implementations do for
234 // this kind of conversion of NaN
235 uHalfSignificand = uDoubleSignificand & HALF_SIGNIFICAND_MASK;
236 }
237 }
238 } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
239 // 0 or a subnormal number -- double biased exponent is 0
240 uHalfBiasedExponent = 0;
241 uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
242 } else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
243 // Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
244 uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
245 uHalfSignificand = 0;
246 } else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
247 // Exponent is too small to express in half-precision; round down to zero
248 uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
249 // Difference between double normal exponent and the base exponent of a half subnormal
250 const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
251 // Also have to shift the significand by the difference in number of bits between a double and a half significand
252 const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
253 // Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
254 const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01L << DOUBLE_NUM_SIGNIFICAND_BITS);
255 uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
256 } else {
257 // The normal case
258 uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
259 uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
260 }
261 uHalfSign = uDoubleSign;
262
263
264 // Put the 3 values in the right place for a half precision
265 const uint16_t uHalfPrecision = uHalfSignificand |
266 (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
267 (uHalfSign << HALF_SIGN_SHIFT);
268 return uHalfPrecision;
269}
270
271
272// Public function; see ieee754.h
273float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
274{
275 // Pull out the three parts of the half-precision float
276 const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
277 const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
278 const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
279
280
281 // Make the three parts of the single-precision number
282 uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
283 if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
284 // 0 or subnormal
285 if(uHalfSignificand) {
286 // Subnormal case
287 uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
288 // A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
289 uSingleSignificand = uHalfSignificand;
290 // Shift bits from right of the decimal to left, reducing the exponent by 1 each time
291 do {
292 uSingleSignificand <<= 1;
293 uSingleBiasedExponent--;
294 } while ((uSingleSignificand & 0x400) == 0);
295 uSingleSignificand &= HALF_SIGNIFICAND_MASK;
296 uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
297 } else {
298 // Just zero
299 uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
300 uSingleSignificand = 0;
301 }
302 } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
303 // NaN or Inifinity
304 uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
305 if(uHalfSignificand) {
306 // Preserve NaN payload for NaN boxing
307 uSingleSignificand = uHalfSignificand;
308 } else {
309 // Infinity
310 uSingleSignificand = 0;
311 }
312 } else {
313 // Normal number
314 uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
315 uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
316 }
317 uSingleSign = uHalfSign;
318
319
320 // Shift the three parts of the single precision into place
321 const uint32_t uSinglePrecision = uSingleSignificand |
322 (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
323 (uSingleSign << SINGLE_SIGN_SHIFT);
324
325 return CopyUint32ToFloat(uSinglePrecision);
326}
327
328
329// Public function; see ieee754.h
330double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
331{
332 // Pull out the three parts of the half-precision float
333 const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
334 const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
335 const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
336
337
338 // Make the three parts of hte single-precision number
339 uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
340 if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
341 // 0 or subnormal
342 uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
343 if(uHalfSignificand) {
344 // Subnormal case
345 uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
346 // A half-precision subnormal can always be converted to a normal double-precision float because the ranges line up
347 uDoubleSignificand = uHalfSignificand;
348 // Shift bits from right of the decimal to left, reducing the exponent by 1 each time
349 do {
350 uDoubleSignificand <<= 1;
351 uDoubleBiasedExponent--;
352 } while ((uDoubleSignificand & 0x400) == 0);
353 uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
354 uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
355 } else {
356 // Just zero
357 uDoubleSignificand = 0;
358 }
359 } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
360 // NaN or Inifinity
361 uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
362 if(uHalfSignificand) {
363 // Preserve NaN payload for NaN boxing
364 uDoubleSignificand = uHalfSignificand;
365 } else {
366 // Infinity
367 uDoubleSignificand = 0;
368 }
369 } else {
370 // Normal number
371 uDoubleBiasedExponent = nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS;
372 uDoubleSignificand = (uint64_t)uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
373 }
374 uDoubleSign = uHalfSign;
375
376
377 // Shift the 3 parts into place as a double-precision
378 const uint64_t uDouble = uDoubleSignificand |
379 (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
380 (uDoubleSign << DOUBLE_SIGN_SHIFT);
381 return CopyUint64ToDouble(uDouble);
382}
383
384
385// Public function; see ieee754.h
386IEEE754_union IEEE754_FloatToSmallest(float f)
387{
388 IEEE754_union result;
389
390 // Pull the neeed two parts out of the single-precision float
391 const uint32_t uSingle = CopyFloatToUint32(f);
392 const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
393 const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
394
395 // Bit mask that is the significand bits that would be lost when converting
396 // from single-precision to half-precision
397 const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
398
399 // Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
400 if(uSingle == 0) {
401 // Value is 0.0000, not a a subnormal
402 result.uTag = IEEE754_UNION_IS_HALF;
403 result.u16 = IEEE754_FloatToHalf(f);
404 } else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
405 // NaN, +/- infinity
406 result.uTag = IEEE754_UNION_IS_HALF;
407 result.u16 = IEEE754_FloatToHalf(f);
408 } else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
409 // Normal number in exponent range and precision won't be lost
410 result.uTag = IEEE754_UNION_IS_HALF;
411 result.u16 = IEEE754_FloatToHalf(f);
412 } else {
413 // Subnormal, exponent out of range, or precision will be lost
414 result.uTag = IEEE754_UNION_IS_SINGLE;
415 result.u32 = uSingle;
416 }
417
418 return result;
419}
420
421
422IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
423{
424 IEEE754_union result;
425
426 // Pull the needed two parts out of the double-precision float
427 const uint64_t uDouble = CopyDoubleToUint64(d);
428 const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
429 const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
430
431 // Masks to check whether dropped significand bits are zero or not
432 const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
433 const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
434
435 // The various cases
436 if(uDouble == 0) {
437 // Value is 0.0000, not a a subnormal
438 result.uTag = IEEE754_UNION_IS_HALF;
439 result.u16 = IEEE754_DoubleToHalf(d);
440 } else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
441 // NaN, +/- infinity
442 result.uTag = IEEE754_UNION_IS_HALF;
443 result.u16 = IEEE754_DoubleToHalf(d);
444 } else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
445 // Can convert to half without precision loss
446 result.uTag = IEEE754_UNION_IS_HALF;
447 result.u16 = IEEE754_DoubleToHalf(d);
448 } else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
449 // Can convert to single without precision loss
450 result.uTag = IEEE754_UNION_IS_SINGLE;
451 result.u32 = CopyFloatToUint32((float)d);
452 } else {
453 // Can't convert without precision loss
454 result.uTag = IEEE754_UNION_IS_DOUBLE;
455 result.u64 = uDouble;
456 }
457
458 return result;
459}
460