| /*============================================================================== |
| ieee754.h -- floating-point conversion between half, double & single-precision |
| |
| Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved. |
| |
| SPDX-License-Identifier: BSD-3-Clause |
| |
| See BSD-3-Clause license in README.md |
| |
| Created on 7/23/18 |
| =============================================================================*/ |
| |
| #ifndef QCBOR_DISABLE_PREFERRED_FLOAT |
| |
| #ifndef ieee754_h |
| #define ieee754_h |
| |
| #include <stdint.h> |
| |
| |
| |
| /* |
| General comments |
| |
| This implementation is complete in that it handles all conversion cases including |
| +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN |
| payloads. |
| |
| This conforms to IEEE 754-2008, but note that this doesn't specify |
| conversions, just the encodings. |
| |
| NaN payloads are preserved with alignment on the LSB. The qNaN bit is |
| handled differently and explicitly copied. It is always the MSB of the |
| significand. The NaN payload MSBs (except the qNaN bit) are truncated |
| when going from double or single to half. |
| |
| TODO: what does the C cast do with NaN payloads from |
| double to single? It probably depends entirely on the |
| CPU. |
| |
| */ |
| |
| /* |
| Most simply, just explicitly encode the type you want, single or |
| double. This works easily everywhere since standard C supports both |
| these types and so does qcbor. This encoder also supports half |
| precision and there's a few ways to use it to encode floating-point |
| numbers in less space. |
| |
| Without losing precision, you can encode a single or double such that |
| the special values of 0, NaN and Infinity encode as half-precision. |
| This CBOR decoder and most others should handle this properly. |
| |
| If you don't mind losing precision, then you can use half-precision. |
| One way to do this is to set up your environment to use |
| __fp16. Some compilers and CPUs support it even though it is not |
| standard C. What is nice about this is that your program will use |
| less memory and floating-point operations like multiplying, adding |
| and such will be faster. |
| |
| Another way to make use of half-precision is to represent the values |
| in your program as single or double, but encode them in CBOR as |
| half-precision. This cuts the size of the encoded values by a factor of about 2 or 4, |
| but doesn't reduce memory needs or speed because you are still using |
| single or double in your code. |
| |
| */ |
| |
| |
| |
| /* |
| Convert single-precision float to half-precision float. Precision |
| and NaN payload bits will be lost. Too-large values will round up to |
| infinity and too small to zero. |
| |
| f: the single-precision value to convert. |
| |
| Returns the half-precision result in a uint16_t (presumably the raw |
| IEEE 754 half-precision bit pattern -- confirm against the |
| implementation, which is not in this header). |
| */ |
| uint16_t IEEE754_FloatToHalf(float f); |
| |
| |
| /* |
| Convert double-precision float to half-precision float. Precision |
| and NaN payload bits will be lost. Too-large values will round up to |
| infinity and too small to zero. |
| |
| d: the double-precision value to convert. |
| |
| Returns the half-precision result in a uint16_t (presumably the raw |
| IEEE 754 half-precision bit pattern -- confirm against the |
| implementation, which is not in this header). |
| */ |
| uint16_t IEEE754_DoubleToHalf(double d); |
| |
| |
| /* |
| Convert half-precision float to double-precision float. |
| This is a loss-less conversion. |
| |
| uHalfPrecision: the half-precision value, passed in a uint16_t |
| (presumably its raw bit pattern -- confirm against the |
| implementation). |
| |
| Returns the equivalent double. |
| */ |
| double IEEE754_HalfToDouble(uint16_t uHalfPrecision); |
| |
| |
| /* |
| Convert float to double-precision without using any |
| floating-point HW or compiler-supplied SW. |
| This is a loss-less conversion. |
| |
| ufloat: the single-precision value, passed as a uint32_t rather than |
| a float (presumably its raw bit pattern -- confirm against the |
| implementation). |
| |
| Returns the equivalent double. |
| */ |
| double IEEE754_FloatToDouble(uint32_t ufloat); |
| |
| |
| // Each constant both tags the format held in an IEEE754_union (its uSize) and gives that format's size in bytes |
| #define IEEE754_UNION_IS_HALF 2 /* half-precision, 2 bytes */ |
| #define IEEE754_UNION_IS_SINGLE 4 /* single-precision, 4 bytes */ |
| #define IEEE754_UNION_IS_DOUBLE 8 /* double-precision, 8 bytes */ |
| |
| typedef struct { /* Tagged conversion result: uSize says which format uValue holds */ |
| uint8_t uSize; // One of IEEE754_UNION_IS_xxxx |
| uint64_t uValue; /* The value; presumably the raw bits of the format named by uSize -- confirm */ |
| } IEEE754_union; |
| |
| |
| /* |
| Converts double-precision to single-precision or half-precision if |
| possible without loss of precision. If not, leaves it as a |
| double. Only converts to single-precision unless bAllowHalfPrecision |
| is set. |
| |
| d: the double-precision value to convert. |
| bAllowHalfPrecision: pass 1 to also allow half-precision, 0 to stop |
| at single-precision (these are the values the inline wrappers in this |
| header pass). |
| |
| Returns an IEEE754_union; its uSize is one of IEEE754_UNION_IS_xxxx |
| and tags the format of the value it holds. |
| */ |
| IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision); |
| |
| /* |
| Converts double-precision to single-precision if possible without |
| loss of precision. If not, leaves it as a double. |
| |
| This is IEEE754_DoubleToSmallestInternal() with bAllowHalfPrecision |
| passed as 0, so half-precision is not allowed. |
| |
| d: the double-precision value to convert. |
| |
| Returns the IEEE754_union produced by that call. |
| */ |
| static inline IEEE754_union IEEE754_DoubleToSmall(double d) |
| { |
| return IEEE754_DoubleToSmallestInternal(d, 0); |
| } |
| |
| |
| /* |
| Converts double-precision to single-precision or half-precision if |
| possible without loss of precision. If not, leaves it as a double. |
| |
| This is IEEE754_DoubleToSmallestInternal() with bAllowHalfPrecision |
| passed as 1, so half-precision is allowed. |
| |
| d: the double-precision value to convert. |
| |
| Returns the IEEE754_union produced by that call. |
| */ |
| static inline IEEE754_union IEEE754_DoubleToSmallest(double d) |
| { |
| return IEEE754_DoubleToSmallestInternal(d, 1); |
| } |
| |
| |
| /* |
| Converts single-precision to half-precision if possible without loss |
| of precision. If not, leaves it as single-precision. |
| |
| f: the single-precision value to convert. |
| |
| Returns an IEEE754_union; its uSize is one of IEEE754_UNION_IS_xxxx |
| and tags the format of the value it holds. |
| */ |
| IEEE754_union IEEE754_FloatToSmallest(float f); |
| |
| |
| #endif /* ieee754_h */ |
| |
| |
| #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */ |
| |
| |
| |
| |