blob: 705ef62e731162f8e915761f706075f7eebf2b99 [file] [log] [blame]
/*==============================================================================
 ieee754.h -- floating-point conversion between half, double & single-precision

 Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.

 SPDX-License-Identifier: BSD-3-Clause

 See BSD-3-Clause license in README.md

 Created on 7/23/18
 =============================================================================*/
Laurence Lundblade12d32c52018-09-19 11:25:27 -070012
13#ifndef ieee754_h
14#define ieee754_h
15
16#include <stdint.h>
17
18
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070019
/*
 General comments

 This is a complete implementation in that it handles all conversion
 cases including +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN
 and NaN payloads.

 This conforms to IEEE 754-2008, but note that this doesn't specify
 conversions, just the encodings.

 NaN payloads are preserved with alignment on the LSB. The qNaN bit is
 handled differently and explicitly copied. It is always the MSB of the
 significand. The NaN payload MSBs (except the qNaN bit) are truncated
 when going from double or single to half.

 TODO: what does the C cast do with NaN payloads from
 double to single? It probably depends entirely on the
 CPU.

 */
40
/*
 Most simply, just explicitly encode the type you want, single or
 double. This works easily everywhere since standard C supports both
 these types and so does qcbor. This encoder also supports half
 precision and there are a few ways to use it to encode floating-point
 numbers in less space.

 Without losing precision, you can encode a single or double such that
 the special values of 0, NaN and Infinity encode as half-precision.
 This CBOR decoder and most others should handle this properly.

 If you don't mind losing precision, then you can use half-precision.
 One way to do this is to set up your environment to use
 __fp16. Some compilers and CPUs support it even though it is not
 standard C. What is nice about this is that your program will use
 less memory and floating-point operations like multiplying, adding
 and such will be faster.

 Another way to make use of half-precision is to represent the values
 in your program as single or double, but encode them in CBOR as
 half-precision. This cuts the size of the encoded messages by a
 factor of 2 or 4, but doesn't reduce memory needs or speed because
 you are still using single or double in your code.

 */
66
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070067
68
/*
 Convert single-precision float to half-precision float. Precision
 and NaN payload bits will be lost. Too-large values will round up to
 infinity and too small to zero.

 f is the single-precision value to convert. The half-precision
 result is returned in a uint16_t.
 */
uint16_t IEEE754_FloatToHalf(float f);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070075
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070076
/*
 Convert half-precision float to single-precision float. This is a
 lossless conversion.

 uHalfPrecision is the half-precision value, passed in a uint16_t.
 The result is returned as a float.
 */
float IEEE754_HalfToFloat(uint16_t uHalfPrecision);
82
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070083
/*
 Convert double-precision float to half-precision float. Precision
 and NaN payload bits will be lost. Too-large values will round up to
 infinity and too small to zero.

 d is the double-precision value to convert. The half-precision
 result is returned in a uint16_t.
 */
uint16_t IEEE754_DoubleToHalf(double d);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070090
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070091
/*
 Convert half-precision float to double-precision float.
 This is a lossless conversion.

 uHalfPrecision is the half-precision value, passed in a uint16_t.
 The result is returned as a double.
 */
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
97
98
99
// Each constant both tags the value held in an IEEE754_union and
// gives its size in bytes.
#define IEEE754_UNION_IS_HALF   2
#define IEEE754_UNION_IS_SINGLE 4
#define IEEE754_UNION_IS_DOUBLE 8

/*
 A floating-point value together with a tag saying which of the
 three precisions it is held in. This is the return type of the
 "smallest" conversion functions declared in this header.
 */
typedef struct {
   uint8_t  uSize;  // One of IEEE754_UNION_IS_xxxx
   uint64_t uValue; // The value itself. NOTE(review): presumably the raw
                    // IEEE 754 bit pattern of the indicated precision in
                    // the low-order bits -- confirm against the
                    // implementation.
} IEEE754_union;
109
110
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700111/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800112 Converts double-precision to single-precision or half-precision if
113 possible without loss of precisions. If not, leaves it as a
114 double. Only converts to single-precision unless bAllowHalfPrecision
115 is set.
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700116 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700117IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
118
119/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700120 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700121 loss of precision. If not, leaves it as a double.
122 */
123static inline IEEE754_union IEEE754_DoubleToSmall(double d)
124{
125 return IEEE754_DoubleToSmallestInternal(d, 0);
126}
127
128
129/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800130 Converts double-precision to single-precision or half-precision if
131 possible without loss of precisions. If not, leaves it as a double.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700132 */
133static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
134{
135 return IEEE754_DoubleToSmallestInternal(d, 1);
136}
137
Laurence Lundbladeee851742020-01-08 08:37:05 -0800138
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700139/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800140 Converts single-precision to half-precision if possible without loss
141 of precision. If not leaves as single-precision.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700142 */
143IEEE754_union IEEE754_FloatToSmallest(float f);
144
145
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700146#endif /* ieee754_h */
147
148
149
150
151
152
153