blob: f6824bae8fde5d23db29e8cbb1f3a7980f95f58e [file] [log] [blame]
Laurence Lundbladecc2ed342018-09-22 17:29:55 -07001/*==============================================================================
2 Copyright 2018 Laurence Lundblade
3
4 Permission is hereby granted, free of charge, to any person obtaining
5 a copy of this software and associated documentation files (the
6 "Software"), to deal in the Software without restriction, including
7 without limitation the rights to use, copy, modify, merge, publish,
8 distribute, sublicense, and/or sell copies of the Software, and to
9 permit persons to whom the Software is furnished to do so, subject to
10 the following conditions:
11
12 The above copyright notice and this permission notice shall be included
13 in all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23
24 (This is the MIT license)
25 ==============================================================================*/
Laurence Lundblade12d32c52018-09-19 11:25:27 -070026//
27// ieee754.h
28// Indefinite
29//
30// Created by Laurence Lundblade on 7/23/18.
31// Copyright © 2018 Laurence Lundblade. All rights reserved.
32//
33
34#ifndef ieee754_h
35#define ieee754_h
36
37#include <stdint.h>
38
39
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070040
41/*
42 General comments
43
44 This is complete in that it handles all conversion cases
45 including +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN
46 and NaN payloads.
47
48 This conforms to IEEE 754-2008, but note that this doesn't
49 specify conversions, just the encodings.
50
51 NaN payloads are preserved with alignment on the LSB. The
52 qNaN bit is handled differently and explicitly copied. It
53 is always the MSB of the significand. The NaN payload MSBs
54 (except the qNaN bit) are truncated when going from
55 double or single to half.
56
57 TODO: what does the C cast do with NaN payloads from
58 double to single?
59
60
61
62 */
63
Laurence Lundblade12d32c52018-09-19 11:25:27 -070064/*
65 Most simply, just explicitly encode the type you want, single or double.
66 This works easily everywhere since standard C supports both
67 these types and so does qcbor. This encoder also supports
68 half precision and there's a few ways to use it to encode
69 floating point numbers in less space.
70
71 Without losing precision, you can encode a single or double
72 such that the special values of 0, NaN and Infinity encode
73 as half-precision. This CBOR decoder and most others
74 should handle this properly.
75
76 If you don't mind losing precision, then you can use half-precision.
77 One way to do this is to set up your environment to use
78 __fp16. Some compilers and CPUs support it even though it is not
79 standard C. What is nice about this is that your program
80 will use less memory and floating point operations like
81 multiplying, adding and such will be faster.
82
83 Another way to make use of half-precision is to represent
84 the values in your program as single or double, but encode
85 them in CBOR as half-precision. This cuts the size
86 of the encoded values by a factor of 2 or 4, but doesn't reduce
87 memory needs or speed because you are still using
88 single or double in your code.
89
90
91 encode:
92 - float as float
93 - double as double
94 - half as half
95 - float as half_precision, for environments that don't support a half-precision type
96 - double as half_precision, for environments that don't support a half-precision type
97 - float with NaN, Infinity and 0 as half
98 - double with NaN, Infinity and 0 as half
99
100
101
102
103 */
104
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700105
106
/*
 Convert a single-precision float to half-precision.

 The half-precision value is returned in a uint16_t.

 Precision and NaN payload bits will be lost. Values that are too
 large for half-precision round up to infinity and values that are
 too small round to zero.
 */
uint16_t IEEE754_FloatToHalf(float f);
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700113
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700114
/*
 Convert a half-precision value to a single-precision float.

 uHalfPrecision carries the half-precision value in a uint16_t.

 This is a lossless conversion.
 */
float IEEE754_HalfToFloat(uint16_t uHalfPrecision);
120
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700121
/*
 Convert a double-precision float to half-precision.

 The half-precision value is returned in a uint16_t.

 Precision and NaN payload bits will be lost. Values that are too
 large for half-precision round up to infinity and values that are
 too small round to zero.
 */
uint16_t IEEE754_DoubleToHalf(double d);
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700128
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700129
/*
 Convert a half-precision value to a double-precision float.

 uHalfPrecision carries the half-precision value in a uint16_t.

 This is a lossless conversion.
 */
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
135
136
137
138
/*
 Values for the uTag member of IEEE754_union.
 */
#define IEEE754_UNION_IS_HALF   0
#define IEEE754_UNION_IS_SINGLE 1
#define IEEE754_UNION_IS_DOUBLE 2

/*
 A tagged value in one of three widths. uTag is one of the
 IEEE754_UNION_IS_xxxx constants.

 NOTE(review): presumably uTag selects the member to read (HALF -> u16,
 SINGLE -> u32, DOUBLE -> u64) and the member holds the raw bits of
 the float in that width -- confirm against the implementation.

 The union is anonymous, so its members are accessed directly
 (for example x.u32); this requires C11 or a compiler extension.
 */
typedef struct {
    uint8_t uTag; // One of IEEE754_UNION_IS_xxxx
    union {
        uint16_t u16;
        uint32_t u32;
        uint64_t u64;
    };
} IEEE754_union;
151
152
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700153/*
154 Converts double-precision to single-precision or half-precision if possible without
155 loss of precisions. If not, leaves it as a double. Only converts to single-precision
156 unless bAllowHalfPrecision is set.
157 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700158IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
159
160/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700161 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700162 loss of precision. If not, leaves it as a double.
163 */
164static inline IEEE754_union IEEE754_DoubleToSmall(double d)
165{
166 return IEEE754_DoubleToSmallestInternal(d, 0);
167}
168
169
170/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700171 Converts double-precision to single-precision or half-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700172 loss of precisions. If not, leaves it as a double.
173 */
174static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
175{
176 return IEEE754_DoubleToSmallestInternal(d, 1);
177}
178
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700179/*
180 Converts single-precision to half-precision if possible without
181 loss of precision. If not leaves as single-precision.
182 */
183IEEE754_union IEEE754_FloatToSmallest(float f);
184
185
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700186#endif /* ieee754_h */
187
188
189
190
191
192
193