blob: fc94646ee37b11a68c8067fa03ff4733e233582c [file] [log] [blame]
Laurence Lundbladecc2ed342018-09-22 17:29:55 -07001/*==============================================================================
Laurence Lundblade3aee3a32018-12-17 16:17:45 -08002
Laurence Lundbladed92a6162018-11-01 11:38:35 +07003 Copyright (c) 2018, Laurence Lundblade.
4 All rights reserved.
Laurence Lundblade0dbc9172018-11-01 14:17:21 +07005
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are
8met:
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above
12 copyright notice, this list of conditions and the following
13 disclaimer in the documentation and/or other materials provided
14 with the distribution.
15 * The name "Laurence Lundblade" may not be used to
16 endorse or promote products derived from this software without
17 specific prior written permission.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080018
Laurence Lundblade0dbc9172018-11-01 14:17:21 +070019THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
20WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
22ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
23BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
29IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Laurence Lundbladecc2ed342018-09-22 17:29:55 -070030 ==============================================================================*/
Laurence Lundblade12d32c52018-09-19 11:25:27 -070031//
32// ieee754.h
33// Indefinite
34//
35// Created by Laurence Lundblade on 7/23/18.
36// Copyright © 2018 Laurence Lundblade. All rights reserved.
37//
38
39#ifndef ieee754_h
40#define ieee754_h
41
42#include <stdint.h>
43
44
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070045
46/*
47 General comments
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080048
 This is a complete implementation in that it handles all
 conversion cases including +/- infinity, +/- zero, subnormal
 numbers, qNaN, sNaN and NaN payloads.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080052
 This conforms to IEEE 754-2008, but note that this doesn't
 specify conversions, just the encodings.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080055
 NaN payloads are preserved with alignment on the LSB. The
 qNaN bit is handled differently and explicitly copied. It
 is always the MSB of the significand. The NaN payload MSBs
 (except the qNaN bit) are truncated when going from
 double or single to half.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080061
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070062 TODO: what does the C cast do with NaN payloads from
63 double to single?
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080064
65
66
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070067 */
68
Laurence Lundblade12d32c52018-09-19 11:25:27 -070069/*
 The simplest approach is to explicitly encode the type you want,
 single or double. This works easily everywhere since standard C
 supports both these types and so does qcbor. This encoder also
 supports half precision and there are a few ways to use it to
 encode floating point numbers in less space.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080075
 Without losing precision, you can encode a single or double
 such that the special values of 0, NaN and Infinity encode
 as half-precision. This CBOR decoder and most others
 should handle this properly.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080080
 If you don't mind losing precision, then you can use half-precision.
 One way to do this is to set up your environment to use
 __fp16. Some compilers and CPUs support it even though it is not
 standard C. What is nice about this is that your program
 will use less memory and floating point operations like
 multiplying, adding and such will be faster.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080087
 Another way to make use of half-precision is to represent
 the values in your program as single or double, but encode
 them in CBOR as half-precision. This cuts the size
 of the encoded values by a factor of 2 or 4, but doesn't
 reduce memory needs or speed because you are still using
 single or double in your code.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080094
Laurence Lundblade12d32c52018-09-19 11:25:27 -070095
96 encode:
97 - float as float
98 - double as double
99 - half as half
100 - float as half_precision, for environments that don't support a half-precision type
101 - double as half_precision, for environments that don't support a half-precision type
102 - float with NaN, Infinity and 0 as half
103 - double with NaN, Infinity and 0 as half
Laurence Lundblade3aee3a32018-12-17 16:17:45 -0800104
105
106
107
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700108 */
109
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700110
111
112/*
113 Convert single precision float to half-precision float.
114 Precision and NaN payload bits will be lost. Too large
115 values will round up to infinity and too small to zero.
116 */
Laurence Lundbladecc2ed342018-09-22 17:29:55 -0700117uint16_t IEEE754_FloatToHalf(float f);
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700118
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700119
120/*
 Convert half precision float to single precision float.
 This is a lossless conversion.
123 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700124float IEEE754_HalfToFloat(uint16_t uHalfPrecision);
125
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700126
127/*
128 Convert double precision float to half-precision float.
129 Precision and NaN payload bits will be lost. Too large
130 values will round up to infinity and too small to zero.
131 */
Laurence Lundbladecc2ed342018-09-22 17:29:55 -0700132uint16_t IEEE754_DoubleToHalf(double d);
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700133
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700134
135/*
 Convert half precision float to double precision float.
 This is a lossless conversion.
138 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700139double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
140
141
142
/*
 Size tags for IEEE754_union. Each constant both identifies which
 floating-point format a value is held in and gives the size of
 that format in bytes (half = 2, single = 4, double = 8).
 */
#define IEEE754_UNION_IS_HALF   2
#define IEEE754_UNION_IS_SINGLE 4
#define IEEE754_UNION_IS_DOUBLE 8

/*
 A floating-point value tagged with the format it is held in.
 Returned by the "smallest representation" conversion functions
 declared in this header.
 */
typedef struct {
    uint8_t  uSize;  // One of IEEE754_UNION_IS_xxxx
    uint64_t uValue; // Presumably the raw bits of the value in the format named by uSize -- confirm against ieee754.c
} IEEE754_union;
152
153
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700154/*
 Converts double-precision to single-precision or half-precision if possible without
 loss of precision. If not, leaves it as a double. Only converts to single-precision
 unless bAllowHalfPrecision is set.
158 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700159IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
160
161/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700162 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700163 loss of precision. If not, leaves it as a double.
164 */
165static inline IEEE754_union IEEE754_DoubleToSmall(double d)
166{
167 return IEEE754_DoubleToSmallestInternal(d, 0);
168}
169
170
171/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700172 Converts double-precision to single-precision or half-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700173 loss of precisions. If not, leaves it as a double.
174 */
175static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
176{
177 return IEEE754_DoubleToSmallestInternal(d, 1);
178}
179
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700180/*
 Converts single-precision to half-precision if possible without
 loss of precision. If not, leaves it as single-precision.
183 */
184IEEE754_union IEEE754_FloatToSmallest(float f);
185
186
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700187#endif /* ieee754_h */
188
189
190
191
192
193
194