blob: 47bfea56affe7ff9ecea30cfa20287814eb35714 [file] [log] [blame]
Laurence Lundbladecc2ed342018-09-22 17:29:55 -07001/*==============================================================================
Laurence Lundbladeee851742020-01-08 08:37:05 -08002 ieee754.c -- floating-point conversion between half, double & single-precision
Laurence Lundblade035bd782019-01-21 17:01:31 -08003
Laurence Lundbladeee851742020-01-08 08:37:05 -08004 Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
Laurence Lundblade035bd782019-01-21 17:01:31 -08005
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -08006 SPDX-License-Identifier: BSD-3-Clause
Laurence Lundblade035bd782019-01-21 17:01:31 -08007
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -08008 See BSD-3-Clause license in README.md
Laurence Lundblade035bd782019-01-21 17:01:31 -08009
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -080010 Created on 7/23/18
Laurence Lundbladeee851742020-01-08 08:37:05 -080011 =============================================================================*/
Laurence Lundblade12d32c52018-09-19 11:25:27 -070012
Laurence Lundblade9682a532020-06-06 18:33:04 -070013#ifndef QCBOR_CONFIG_DISABLE_ENCODE_IEEE754
14
Laurence Lundblade12d32c52018-09-19 11:25:27 -070015#ifndef ieee754_h
16#define ieee754_h
17
18#include <stdint.h>
19
20
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070021
22/*
23 General comments
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080024
 This is complete in that it handles all conversion cases including
 +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN
 payloads.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080028
Laurence Lundbladec5fef682020-01-25 11:38:45 -080029 This conforms to IEEE 754-2008, but note that this doesn't specify
Laurence Lundbladeee851742020-01-08 08:37:05 -080030 conversions, just the encodings.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080031
 NaN payloads are preserved with alignment on the LSB. The qNaN bit is
 handled differently and explicitly copied. It is always the MSB of the
 significand. The NaN payload MSBs (except the qNaN bit) are truncated
 when going from double or single to half.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080036
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070037 TODO: what does the C cast do with NaN payloads from
Laurence Lundbladeee851742020-01-08 08:37:05 -080038 double to single? It probably depends entirely on the
39 CPU.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080040
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070041 */
42
Laurence Lundblade12d32c52018-09-19 11:25:27 -070043/*
 Most simply, just explicitly encode the type you want, single or
 double. This works easily everywhere since standard C supports both
 these types and so does qcbor. This encoder also supports half
 precision and there are a few ways to use it to encode floating-point
 numbers in less space.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080049
 Without losing precision, you can encode a single or double such that
 the special values of 0, NaN and Infinity encode as half-precision.
 This CBOR decoder and most others should handle this properly.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080053
 If you don't mind losing precision, then you can use half-precision.
 One way to do this is to set up your environment to use
 __fp16. Some compilers and CPUs support it even though it is not
 standard C. What is nice about this is that your program will use
 less memory and floating-point operations like multiplying, adding
 and such will be faster.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080060
 Another way to make use of half-precision is to represent the values
 in your program as single or double, but encode them in CBOR as
 half-precision. This cuts the size of the encoded messages by a
 factor of 2 or 4, but doesn't reduce memory needs or speed because
 you are still using single or double in your code.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080066
Laurence Lundblade12d32c52018-09-19 11:25:27 -070067 */
68
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070069
70
/*
 Convert single-precision float to half-precision float.

 f: the single-precision value to convert.

 Returns the half-precision result as a 16-bit unsigned integer.

 Precision and NaN payload bits will be lost. Too-large values will
 round up to infinity and too-small values to zero.
 */
uint16_t IEEE754_FloatToHalf(float f);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070077
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070078
/*
 Convert half-precision float to single-precision float.

 uHalfPrecision: the half-precision value, passed as a 16-bit
 unsigned integer.

 Returns the value as a single-precision float. This is a loss-less
 conversion.
 */
float IEEE754_HalfToFloat(uint16_t uHalfPrecision);
84
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070085
/*
 Convert double-precision float to half-precision float.

 d: the double-precision value to convert.

 Returns the half-precision result as a 16-bit unsigned integer.

 Precision and NaN payload bits will be lost. Too-large values will
 round up to infinity and too-small values to zero.
 */
uint16_t IEEE754_DoubleToHalf(double d);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070092
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070093
/*
 Convert half-precision float to double-precision float.

 uHalfPrecision: the half-precision value, passed as a 16-bit
 unsigned integer.

 Returns the value as a double-precision float. This is a loss-less
 conversion.
 */
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
99
100
/*
 Convert float to double-precision without using any floating-point
 HW or compiler-supplied SW.

 f: the single-precision value to convert.

 Returns the value as a double-precision float. This is a loss-less
 conversion.
 */
double IEEE754_FloatToDouble(float f);
107
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700108
/*
 Tags for IEEE754_union.uSize. Each constant both identifies the
 precision of the value and gives its size in bytes.
 */
#define IEEE754_UNION_IS_HALF   2
#define IEEE754_UNION_IS_SINGLE 4
#define IEEE754_UNION_IS_DOUBLE 8

/*
 Result of a "smallest representation" conversion: a size/type tag
 plus a 64-bit field wide enough for any of the three precisions.
 */
typedef struct {
   uint8_t  uSize;  // One of IEEE754_UNION_IS_xxxx
   uint64_t uValue; // The value; presumably the encoded bits of the
                    // precision named by uSize (confirm in ieee754.c)
} IEEE754_union;
118
119
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700120/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800121 Converts double-precision to single-precision or half-precision if
122 possible without loss of precisions. If not, leaves it as a
123 double. Only converts to single-precision unless bAllowHalfPrecision
124 is set.
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700125 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700126IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
127
128/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700129 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700130 loss of precision. If not, leaves it as a double.
131 */
132static inline IEEE754_union IEEE754_DoubleToSmall(double d)
133{
134 return IEEE754_DoubleToSmallestInternal(d, 0);
135}
136
137
138/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800139 Converts double-precision to single-precision or half-precision if
140 possible without loss of precisions. If not, leaves it as a double.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700141 */
142static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
143{
144 return IEEE754_DoubleToSmallestInternal(d, 1);
145}
146
Laurence Lundbladeee851742020-01-08 08:37:05 -0800147
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700148/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800149 Converts single-precision to half-precision if possible without loss
150 of precision. If not leaves as single-precision.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700151 */
152IEEE754_union IEEE754_FloatToSmallest(float f);
153
154
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700155#endif /* ieee754_h */
156
157
Laurence Lundblade9682a532020-06-06 18:33:04 -0700158#endif /* QCBOR_CONFIG_DISABLE_ENCODE_IEEE754 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700159
160
161
162