/*==============================================================================
 ieee754.h -- floating-point conversion between half, double & single-precision

 Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.

 SPDX-License-Identifier: BSD-3-Clause

 See BSD-3-Clause license in README.md

 Created on 7/23/18
 =============================================================================*/

Laurence Lundbladeb275cdc2020-07-12 12:34:38 -070013#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade9682a532020-06-06 18:33:04 -070014
Laurence Lundblade12d32c52018-09-19 11:25:27 -070015#ifndef ieee754_h
16#define ieee754_h
17
18#include <stdint.h>
19
20
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070021
/*
 General comments

 This is complete in that it handles all conversion cases including
 +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN
 payloads.

 This conforms to IEEE 754-2008, but note that this doesn't specify
 conversions, just the encodings.

 NaN payloads are preserved with alignment on the LSB. The qNaN bit is
 handled differently and explicitly copied. It is always the MSB of the
 significand. The NaN payload MSBs (except the qNaN bit) are truncated
 when going from double or single to half.

 TODO: what does the C cast do with NaN payloads from
 double to single? It probably depends entirely on the
 CPU.

 */

/*
 Most simply just explicitly encode the type you want, single or
 double. This works easily everywhere since standard C supports both
 these types and so does qcbor. This encoder also supports half
 precision and there are a few ways to use it to encode floating-point
 numbers in less space.

 Without losing precision, you can encode a single or double such that
 the special values of 0, NaN and Infinity encode as half-precision.
 This CBOR decoder and most others should handle this properly.

 If you don't mind losing precision, then you can use half-precision.
 One way to do this is to set up your environment to use
 __fp16. Some compilers and CPUs support it even though it is not
 standard C. What is nice about this is that your program will use
 less memory and floating-point operations like multiplying, adding
 and such will be faster.

 Another way to make use of half-precision is to represent the values
 in your program as single or double, but encode them in CBOR as
 half-precision. This cuts the size of the encoded messages by a
 factor of 2 or 4, but doesn't reduce memory needs or speed because
 you are still using single or double in your code.

 */



/*
 Convert single-precision float to half-precision float, returned as
 a 16-bit value. Precision and NaN payload bits will be lost.
 Too-large values will round up to infinity and too small to zero.
 */
uint16_t IEEE754_FloatToHalf(float f);


/*
 Convert double-precision float to half-precision float, returned as
 a 16-bit value. Precision and NaN payload bits will be lost.
 Too-large values will round up to infinity and too small to zero.
 */
uint16_t IEEE754_DoubleToHalf(double d);


/*
 Convert half-precision float, passed as a 16-bit value, to
 double-precision float. This is a loss-less conversion.
 */
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);


/*
 Convert float to double-precision without using any
 floating-point HW or compiler-supplied SW.
 This is a loss-less conversion.

 The input is a uint32_t rather than a float; presumably ufloat
 carries the raw single-precision bit pattern -- confirm against
 the implementation.
 */
double IEEE754_FloatToDouble(uint32_t ufloat);


/*
 Values for IEEE754_union.uSize. Each one both tags which
 floating-point format the value is in and gives that format's size
 (2, 4 or 8).
 */
#define IEEE754_UNION_IS_HALF   2
#define IEEE754_UNION_IS_SINGLE 4
#define IEEE754_UNION_IS_DOUBLE 8

/*
 Return type of the IEEE754_xxxToSmall/Smallest functions: a value
 plus a tag saying which floating-point format it ended up in.
 uValue is presumably the raw bit pattern of that format -- confirm
 against the implementation.
 */
typedef struct {
    uint8_t  uSize;  // One of IEEE754_UNION_IS_xxxx
    uint64_t uValue;
} IEEE754_union;


Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700113/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800114 Converts double-precision to single-precision or half-precision if
115 possible without loss of precisions. If not, leaves it as a
116 double. Only converts to single-precision unless bAllowHalfPrecision
117 is set.
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700118 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700119IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
120
121/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700122 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700123 loss of precision. If not, leaves it as a double.
124 */
125static inline IEEE754_union IEEE754_DoubleToSmall(double d)
126{
127 return IEEE754_DoubleToSmallestInternal(d, 0);
128}
129
130
131/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800132 Converts double-precision to single-precision or half-precision if
133 possible without loss of precisions. If not, leaves it as a double.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700134 */
135static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
136{
137 return IEEE754_DoubleToSmallestInternal(d, 1);
138}
139
Laurence Lundbladeee851742020-01-08 08:37:05 -0800140
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700141/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800142 Converts single-precision to half-precision if possible without loss
143 of precision. If not leaves as single-precision.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700144 */
145IEEE754_union IEEE754_FloatToSmallest(float f);
146
147
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700148#endif /* ieee754_h */
149
150
Laurence Lundbladeb275cdc2020-07-12 12:34:38 -0700151#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700152
153
154
155