blob: d37532a092c63b72ca1d6da79c912d9a76276441 [file] [log] [blame]
Laurence Lundbladecc2ed342018-09-22 17:29:55 -07001/*==============================================================================
 ieee754.h -- floating-point conversion between half, double & single-precision
Laurence Lundblade035bd782019-01-21 17:01:31 -08003
Laurence Lundbladeee851742020-01-08 08:37:05 -08004 Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
Laurence Lundblade035bd782019-01-21 17:01:31 -08005
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -08006 SPDX-License-Identifier: BSD-3-Clause
Laurence Lundblade035bd782019-01-21 17:01:31 -08007
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -08008 See BSD-3-Clause license in README.md
Laurence Lundblade035bd782019-01-21 17:01:31 -08009
Laurence Lundbladea3fd49f2019-01-21 10:16:22 -080010 Created on 7/23/18
Laurence Lundbladeee851742020-01-08 08:37:05 -080011 =============================================================================*/
Laurence Lundblade12d32c52018-09-19 11:25:27 -070012
Laurence Lundbladeb275cdc2020-07-12 12:34:38 -070013#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade9682a532020-06-06 18:33:04 -070014
Laurence Lundblade12d32c52018-09-19 11:25:27 -070015#ifndef ieee754_h
16#define ieee754_h
17
18#include <stdint.h>
19
20
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070021
22/*
23 General comments
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080024
 This implementation is complete in that it handles all conversion
 cases including +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN
 and NaN payloads.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080028
Laurence Lundbladec5fef682020-01-25 11:38:45 -080029 This conforms to IEEE 754-2008, but note that this doesn't specify
Laurence Lundbladeee851742020-01-08 08:37:05 -080030 conversions, just the encodings.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080031
 NaN payloads are preserved with alignment on the LSB. The qNaN bit is
 handled differently and explicitly copied. It is always the MSB of the
 significand. The NaN payload MSBs (except the qNaN bit) are truncated
 when going from double or single to half.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080036
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070037 TODO: what does the C cast do with NaN payloads from
Laurence Lundbladeee851742020-01-08 08:37:05 -080038 double to single? It probably depends entirely on the
39 CPU.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080040
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070041 */
42
Laurence Lundblade12d32c52018-09-19 11:25:27 -070043/*
 The simplest approach is to explicitly encode the type you want,
 single or double. This works easily everywhere since standard C
 supports both of these types and so does qcbor. This encoder also
 supports half precision and there are a few ways to use it to encode
 floating-point numbers in less space.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080049
 Without losing precision, you can encode a single or double such that
 the special values of 0, NaN and Infinity encode as half-precision.
 This CBOR decoder and most others should handle this properly.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080053
 If you don't mind losing precision, then you can use half-precision.
 One way to do this is to set up your environment to use
 __fp16. Some compilers and CPUs support it even though it is not
 standard C. What is nice about this is that your program will use
 less memory and floating-point operations like multiplying, adding
 and such will be faster.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080060
 Another way to make use of half-precision is to represent the values
 in your program as single or double, but encode them in CBOR as
 half-precision. This cuts the encoded size of each floating-point
 value by a factor of 2 (from single) or 4 (from double), but doesn't
 reduce memory needs or improve speed because you are still using
 single or double in your code.
Laurence Lundblade3aee3a32018-12-17 16:17:45 -080066
Laurence Lundblade12d32c52018-09-19 11:25:27 -070067 */
68
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070069
70
/*
 Convert a single-precision float to a half-precision float.

 f: the single-precision value to convert.

 Returns the half-precision value as a 16-bit unsigned integer.

 The conversion is lossy: precision and NaN payload bits will be
 lost. Too-large values will round up to infinity and too-small
 values to zero.
 */
uint16_t IEEE754_FloatToHalf(float f);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070077
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070078
/*
 Convert a double-precision float to a half-precision float.

 d: the double-precision value to convert.

 Returns the half-precision value as a 16-bit unsigned integer.

 The conversion is lossy: precision and NaN payload bits will be
 lost. Too-large values will round up to infinity and too-small
 values to zero.
 */
uint16_t IEEE754_DoubleToHalf(double d);
Laurence Lundblade12d32c52018-09-19 11:25:27 -070085
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -070086
/*
 Convert a half-precision float to a double-precision float.

 uHalfPrecision: the half-precision value, carried in a 16-bit
 unsigned integer.

 Returns the value as a double. This is a lossless conversion.
 */
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
92
93
/*
 Each of these constants both tags which precision an IEEE754_union
 holds and gives the size of that precision in bytes.
 */
#define IEEE754_UNION_IS_HALF   2
#define IEEE754_UNION_IS_SINGLE 4
#define IEEE754_UNION_IS_DOUBLE 8

/*
 Result of a "smallest representation" conversion: a size tag plus the
 converted value.
 */
typedef struct {
   uint8_t  uSize;  /* One of IEEE754_UNION_IS_xxxx */
   uint64_t uValue; /* NOTE(review): presumably the raw bits of the
                       half, single or double selected by uSize --
                       confirm against the implementation */
} IEEE754_union;
103
104
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700105/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800106 Converts double-precision to single-precision or half-precision if
107 possible without loss of precisions. If not, leaves it as a
108 double. Only converts to single-precision unless bAllowHalfPrecision
109 is set.
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700110 */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700111IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
112
113/*
Laurence Lundblade8db3d3e2018-09-29 11:46:37 -0700114 Converts double-precision to single-precision if possible without
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700115 loss of precision. If not, leaves it as a double.
116 */
117static inline IEEE754_union IEEE754_DoubleToSmall(double d)
118{
119 return IEEE754_DoubleToSmallestInternal(d, 0);
120}
121
122
123/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800124 Converts double-precision to single-precision or half-precision if
125 possible without loss of precisions. If not, leaves it as a double.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700126 */
127static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
128{
129 return IEEE754_DoubleToSmallestInternal(d, 1);
130}
131
Laurence Lundbladeee851742020-01-08 08:37:05 -0800132
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700133/*
Laurence Lundbladeee851742020-01-08 08:37:05 -0800134 Converts single-precision to half-precision if possible without loss
135 of precision. If not leaves as single-precision.
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700136 */
137IEEE754_union IEEE754_FloatToSmallest(float f);
138
139
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700140#endif /* ieee754_h */
141
142
Laurence Lundbladeb275cdc2020-07-12 12:34:38 -0700143#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
Laurence Lundblade12d32c52018-09-19 11:25:27 -0700144
145
146
147