Blame - src/ieee754.h - mirror/QCBOR - TrustedFirmware Git Browser

blob: 85422fecee2121f6cb483666af4b9571be9eb06f [file] [log] [blame]

Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	1	/* ==========================================================================
				2	* ieee754.h -- Conversion between half, double & single-precision floats
				3	*
				4	* Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
				5	*
				6	* SPDX-License-Identifier: BSD-3-Clause
				7	*
Laurence Lundblade	e8f5816	2024-08-22 10:30:08 -0700	[diff] [blame]	8	* See BSD-3-Clause license in file named "LICENSE"
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	9	*
				10	* Created on 7/23/18
				11	* ========================================================================== */
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	12
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	13
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	14	#ifndef ieee754_h
				15	#define ieee754_h
				16
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	17
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	18	#include <stdint.h>
				19
				20
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	21	/** @file ieee754.h
				22	*
				23	* This implements floating-point conversion between half, single and
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	24	* double precision floating-point numbers, in particular conversion to
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	25	* smaller representation (e.g., double to single) that does not lose
				26	* precision for CBOR preferred serialization.
				27	*
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	28	* This also implements conversion of floats to whole numbers as
				29	* is required for dCBOR.
				30	*
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	31	* This implementation works entirely with shifts and masks and does
				32	* not require any floating-point HW or library.
				33	*
				34	* This conforms to IEEE 754-2008, but note that it doesn't specify
				35	* conversions, just the encodings.
				36	*
				37	* This is complete, supporting +/- infinity, +/- zero, subnormals and
				38	* NaN payloads. NaN payloads are converted to smaller by dropping the
				39	* rightmost bits if they are zero and shifting to the right. If the
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	40	* rightmost bits are not zero, the conversion is not performed. When
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	41	* converting from smaller to larger, the payload is shifted left and
				42	* zero-padded. This is what is specified by CBOR preferred
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	43	* serialization and what modern HW conversion instructions do.
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	44	*
				45	* There is no special handling of signaling and quiet NaNs. It probably
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	46	* isn't necessary to transmit these special NaNs as their purpose is
				47	* more for propagating errors up through some calculation. In many
				48	* cases the handling of the NaN payload will work for signaling and quiet
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	49	* NaNs.
				50	*
				51	* A previous version of this was usable as a general library for
				52	* conversion. This version is reduced to what is needed for CBOR.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	53	*/
				54
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	55	#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	56
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	57	/**
				58	* @brief Convert half-precision float to double-precision float.
				59	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	60	* @param[in] uHalfPrecision Half-precision number to convert.
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	61	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	62	* @returns double-precision value.
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	63	*
				64	* This is a lossless conversion because every half-precision value
				65	* can be represented as a double. There is no error condition.
				66	*
				67	* There is no half-precision type in C, so it is represented here as
				68	* a @c uint16_t. The bits of @c uHalfPrecision are as described for
				69	* half-precision by IEEE 754.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	70	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	71	double
				72	IEEE754_HalfToDouble(uint16_t uHalfPrecision);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	73
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	74
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	75	/** Holds a floating-point value that could be half, single or
				76	* double-precision. The value is in a @c uint64_t that may be copied
				77	* to a float or double. Simply casting uValue will usually work but
				78	* may generate compiler or static analyzer warnings. Using
				79	* UsefulBufUtil_CopyUint64ToDouble() or
				80	* UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
				81	* any extra code).
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	82	*/
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	83	typedef struct {
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	84	enum {IEEE754_UNION_IS_HALF = 2,
				85	IEEE754_UNION_IS_SINGLE = 4,
				86	IEEE754_UNION_IS_DOUBLE = 8,
				87	} uSize; /* Size of uValue */
				88	uint64_t uValue;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	89	} IEEE754_union;
				90
				91
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	92	/** Holds result of an attempt to convert a floating-point
				93	* number to an int64_t or uint64_t.
				94	*/
				95	struct IEEE754_ToInt {
				96	enum {IEEE754_ToInt_IS_INT,
				97	IEEE754_ToInt_IS_UINT,
Laurence Lundblade	14ce228	2024-07-24 22:13:35 -0700	[diff] [blame]	98	IEEE754_ToInt_IS_65BIT_NEG,
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	99	IEEE754_ToInt_NO_CONVERSION,
				100	IEEE754_ToInt_NaN
				101	} type;
				102	union {
				103	uint64_t un_signed;
				104	int64_t is_signed;
				105	} integer;
				106	};
				107
				108
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	109	/**
				110	* @brief Convert a double to either single or half-precision.
				111	*
				112	* @param[in] d The value to convert.
				113	* @param[in] bAllowHalfPrecision If true, convert to either half or
				114	* single precision.
				115	*
				116	* @returns Unconverted value, or value converted to single or half-precision.
				117	*
				118	* This always succeeds. If the value cannot be converted without the
				119	* loss of precision, it is not converted.
				120	*
				121	* This handles all subnormals and NaN payloads.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	122	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	123	IEEE754_union
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	124	IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision, int bNoNaNPayload);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	125
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	126
				127	/**
				128	* @brief Convert a single-precision float to half-precision.
				129	*
				130	* @param[in] f The value to convert.
				131	*
				132	* @returns Either unconverted value or value converted to half-precision.
				133	*
				134	* This always succeeds. If the value cannot be converted without the
				135	* loss of precision, it is not converted.
				136	*
				137	* This handles all subnormals and NaN payloads.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	138	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	139	IEEE754_union
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	140	IEEE754_SingleToHalf(float f, int bNoNanPayloads);
				141
				142
				143	/**
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	144	* @brief Convert a double-precision float to an integer if whole number
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	145	*
				146	* @param[in] d The value to convert.
				147	*
				148	* @returns Either converted number or conversion status.
				149	*
				150	* If the value is a whole number that will fit either in a uint64_t
				151	* or an int64_t, it is converted. If it is a NaN, then there is no
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	152	* conversion and the fact that it is a NaN is indicated in the
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	153	* returned structure. If it can't be converted, then that is
				154	* indicated in the returned structure.
				155	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	156	* This always returns positive numbers as a uint64_t even if they will
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	157	* fit in an int64_t.
				158	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	159	* This never fails because of precision, but may fail because of range.
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	160	*/
				161	struct IEEE754_ToInt
				162	IEEE754_DoubleToInt(double d);
				163
				164
				165	/**
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	166	* @brief Convert a single-precision float to an integer if whole number
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	167	*
				168	* @param[in] f The value to convert.
				169	*
				170	* @returns Either converted number or conversion status.
				171	*
				172	* If the value is a whole number that will fit either in a uint64_t
				173	* or an int64_t, it is converted. If it is a NaN, then there is no
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	174	* conversion and the fact that it is a NaN is indicated in the
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	175	* returned structure. If it can't be converted, then that is
				176	* indicated in the returned structure.
				177	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	178	* This always returns positive numbers as a uint64_t even if they will
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	179	* fit in an int64_t.
				180	*
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	181	* This never fails because of precision, but may fail because of range.
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	182	*/
				183	struct IEEE754_ToInt
				184	IEEE754_SingleToInt(float f);
				185
Laurence Lundblade	d883ad3	2024-03-23 22:37:37 -0700	[diff] [blame]	186
				187	/**
				188	* @brief Convert an unsigned integer to a double with no precision loss.
				189	*
				190	* @param[in] uInt The value to convert.
Laurence Lundblade	cb7282d	2024-11-09 23:01:11 -0800	[diff] [blame]	191	* @param[in] uIsNegative 0 if positive, 1 if negative.
Laurence Lundblade	d883ad3	2024-03-23 22:37:37 -0700	[diff] [blame]	192	*
				193	* @returns Either the converted number or 0.5 if no conversion.
				194	*
				195	* The conversion will fail if the input can not be represented in the
				196	* 52 bits of precision that a double has. 0.5 is returned to indicate
				197	* no conversion. It is out-of-band from non-error results, because
				198	* all non-error results are whole integers.
				199	*/
				200	#define IEEE754_UINT_TO_DOUBLE_OOB 0.5
				201	double
				202	IEEE754_UintToDouble(uint64_t uInt, int uIsNegative);
				203
				204
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	205	#endif /* ! QCBOR_DISABLE_PREFERRED_FLOAT */
				206
				207
				208	/**
				209	* @brief Tests whether NaN is "quiet" vs having a payload.
				210	*
				211	* @param[in] dNum Double number to test.
				212	*
				213	* @returns 0 if a quiet NaN, 1 if it has a payload.
				214	*
				215	* A quiet NaN is usually represented as 0x7ff8000000000000. That is,
				216	* the significand bits are 0x8000000000000. If the significand bits
				217	* are other than 0x8000000000000 it is considered to have a NaN
				218	* payload.
				219	*
				220	* Note that 0x7ff8000000000000 is not specified in a standard, but it
				221	* is commonly implemented and chosen by CBOR as the best way to
				222	* represent a NaN.
				223	*/
				224	int
Laurence Lundblade	9b2ae8a	2024-07-12 11:00:20 -0700	[diff] [blame]	225	IEEE754_DoubleHasNaNPayload(double dNum);
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	226
				227
				228
				229	/**
				230	* @brief Tests whether NaN is "quiet" vs having a payload.
				231	*
				232	* @param[in] fNum Float number to test.
				233	*
				234	* @returns 0 if a quiet NaN, 1 if it has a payload.
				235	*
Laurence Lundblade	9b2ae8a	2024-07-12 11:00:20 -0700	[diff] [blame]	236	* See IEEE754_DoubleHasNaNPayload(). A single precision quiet NaN
Laurence Lundblade	eb3cdef	2024-02-17 20:38:55 -0800	[diff] [blame]	237	* is 0x7fc00000.
				238	*/
				239	int
Laurence Lundblade	9b2ae8a	2024-07-12 11:00:20 -0700	[diff] [blame]	240	IEEE754_SingleHasNaNPayload(float fNum);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	241
				242
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	243	#endif /* ieee754_h */
				244