Blame - src/ieee754.h - mirror/QCBOR - TrustedFirmware Git Browser

blob: 863019b2bf42e099513d57c99bfbbf16629864dd [file] [log] [blame]

Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	1	/* ==========================================================================
				2	* ieee754.h -- Conversion between half, double & single-precision floats
				3	*
				4	* Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
				5	*
				6	* SPDX-License-Identifier: BSD-3-Clause
				7	*
				8	* See BSD-3-Clause license in README.md
				9	*
				10	* Created on 7/23/18
				11	* ========================================================================== */
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	12
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	13	#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	14
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	15	#ifndef ieee754_h
				16	#define ieee754_h
				17
				18	#include <stdint.h>
				19
				20
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	21	/** @file ieee754.h
				22	*
				23	* This implements floating-point conversion between half, single and
				24	* double precision floating-point numbers, in particular conversion to
				25	* smaller representation (e.g., double to single) that does not lose
				26	* precision for CBOR preferred serialization.
				27	*
				28	* This implementation works entirely with shifts and masks and does
				29	* not require any floating-point HW or library.
				30	*
				31	* This conforms to IEEE 754-2008, but note that it doesn't specify
				32	* conversions, just the encodings.
				33	*
				34	* This is complete, supporting +/- infinity, +/- zero, subnormals and
				35	* NaN payloads. NaN payloads are converted to smaller formats by dropping the
				36	* rightmost bits if they are zero and shifting to the right. If the
				37	* rightmost bits are not zero the conversion is not performed. When
				38	* converting from smaller to larger, the payload is shifted left and
				39	* zero-padded. This is what is specified by CBOR preferred
				40	* serialization and what modern HW conversion instructions do. CBOR
				41	* CDE handling for NaN is not clearly specified, but upcoming
				42	* documents may clarify this.
				43	*
				44	* There is no special handling of signaling and quiet NaNs. It probably
				45	* isn't necessary to transmit these special NaNs as their purpose is
				46	* more for propagating errors up through some calculation. In many
				47	* cases the handling of the NaN payload will work for signaling and quiet
				48	* NaNs.
				49	*
				50	* A previous version of this was usable as a general library for
				51	* conversion. This version is reduced to what is needed for CBOR.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	52	*/
				53
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	54
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	55	/**
				56	* @brief Convert half-precision float to double-precision float.
				57	*
				58	* @param[in] uHalfPrecision Half-precision number to convert.
				59	*
				60	* @returns Double-precision value.
				61	*
				62	* This is a lossless conversion because every half-precision value
				63	* can be represented as a double. There is no error condition.
				64	*
				65	* There is no half-precision type in C, so it is represented here as
				66	* a @c uint16_t. The bits of @c uHalfPrecision are as described for
				67	* half-precision by IEEE 754.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	68	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	69	double
				70	IEEE754_HalfToDouble(uint16_t uHalfPrecision);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	71
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	72
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	73	/** Holds a floating-point value that could be half, single or
				74	* double-precision; @c uSize tells which. The value is in a @c uint64_t that may be copied
				75	* to a float or double. Simply casting uValue will usually work but
				76	* may generate compiler or static analyzer warnings. Using
				77	* UsefulBufUtil_CopyUint64ToDouble() or
				78	* UsefulBufUtil_CopyUint32ToFloat() will not warn (and will not generate
				79	* any extra code).
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	80	*/
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	81	typedef struct {
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	82	enum {IEEE754_UNION_IS_HALF = 2, /* uValue holds a half-precision value */
				83	IEEE754_UNION_IS_SINGLE = 4, /* uValue holds a single-precision value */
				84	IEEE754_UNION_IS_DOUBLE = 8, /* uValue holds a double-precision value */
				85	} uSize; /* Size of uValue; each constant equals the byte width of its format */
				86	uint64_t uValue; /* The floating-point value; interpret according to uSize */
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	87	} IEEE754_union;
				88
				89
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	90	/**
				91	* @brief Convert a double to either single or half-precision.
				92	*
				93	* @param[in] d The value to convert.
				94	* @param[in] bAllowHalfPrecision If true, convert to either half or
				95	* single precision. NOTE(review): presumably only single is tried when false -- confirm.
				96	*
				97	* @returns Unconverted value, or value converted to single or half-precision; @c uSize tells which.
				98	*
				99	* This always succeeds. If the value cannot be converted without
				100	* loss of precision, it is not converted.
				101	*
				102	* This handles all subnormals and NaN payloads.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	103	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	104	IEEE754_union
				105	IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	106
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	107
				108	/**
				109	* @brief Convert a single-precision float to half-precision.
				110	*
				111	* @param[in] f The value to convert.
				112	*
				113	* @returns Either unconverted value or value converted to half-precision; @c uSize tells which.
				114	*
				115	* This always succeeds. If the value cannot be converted without
				116	* loss of precision, it is not converted.
				117	*
				118	* This handles all subnormals and NaN payloads.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	119	*/
Laurence Lundblade	83dbf5c	2024-01-07 19:17:52 -0700	[diff] [blame]	120	IEEE754_union
				121	IEEE754_SingleToHalf(float f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	122
				123
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	124	#endif /* ieee754_h */
				125
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	126	#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */