Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: a8079f8cff0c9c22a963bc6e7c7e4c8b01508333 [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	2	ieee754.c -- floating-point conversion between half, double & single-precision
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	3
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	4	Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
Máté Tóth-Pál	ef5f07a	2021-09-17 19:31:37 +0200	[diff] [blame]	5	Copyright (c) 2021, Arm Limited. All rights reserved.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	6
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	7	SPDX-License-Identifier: BSD-3-Clause
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	8
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	9	See BSD-3-Clause license in README.md
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	10
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	11	Created on 7/23/18
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	12	=============================================================================*/
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	13
Máté Tóth-Pál	ef5f07a	2021-09-17 19:31:37 +0200	[diff] [blame]	14	/*
				15	Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
				16	QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
				17	*/
				18	#include "qcbor/qcbor_common.h"
				19
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	20	#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	21
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	22	#include "ieee754.h"
				23	#include <string.h> // For memcpy()
				24
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	25
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	26	/*
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	27	This code is written for clarity and verifiability, not for size, on
				28	the assumption that the optimizer will do a good job. The LLVM
				29	optimizer, -Os, does seem to do the job and the resulting object code
				30	is smaller from combining code for the many different cases (normal,
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	31	subnormal, infinity, zero...) for the conversions. GCC is nowhere near
				32	as good.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	33
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	34	This code has really long lines and is much easier to read because of
				35	them. Some coding guidelines prefer 80 column lines (can they not afford
				36	big displays?). It would make this code much worse even to wrap at 120
				37	columns.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	38
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	39	Dead stripping is also really helpful to get code size down when
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	40	floating-point encoding is not needed. (If this is put in a library
				41	and linking is against the library, then dead stripping is automatic).
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	42
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	43	This code works solely using shifts and masks and thus has no
				44	dependency on any math libraries. It can even work if the CPU doesn't
				45	have any floating-point support, though that isn't the most useful
				46	thing to do.
				47
				48	The memcpy() dependency is only for CopyFloatToUint32() and friends
				49	which only is needed to avoid type punning when converting the actual
				50	float bits to an unsigned value so the bit shifts and masks can work.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	51	*/
				52
				53	/*
				54	The references used to write this code:
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	55
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	56	- IEEE 754-2008, particularly section 3.6 and 6.2.1
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	57
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	58	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	59
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	60	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	61
				62	- https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
				63
				64	- https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	65	*/
				66
				67
				68	// ----- Half Precsion -----------
				69	#define HALF_NUM_SIGNIFICAND_BITS (10)
				70	#define HALF_NUM_EXPONENT_BITS (5)
				71	#define HALF_NUM_SIGN_BITS (1)
				72
				73	#define HALF_SIGNIFICAND_SHIFT (0)
				74	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				75	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				76
Laurence Lundblade	06350ea	2020-01-27 19:32:40 -0800	[diff] [blame]	77	#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits // 0x03ff
				78	#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				79	#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // // 0x8000 1 bit of sign
				80	#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	81
				82	/* Biased Biased Unbiased Use
				83	0x00 0 -15 0 and subnormal
				84	0x01 1 -14 Smallest normal exponent
				85	0x1e 30 15 Largest normal exponent
				86	0x1F 31 16 NaN and Infinity */
				87	#define HALF_EXPONENT_BIAS (15)
				88	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				89	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				90	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				91	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				92
				93
// ------ Single-Precision --------

// Width of each field in bits
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define SINGLE_SIGNIFICAND_SHIFT (0)
#define SINGLE_EXPONENT_SHIFT    (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT        (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

// Masks that select each field in place
#define SINGLE_SIGNIFICAND_MASK (0x7fffffU)                                // The lower 23 bits
#define SINGLE_EXPONENT_MASK    (0xffU << SINGLE_EXPONENT_SHIFT)           // 8 bits of exponent
#define SINGLE_SIGN_MASK        (0x01U << SINGLE_SIGN_SHIFT)               // 1 bit of sign
#define SINGLE_QUIET_NAN_BIT    (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1)) // Top bit of the significand

/*
 Exponent values:

   Biased (hex)   Biased (dec)   Unbiased   Use
   0x00             0            -127       0 and subnormal
   0x01             1            -126       Smallest normal exponent
   0x7f           127               0       1
   0xfe           254             127       Largest normal exponent
   0xff           255             128       NaN and Infinity
 */
#define SINGLE_EXPONENT_BIAS       (127)
#define SINGLE_EXPONENT_MAX        (SINGLE_EXPONENT_BIAS)    //  127 unbiased
#define SINGLE_EXPONENT_MIN        (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
#define SINGLE_EXPONENT_ZERO       (-SINGLE_EXPONENT_BIAS)   // -127 unbiased
#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)  //  128 unbiased
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	119
				120
// --------- Double-Precision ----------

// Width of each field in bits
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define DOUBLE_SIGNIFICAND_SHIFT (0)
#define DOUBLE_EXPONENT_SHIFT    (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT        (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

// Masks that select each field in place
#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL)                         // The lower 52 bits
#define DOUBLE_EXPONENT_MASK    (0x7ffULL << DOUBLE_EXPONENT_SHIFT)          // 11 bits of exponent
#define DOUBLE_SIGN_MASK        (0x01ULL << DOUBLE_SIGN_SHIFT)               // 1 bit of sign
#define DOUBLE_QUIET_NAN_BIT    (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) // Top bit of the significand

/*
 Exponent values:

   Biased (hex)   Biased (dec)   Unbiased   Use
   0x000             0           -1023      0 and subnormal
   0x001             1           -1022      Smallest normal exponent
   0x7fe          2046            1023      Largest normal exponent
   0x7ff          2047            1024      NaN and Infinity
 */
#define DOUBLE_EXPONENT_BIAS       (1023)
#define DOUBLE_EXPONENT_MAX        (DOUBLE_EXPONENT_BIAS)    //  1023 unbiased
#define DOUBLE_EXPONENT_MIN        (-DOUBLE_EXPONENT_BIAS+1) // -1022 unbiased
#define DOUBLE_EXPONENT_ZERO       (-DOUBLE_EXPONENT_BIAS)   // -1023 unbiased
#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)  //  1024 unbiased
				146
				147
				148
				149	/*
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	150	Convenient functions to avoid type punning, compiler warnings and
				151	such. The optimizer reduces them to a simple assignment. This is a
				152	crusty corner of C. It shouldn't be this hard.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	153
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	154	These are also in UsefulBuf.h under a different name. They are copied
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	155	here to avoid a dependency on UsefulBuf.h. There is no object code
				156	size impact because these always optimize down to a simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	157	*/
				158	static inline uint32_t CopyFloatToUint32(float f)
				159	{
				160	uint32_t u32;
				161	memcpy(&u32, &f, sizeof(uint32_t));
				162	return u32;
				163	}
				164
				165	static inline uint64_t CopyDoubleToUint64(double d)
				166	{
				167	uint64_t u64;
				168	memcpy(&u64, &d, sizeof(uint64_t));
				169	return u64;
				170	}
				171
/*
 Return the double whose bits are those of the given uint64_t.

 The bytes are moved with memcpy() rather than through a pointer cast
 or a union so that no type punning is involved.
 */
static inline double CopyUint64ToDouble(uint64_t u64)
{
    double dResult;

    memcpy(&dResult, &u64, sizeof(u64));

    return dResult;
}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	178
				179
				180	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	181	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	182	{
				183	// Pull the three parts out of the single-precision float
				184	const uint32_t uSingle = CopyFloatToUint32(f);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	185	const int32_t nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				186	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				187	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	188
				189
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	190	// Now convert the three parts to half-precision.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	191
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	192	// All works is done on uint32_t with conversion to uint16_t at
				193	// the end. This avoids integer promotions that static analyzers
				194	// complain about and reduces code size.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	195	uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				196
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	197	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				198	// +/- Infinity and NaNs -- single biased exponent is 0xff
				199	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				200	if(!uSingleSignificand) {
				201	// Infinity
				202	uHalfSignificand = 0;
				203	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	204	// Copy the LSBs of the NaN payload that will fit from the
				205	// single to the half
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	206	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				207	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				208	// It's a qNaN; copy the qNaN bit
				209	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	210	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	211	// It's an sNaN; make sure the significand is not zero
				212	// so it stays a NaN This is needed because not all
				213	// significand bits are copied from single
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	214	if(!uHalfSignificand) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	215	// Set the LSB. This is what wikipedia shows for
				216	// sNAN.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	217	uHalfSignificand \|= 0x01;
				218	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	219	}
				220	}
				221	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	222	// 0 or a subnormal number -- singled biased exponent is 0
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	223	uHalfBiasedExponent = 0;
				224	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				225	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	226	// Exponent is too large to express in half-precision; round
				227	// up to infinity
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	228	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				229	uHalfSignificand = 0;
				230	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	231	// Exponent is too small to express in half-precision normal;
				232	// make it a half-precision subnormal
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	233	uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	234	uHalfSignificand = 0;
				235	// Could convert some of these values to a half-precision
				236	// subnormal, but the layer above this will never use it. See
				237	// layer above. There is code to do this in github history
				238	// for this file, but it was removed because it was never
				239	// invoked.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	240	} else {
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	241	// The normal case, exponent is in range for half-precision
				242	uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	243	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				244	}
				245	uHalfSign = uSingleSign;
				246
				247	// Put the 3 values in the right place for a half precision
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	248	const uint32_t uHalfPrecision = uHalfSignificand \|
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	249	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				250	(uHalfSign << HALF_SIGN_SHIFT);
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	251	// Cast is safe because all the masks and shifts above work to
				252	// make a half precision value which is only 16 bits.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	253	return (uint16_t)uHalfPrecision;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	254	}
				255
				256
				257	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	258	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	259	{
				260	// Pull the three parts out of the double-precision float
				261	const uint64_t uDouble = CopyDoubleToUint64(d);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	262	const int64_t nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				263	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				264	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	265
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	266	// Now convert the three parts to half-precision.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	267
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	268	// All works is done on uint64_t with conversion to uint16_t at
				269	// the end. This avoids integer promotions that static analyzers
				270	// complain about. Other options are for these to be unsigned int
				271	// or fast_int16_t. Code size doesn't vary much between all these
				272	// options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	273	uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				274
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	275	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				276	// +/- Infinity and NaNs -- single biased exponent is 0xff
				277	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				278	if(!uDoubleSignificand) {
				279	// Infinity
				280	uHalfSignificand = 0;
				281	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	282	// Copy the LSBs of the NaN payload that will fit from the
				283	// double to the half
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	284	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				285	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				286	// It's a qNaN; copy the qNaN bit
				287	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	288	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	289	// It's an sNaN; make sure the significand is not zero
				290	// so it stays a NaN This is needed because not all
				291	// significand bits are copied from single
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	292	if(!uHalfSignificand) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	293	// Set the LSB. This is what wikipedia shows for
				294	// sNAN.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	295	uHalfSignificand \|= 0x01;
				296	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	297	}
				298	}
				299	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	300	// 0 or a subnormal number -- double biased exponent is 0
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	301	uHalfBiasedExponent = 0;
				302	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				303	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	304	// Exponent is too large to express in half-precision; round
				305	// up to infinity; TODO, is this really true?
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	306	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				307	uHalfSignificand = 0;
				308	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	309	// Exponent is too small to express in half-precision; round
				310	// down to zero
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	311	uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	312	uHalfSignificand = 0;
				313	// Could convert some of these values to a half-precision
				314	// subnormal, but the layer above this will never use it. See
				315	// layer above. There is code to do this in github history
				316	// for this file, but it was removed because it was never
				317	// invoked.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	318	} else {
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	319	// The normal case, exponent is in range for half-precision
				320	uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	321	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				322	}
				323	uHalfSign = uDoubleSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	324
				325
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	326	// Put the 3 values in the right place for a half precision
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	327	const uint64_t uHalfPrecision = uHalfSignificand \|
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	328	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				329	(uHalfSign << HALF_SIGN_SHIFT);
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	330	// Cast is safe because all the masks and shifts above work to
				331	// make a half precision value which is only 16 bits.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	332	return (uint16_t)uHalfPrecision;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	333	}
				334
				335
Laurence Lundblade	fe09bbf	2020-07-16 12:14:51 -0700	[diff] [blame]	336	/*
				337	IEEE754_HalfToFloat() was created but is not needed. It can be retrieved from
				338	github history if needed.
				339	*/
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	340
				341
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	342	// Public function; see ieee754.h
				343	double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
				344	{
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	345	// Pull out the three parts of the half-precision float. Do all
				346	// the work in 64 bits because that is what the end result is. It
				347	// may give smaller code size and will keep static analyzers
				348	// happier.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	349	const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				350	const int64_t nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				351	const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	352
				353
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	354	// Make the three parts of hte single-precision number
				355	uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
				356	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				357	// 0 or subnormal
				358	uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
				359	if(uHalfSignificand) {
				360	// Subnormal case
				361	uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	362	// A half-precision subnormal can always be converted to a
				363	// normal double-precision float because the ranges line
				364	// up
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	365	uDoubleSignificand = uHalfSignificand;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	366	// Shift bits from right of the decimal to left, reducing
				367	// the exponent by 1 each time
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	368	do {
				369	uDoubleSignificand <<= 1;
				370	uDoubleBiasedExponent--;
				371	} while ((uDoubleSignificand & 0x400) == 0);
				372	uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
				373	uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				374	} else {
				375	// Just zero
				376	uDoubleSignificand = 0;
				377	}
				378	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				379	// NaN or Inifinity
				380	uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
				381	if(uHalfSignificand) {
				382	// NaN
				383	// First preserve the NaN payload from half to single
				384	uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				385	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	386	// Next, set qNaN if needed since half qNaN bit is not
				387	// copied above
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	388	uDoubleSignificand \|= DOUBLE_QUIET_NAN_BIT;
				389	}
				390	} else {
				391	// Infinity
				392	uDoubleSignificand = 0;
				393	}
				394	} else {
				395	// Normal number
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	396	uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
				397	uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	398	}
				399	uDoubleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	400
				401
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	402	// Shift the 3 parts into place as a double-precision
				403	const uint64_t uDouble = uDoubleSignificand \|
				404	(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) \|
				405	(uDoubleSign << DOUBLE_SIGN_SHIFT);
				406	return CopyUint64ToDouble(uDouble);
				407	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	408
				409
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	410
Laurence Lundblade	f7c0adb	2020-08-08 20:20:58 -0700	[diff] [blame]	411	/*
				412	IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
				413	github history if needed.
				414	*/
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	415
				416
				417
				418	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	419	IEEE754_union IEEE754_FloatToSmallest(float f)
				420	{
				421	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	422
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	423	// Pull the neeed two parts out of the single-precision float
				424	const uint32_t uSingle = CopyFloatToUint32(f);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	425	const int32_t nSingleExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	426	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	427
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	428	// Bit mask that is the significand bits that would be lost when
				429	// converting from single-precision to half-precision
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	430	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				431
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	432	// Optimizer will re organize so there is only one call to
				433	// IEEE754_FloatToHalf() in the final code.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	434	if(uSingle == 0) {
				435	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	436	result.uSize = IEEE754_UNION_IS_HALF;
				437	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	438	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				439	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	440	result.uSize = IEEE754_UNION_IS_HALF;
				441	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	442	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				443	// Normal number in exponent range and precision won't be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	444	result.uSize = IEEE754_UNION_IS_HALF;
				445	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	446	} else {
				447	// Subnormal, exponent out of range, or precision will be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	448	result.uSize = IEEE754_UNION_IS_SINGLE;
				449	result.uValue = uSingle;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	450	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	451
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	452	return result;
				453	}
				454
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	455	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	456	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				457	{
				458	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	459
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	460	// Pull the needed two parts out of the double-precision float
				461	const uint64_t uDouble = CopyDoubleToUint64(d);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	462	const int64_t nDoubleExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
Laurence Lundblade	e17e2d7	2020-07-16 19:15:26 -0700	[diff] [blame]	463	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	464
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	465	// Masks to check whether dropped significand bits are zero or not
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	466	const uint64_t uDroppedHalfBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	467	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	468
Laurence Lundblade	29ec464	2020-07-21 21:11:45 -0700	[diff] [blame]	469	// This will not convert to half-precion or single-precision
				470	// subnormals. Values that could be converted will be output as
				471	// the double they are or occasionally to a normal single. This
				472	// could be implemented, but it is more code and would rarely be
				473	// used and rarely reduce the output size.
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	474
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	475	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	476	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	477	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	478	result.uSize = IEEE754_UNION_IS_HALF;
				479	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	480	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				481	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	482	result.uSize = IEEE754_UNION_IS_HALF;
				483	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	484	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedHalfBits))) {
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	485	// Can convert to half without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	486	result.uSize = IEEE754_UNION_IS_HALF;
				487	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	488	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				489	// Can convert to single without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	490	result.uSize = IEEE754_UNION_IS_SINGLE;
				491	result.uValue = CopyFloatToUint32((float)d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	492	} else {
				493	// Can't convert without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	494	result.uSize = IEEE754_UNION_IS_DOUBLE;
				495	result.uValue = uDouble;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	496	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	497
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	498	return result;
				499	}
				500
Laurence Lundblade	fe09bbf	2020-07-16 12:14:51 -0700	[diff] [blame]	501	#else
				502
				503	int x;
				504
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	505	#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */