Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: 216cd00a69c28fae61110efb834d8eb19ec358fd [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	2	ieee754.c -- floating-point conversion between half, double & single-precision
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	3
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	4	Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	5
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	6	SPDX-License-Identifier: BSD-3-Clause
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	7
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	8	See BSD-3-Clause license in README.md
Laurence Lundblade	035bd78	2019-01-21 17:01:31 -0800	[diff] [blame]	9
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame]	10	Created on 7/23/18
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	11	=============================================================================*/
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	12
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	13	#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	14
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	15	#include "ieee754.h"
				16	#include <string.h> // For memcpy()
				17
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	18
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	19	/*
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	20	This code is written for clarity and verifiability, not for size, on
				21	the assumption that the optimizer will do a good job. The LLVM
				22	optimizer, -Os, does seem to do the job and the resulting object code
				23	is smaller from combining code for the many different cases (normal,
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	24	subnormal, infinity, zero...) for the conversions. GCC is nowhere near
				25	as good.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	26
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	27	This code has really long lines and is much easier to read because of
				28	them. Some coding guidelines prefer 80 column lines (can they not afford
				29	big displays?). It would make this code much worse even to wrap at 120
				30	columns.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	31
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	32	Dead stripping is also really helpful to get code size down when
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	33	floating-point encoding is not needed. (If this is put in a library
				34	and linking is against the library, then dead stripping is automatic).
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	35
Laurence Lundblade	ee85174	2020-01-08 08:37:05 -0800	[diff] [blame]	36	This code works solely using shifts and masks and thus has no
				37	dependency on any math libraries. It can even work if the CPU doesn't
				38	have any floating-point support, though that isn't the most useful
				39	thing to do.
				40
				41	The memcpy() dependency is only for CopyFloatToUint32() and friends
				42	which only is needed to avoid type punning when converting the actual
				43	float bits to an unsigned value so the bit shifts and masks can work.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	44	*/
				45
				46	/*
				47	The references used to write this code:
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	48
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	49	- IEEE 754-2008, particularly section 3.6 and 6.2.1
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	50
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	51	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	52
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	53	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	54
				55	- https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
				56
				57	- https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	58	*/
				59
				60
				61	// ----- Half Precsion -----------
				62	#define HALF_NUM_SIGNIFICAND_BITS (10)
				63	#define HALF_NUM_EXPONENT_BITS (5)
				64	#define HALF_NUM_SIGN_BITS (1)
				65
				66	#define HALF_SIGNIFICAND_SHIFT (0)
				67	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				68	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				69
Laurence Lundblade	06350ea	2020-01-27 19:32:40 -0800	[diff] [blame]	70	#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits // 0x03ff
				71	#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				72	#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // // 0x8000 1 bit of sign
				73	#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	74
				75	/* Biased Biased Unbiased Use
				76	0x00 0 -15 0 and subnormal
				77	0x01 1 -14 Smallest normal exponent
				78	0x1e 30 15 Largest normal exponent
				79	0x1F 31 16 NaN and Infinity */
				80	#define HALF_EXPONENT_BIAS (15)
				81	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				82	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				83	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				84	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				85
				86
// ------ Single-Precision --------
// Width in bits of each field of a single-precision float
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define SINGLE_SIGNIFICAND_SHIFT    (0)
#define SINGLE_EXPONENT_SHIFT       (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT           (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

// Masks that select each field in place
#define SINGLE_SIGNIFICAND_MASK     (0x7fffffU) // The lower 23 bits
#define SINGLE_EXPONENT_MASK        (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
#define SINGLE_SIGN_MASK            (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
#define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))

/* Biased (hex)  Biased (dec)  Unbiased  Use
    0x0000            0         -127     0 and subnormal
    0x0001            1         -126     Smallest normal exponent
    0x7f            127            0     1
    0xfe            254          127     Largest normal exponent
    0xff            255          128     NaN and Infinity  */
#define SINGLE_EXPONENT_BIAS        (127)
#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)    //  127 unbiased
#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)   // -127 unbiased
#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)  //  128 unbiased
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	112
				113
// --------- Double-Precision ----------
// Width in bits of each field of a double-precision float
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

// Bit position of the least-significant bit of each field
#define DOUBLE_SIGNIFICAND_SHIFT    (0)
#define DOUBLE_EXPONENT_SHIFT       (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT           (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

// Masks that select each field in place
#define DOUBLE_SIGNIFICAND_MASK     (0xfffffffffffffULL) // The lower 52 bits
#define DOUBLE_EXPONENT_MASK        (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
#define DOUBLE_SIGN_MASK            (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
#define DOUBLE_QUIET_NAN_BIT        (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))


/* Biased (hex)  Biased (dec)  Unbiased  Use
    0x00000000        0        -1023     0 and subnormal
    0x00000001        1        -1022     Smallest normal exponent
    0x000007fe     2046         1023     Largest normal exponent
    0x000007ff     2047         1024     NaN and Infinity  */
#define DOUBLE_EXPONENT_BIAS        (1023)
#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)    //  1023 unbiased
#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1) // -1022 unbiased
#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)   // -1023 unbiased
#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)  //  1024 unbiased
				139
				140
				141
				142	/*
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	143	Convenient functions to avoid type punning, compiler warnings and
				144	such. The optimizer reduces them to a simple assignment. This is a
				145	crusty corner of C. It shouldn't be this hard.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	146
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	147	These are also in UsefulBuf.h under a different name. They are copied
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	148	here to avoid a dependency on UsefulBuf.h. There is no object code
				149	size impact because these always optimize down to a simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	150	*/
				151	static inline uint32_t CopyFloatToUint32(float f)
				152	{
				153	uint32_t u32;
				154	memcpy(&u32, &f, sizeof(uint32_t));
				155	return u32;
				156	}
				157
				158	static inline uint64_t CopyDoubleToUint64(double d)
				159	{
				160	uint64_t u64;
				161	memcpy(&u64, &d, sizeof(uint64_t));
				162	return u64;
				163	}
				164
// Returns the double whose raw bits are those of u64. memcpy() is
// used rather than a pointer cast so there is no type punning; the
// copy size is tied to the destination so it can never overrun it.
static inline double CopyUint64ToDouble(uint64_t u64)
{
   double d;
   memcpy(&d, &u64, sizeof(d));
   return d;
}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	171
				172
				173	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	174	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	175	{
				176	// Pull the three parts out of the single-precision float
				177	const uint32_t uSingle = CopyFloatToUint32(f);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	178	const int32_t nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				179	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				180	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	181
				182
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	183	// Now convert the three parts to half-precision.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	184
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	185	// All works is done on uint32_t with conversion to uint16_t at
				186	// the end. This avoids integer promotions that static analyzers
				187	// complain about and reduces code size.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	188	uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				189
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	190	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				191	// +/- Infinity and NaNs -- single biased exponent is 0xff
				192	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				193	if(!uSingleSignificand) {
				194	// Infinity
				195	uHalfSignificand = 0;
				196	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	197	// Copy the LSBs of the NaN payload that will fit from the
				198	// single to the half
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	199	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				200	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				201	// It's a qNaN; copy the qNaN bit
				202	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	203	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	204	// It's an sNaN; make sure the significand is not zero
				205	// so it stays a NaN This is needed because not all
				206	// significand bits are copied from single
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	207	if(!uHalfSignificand) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	208	// Set the LSB. This is what wikipedia shows for
				209	// sNAN.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	210	uHalfSignificand \|= 0x01;
				211	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	212	}
				213	}
				214	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	215	// 0 or a subnormal number -- singled biased exponent is 0
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	216	uHalfBiasedExponent = 0;
				217	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				218	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	219	// Exponent is too large to express in half-precision; round
				220	// up to infinity
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	221	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				222	uHalfSignificand = 0;
				223	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	224	// Exponent is too small to express in half-precision normal;
				225	// make it a half-precision subnormal
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	226	uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	227	uHalfSignificand = 0;
				228	// Could convert some of these values to a half-precision
				229	// subnormal, but the layer above this will never use it. See
				230	// layer above. There is code to do this in github history
				231	// for this file, but it was removed because it was never
				232	// invoked.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	233	} else {
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	234	// The normal case, exponent is in range for half-precision
				235	uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	236	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				237	}
				238	uHalfSign = uSingleSign;
				239
				240	// Put the 3 values in the right place for a half precision
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	241	const uint32_t uHalfPrecision = uHalfSignificand \|
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	242	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				243	(uHalfSign << HALF_SIGN_SHIFT);
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	244	// Cast is safe because all the masks and shifts above work to
				245	// make a half precision value which is only 16 bits.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	246	return (uint16_t)uHalfPrecision;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	247	}
				248
				249
				250	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	251	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	252	{
				253	// Pull the three parts out of the double-precision float
				254	const uint64_t uDouble = CopyDoubleToUint64(d);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	255	const int64_t nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				256	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				257	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	258
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	259	// Now convert the three parts to half-precision.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	260
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	261	// All works is done on uint64_t with conversion to uint16_t at
				262	// the end. This avoids integer promotions that static analyzers
				263	// complain about. Other options are for these to be unsigned int
				264	// or fast_int16_t. Code size doesn't vary much between all these
				265	// options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	266	uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				267
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	268	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				269	// +/- Infinity and NaNs -- single biased exponent is 0xff
				270	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				271	if(!uDoubleSignificand) {
				272	// Infinity
				273	uHalfSignificand = 0;
				274	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	275	// Copy the LSBs of the NaN payload that will fit from the
				276	// double to the half
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	277	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				278	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				279	// It's a qNaN; copy the qNaN bit
				280	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	281	} else {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	282	// It's an sNaN; make sure the significand is not zero
				283	// so it stays a NaN This is needed because not all
				284	// significand bits are copied from single
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	285	if(!uHalfSignificand) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	286	// Set the LSB. This is what wikipedia shows for
				287	// sNAN.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	288	uHalfSignificand \|= 0x01;
				289	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	290	}
				291	}
				292	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	293	// 0 or a subnormal number -- double biased exponent is 0
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	294	uHalfBiasedExponent = 0;
				295	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				296	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	297	// Exponent is too large to express in half-precision; round
				298	// up to infinity; TODO, is this really true?
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	299	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				300	uHalfSignificand = 0;
				301	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	302	// Exponent is too small to express in half-precision; round
				303	// down to zero
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	304	uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	305	uHalfSignificand = 0;
				306	// Could convert some of these values to a half-precision
				307	// subnormal, but the layer above this will never use it. See
				308	// layer above. There is code to do this in github history
				309	// for this file, but it was removed because it was never
				310	// invoked.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	311	} else {
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	312	// The normal case, exponent is in range for half-precision
				313	uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	314	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				315	}
				316	uHalfSign = uDoubleSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	317
				318
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	319	// Put the 3 values in the right place for a half precision
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	320	const uint64_t uHalfPrecision = uHalfSignificand \|
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	321	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				322	(uHalfSign << HALF_SIGN_SHIFT);
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	323	// Cast is safe because all the masks and shifts above work to
				324	// make a half precision value which is only 16 bits.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	325	return (uint16_t)uHalfPrecision;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	326	}
				327
				328
Laurence Lundblade	fe09bbf	2020-07-16 12:14:51 -0700	[diff] [blame]	329	/*
				330	IEEE754_HalfToFloat() was created but is not needed. It can be retrieved from
				331	github history if needed.
				332	*/
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	333
				334
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	335	// Public function; see ieee754.h
				336	double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
				337	{
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	338	// Pull out the three parts of the half-precision float. Do all
				339	// the work in 64 bits because that is what the end result is. It
				340	// may give smaller code size and will keep static analyzers
				341	// happier.
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	342	const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				343	const int64_t nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				344	const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	345
				346
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	347	// Make the three parts of hte single-precision number
				348	uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
				349	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				350	// 0 or subnormal
				351	uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
				352	if(uHalfSignificand) {
				353	// Subnormal case
				354	uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	355	// A half-precision subnormal can always be converted to a
				356	// normal double-precision float because the ranges line
				357	// up
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	358	uDoubleSignificand = uHalfSignificand;
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	359	// Shift bits from right of the decimal to left, reducing
				360	// the exponent by 1 each time
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	361	do {
				362	uDoubleSignificand <<= 1;
				363	uDoubleBiasedExponent--;
				364	} while ((uDoubleSignificand & 0x400) == 0);
				365	uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
				366	uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				367	} else {
				368	// Just zero
				369	uDoubleSignificand = 0;
				370	}
				371	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				372	// NaN or Inifinity
				373	uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
				374	if(uHalfSignificand) {
				375	// NaN
				376	// First preserve the NaN payload from half to single
				377	uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				378	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	379	// Next, set qNaN if needed since half qNaN bit is not
				380	// copied above
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	381	uDoubleSignificand \|= DOUBLE_QUIET_NAN_BIT;
				382	}
				383	} else {
				384	// Infinity
				385	uDoubleSignificand = 0;
				386	}
				387	} else {
				388	// Normal number
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	389	uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
				390	uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	391	}
				392	uDoubleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	393
				394
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	395	// Shift the 3 parts into place as a double-precision
				396	const uint64_t uDouble = uDoubleSignificand \|
				397	(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) \|
				398	(uDoubleSign << DOUBLE_SIGN_SHIFT);
				399	return CopyUint64ToDouble(uDouble);
				400	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	401
				402
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	403
Laurence Lundblade	f7c0adb	2020-08-08 20:20:58 -0700	[diff] [blame]	404	/*
				405	IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
				406	github history if needed.
				407	*/
Laurence Lundblade	9682a53	2020-06-06 18:33:04 -0700	[diff] [blame]	408
				409
				410
				411	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	412	IEEE754_union IEEE754_FloatToSmallest(float f)
				413	{
				414	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	415
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	416	// Pull the neeed two parts out of the single-precision float
				417	const uint32_t uSingle = CopyFloatToUint32(f);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	418	const int32_t nSingleExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	419	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	420
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	421	// Bit mask that is the significand bits that would be lost when
				422	// converting from single-precision to half-precision
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	423	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				424
Laurence Lundblade	576aa0c	2020-07-21 21:36:52 -0700	[diff] [blame]	425	// Optimizer will re organize so there is only one call to
				426	// IEEE754_FloatToHalf() in the final code.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	427	if(uSingle == 0) {
				428	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	429	result.uSize = IEEE754_UNION_IS_HALF;
				430	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	431	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				432	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	433	result.uSize = IEEE754_UNION_IS_HALF;
				434	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	435	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				436	// Normal number in exponent range and precision won't be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	437	result.uSize = IEEE754_UNION_IS_HALF;
				438	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	439	} else {
				440	// Subnormal, exponent out of range, or precision will be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	441	result.uSize = IEEE754_UNION_IS_SINGLE;
				442	result.uValue = uSingle;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	443	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	444
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	445	return result;
				446	}
				447
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	448	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	449	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				450	{
				451	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	452
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	453	// Pull the needed two parts out of the double-precision float
				454	const uint64_t uDouble = CopyDoubleToUint64(d);
Laurence Lundblade	c5fef68	2020-01-25 11:38:45 -0800	[diff] [blame]	455	const int64_t nDoubleExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
Laurence Lundblade	e17e2d7	2020-07-16 19:15:26 -0700	[diff] [blame]	456	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	457
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	458	// Masks to check whether dropped significand bits are zero or not
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	459	const uint64_t uDroppedHalfBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	460	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	461
Laurence Lundblade	29ec464	2020-07-21 21:11:45 -0700	[diff] [blame]	462	// This will not convert to half-precion or single-precision
				463	// subnormals. Values that could be converted will be output as
				464	// the double they are or occasionally to a normal single. This
				465	// could be implemented, but it is more code and would rarely be
				466	// used and rarely reduce the output size.
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	467
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	468	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	469	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	470	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	471	result.uSize = IEEE754_UNION_IS_HALF;
				472	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	473	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				474	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	475	result.uSize = IEEE754_UNION_IS_HALF;
				476	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	b992fdb	2020-07-20 22:44:11 -0700	[diff] [blame]	477	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedHalfBits))) {
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	478	// Can convert to half without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	479	result.uSize = IEEE754_UNION_IS_HALF;
				480	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	481	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				482	// Can convert to single without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	483	result.uSize = IEEE754_UNION_IS_SINGLE;
				484	result.uValue = CopyFloatToUint32((float)d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	485	} else {
				486	// Can't convert without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	487	result.uSize = IEEE754_UNION_IS_DOUBLE;
				488	result.uValue = uDouble;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	489	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	490
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	491	return result;
				492	}
				493
Laurence Lundblade	fe09bbf	2020-07-16 12:14:51 -0700	[diff] [blame]	494	#else
				495
				496	int x;
				497
Laurence Lundblade	b275cdc	2020-07-12 12:34:38 -0700	[diff] [blame]	498	#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */