Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: 4ae8dd63417502421c6a40b24afa0dc1ea7a6b5e [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame^]	2	ieee754.c -- floating point conversion between half, double and single precision
				3
				4	Copyright (c) 2018-2019, Laurence Lundblade. All rights reserved.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	5
Laurence Lundblade	a3fd49f	2019-01-21 10:16:22 -0800	[diff] [blame^]	6	SPDX-License-Identifier: BSD-3-Clause
				7
				8	See BSD-3-Clause license in README.md
				9
				10	Created on 7/23/18
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	11	==============================================================================*/
				12
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	13	#include "ieee754.h"
				14	#include <string.h> // For memcpy()
				15
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	16
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	17	/*
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	18	This code is written for clarity and verifiability, not for size, on the assumption
				19	that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	20	job and the resulting object code is smaller from combining code for the many different
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	21	cases (normal, subnormal, infinity, zero...) for the conversions.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	22
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	23	Dead stripping is also really helpful to get code size down when floating point
				24	encoding is not needed.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	25
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	26	This code works solely using shifts and masks and thus has no dependency on
				27	any math libraries. It can even work if the CPU doesn't have any floating
				28	point support, though that isn't the most useful thing to do.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	29
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	30	The memcpy() dependency is only for CopyFloatToUint32() and friends which only
				31	is needed to avoid type punning when converting the actual float bits to
				32	an unsigned value so the bit shifts and masks can work.
				33	*/
				34
				35	/*
				36	The references used to write this code:
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	37
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	38	- IEEE 754-2008, particularly section 3.6 and 6.2.1
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	39
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	40	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	41
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	42	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	43	*/
				44
				45
				46	// ----- Half Precsion -----------
				47	#define HALF_NUM_SIGNIFICAND_BITS (10)
				48	#define HALF_NUM_EXPONENT_BITS (5)
				49	#define HALF_NUM_SIGN_BITS (1)
				50
				51	#define HALF_SIGNIFICAND_SHIFT (0)
				52	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				53	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				54
				55	#define HALF_SIGNIFICAND_MASK (0x3ff) // The lower 10 bits // 0x03ff
				56	#define HALF_EXPONENT_MASK (0x1f << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				57	#define HALF_SIGN_MASK (0x01 << HALF_SIGN_SHIFT) // // 0x80001 bit of sign
				58	#define HALF_QUIET_NAN_BIT (0x01 << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
				59
				60	/* Biased Biased Unbiased Use
				61	0x00 0 -15 0 and subnormal
				62	0x01 1 -14 Smallest normal exponent
				63	0x1e 30 15 Largest normal exponent
				64	0x1F 31 16 NaN and Infinity */
				65	#define HALF_EXPONENT_BIAS (15)
				66	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				67	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				68	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				69	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				70
				71
				72	// ------ Single Precision --------
				73	#define SINGLE_NUM_SIGNIFICAND_BITS (23)
				74	#define SINGLE_NUM_EXPONENT_BITS (8)
				75	#define SINGLE_NUM_SIGN_BITS (1)
				76
				77	#define SINGLE_SIGNIFICAND_SHIFT (0)
				78	#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
				79	#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
				80
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	81	#define SINGLE_SIGNIFICAND_MASK (0x7fffffUL) // The lower 23 bits
				82	#define SINGLE_EXPONENT_MASK (0xffUL << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
				83	#define SINGLE_SIGN_MASK (0x01UL << SINGLE_SIGN_SHIFT) // 1 bit of sign
				84	#define SINGLE_QUIET_NAN_BIT (0x01UL << (SINGLE_NUM_SIGNIFICAND_BITS-1))
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	85
				86	/* Biased Biased Unbiased Use
				87	0x0000 0 -127 0 and subnormal
				88	0x0001 1 -126 Smallest normal exponent
				89	0x7f 127 0 1
				90	0xfe 254 127 Largest normal exponent
				91	0xff 255 128 NaN and Infinity */
				92	#define SINGLE_EXPONENT_BIAS (127)
				93	#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
				94	#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
				95	#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
				96	#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
				97
				98
				99	// --------- Double Precision ----------
				100	#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
				101	#define DOUBLE_NUM_EXPONENT_BITS (11)
				102	#define DOUBLE_NUM_SIGN_BITS (1)
				103
				104	#define DOUBLE_SIGNIFICAND_SHIFT (0)
				105	#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
				106	#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
				107
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	108	#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
				109	#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
				110	#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
				111	#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
				112
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	113
				114	/* Biased Biased Unbiased Use
				115	0x00000000 0 -1023 0 and subnormal
				116	0x00000001 1 -1022 Smallest normal exponent
				117	0x000007fe 2046 1023 Largest normal exponent
				118	0x000007ff 2047 1024 NaN and Infinity */
				119	#define DOUBLE_EXPONENT_BIAS (1023)
				120	#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // unbiased
				121	#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // unbiased
				122	#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // unbiased
				123	#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // unbiased
				124
				125
				126
				127	/*
				128	Convenient functions to avoid type punning, compiler warnings and such
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	129	The optimizer reduces them to a simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	130	This is a crusty corner of C. It shouldn't be this hard.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	131
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	132	These are also in UsefulBuf.h under a different name. They are copied
Laurence Lundblade	3df8c7e	2018-11-02 13:12:41 +0700	[diff] [blame]	133	here to avoid a dependency on UsefulBuf.h. There is no
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	134	object code size impact because these always optimze down to a
				135	simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	136	*/
				137	static inline uint32_t CopyFloatToUint32(float f)
				138	{
				139	uint32_t u32;
				140	memcpy(&u32, &f, sizeof(uint32_t));
				141	return u32;
				142	}
				143
				144	static inline uint64_t CopyDoubleToUint64(double d)
				145	{
				146	uint64_t u64;
				147	memcpy(&u64, &d, sizeof(uint64_t));
				148	return u64;
				149	}
				150
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	151	static inline float CopyUint32ToFloat(uint32_t u32)
				152	{
				153	float f;
				154	memcpy(&f, &u32, sizeof(uint32_t));
				155	return f;
				156	}
				157
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	158	static inline double CopyUint64ToDouble(uint64_t u64)
				159	{
				160	double d;
				161	memcpy(&d, &u64, sizeof(uint64_t));
				162	return d;
				163	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	164
				165
				166	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	167	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	168	{
				169	// Pull the three parts out of the single-precision float
				170	const uint32_t uSingle = CopyFloatToUint32(f);
				171	const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				172	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				173	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	174
				175
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	176	// Now convert the three parts to half-precision.
				177	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				178	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				179	// +/- Infinity and NaNs -- single biased exponent is 0xff
				180	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				181	if(!uSingleSignificand) {
				182	// Infinity
				183	uHalfSignificand = 0;
				184	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	185	// Copy the LBSs of the NaN payload that will fit from the single to the half
				186	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				187	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				188	// It's a qNaN; copy the qNaN bit
				189	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	190	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	191	// It's a sNaN; make sure the significand is not zero so it stays a NaN
				192	// This is needed because not all significand bits are copied from single
				193	if(!uHalfSignificand) {
				194	// Set the LSB. This is what wikipedia shows for sNAN.
				195	uHalfSignificand \|= 0x01;
				196	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	197	}
				198	}
				199	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
				200	// 0 or a subnormal number -- singled biased exponent is 0
				201	uHalfBiasedExponent = 0;
				202	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				203	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
				204	// Exponent is too large to express in half-precision; round up to infinity
				205	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				206	uHalfSignificand = 0;
				207	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
				208	// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
				209	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				210	// Difference between single normal exponent and the base exponent of a half subnormal
				211	const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
				212	// Also have to shift the significand by the difference in number of bits between a single and a half significand
				213	const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				214	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				215	const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
				216	uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				217	} else {
				218	// The normal case
				219	uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
				220	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				221	}
				222	uHalfSign = uSingleSign;
				223
				224	// Put the 3 values in the right place for a half precision
				225	const uint16_t uHalfPrecision = uHalfSignificand \|
				226	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				227	(uHalfSign << HALF_SIGN_SHIFT);
				228	return uHalfPrecision;
				229	}
				230
				231
				232	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	233	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	234	{
				235	// Pull the three parts out of the double-precision float
				236	const uint64_t uDouble = CopyDoubleToUint64(d);
				237	const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				238	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				239	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				240
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	241
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	242	// Now convert the three parts to half-precision.
				243	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				244	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				245	// +/- Infinity and NaNs -- single biased exponent is 0xff
				246	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				247	if(!uDoubleSignificand) {
				248	// Infinity
				249	uHalfSignificand = 0;
				250	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	251	// Copy the LBSs of the NaN payload that will fit from the double to the half
				252	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				253	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				254	// It's a qNaN; copy the qNaN bit
				255	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	256	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	257	// It's an sNaN; make sure the significand is not zero so it stays a NaN
				258	// This is needed because not all significand bits are copied from single
				259	if(!uHalfSignificand) {
				260	// Set the LSB. This is what wikipedia shows for sNAN.
				261	uHalfSignificand \|= 0x01;
				262	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	263	}
				264	}
				265	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
				266	// 0 or a subnormal number -- double biased exponent is 0
				267	uHalfBiasedExponent = 0;
				268	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				269	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
				270	// Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
				271	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				272	uHalfSignificand = 0;
				273	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
				274	// Exponent is too small to express in half-precision; round down to zero
				275	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				276	// Difference between double normal exponent and the base exponent of a half subnormal
				277	const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
				278	// Also have to shift the significand by the difference in number of bits between a double and a half significand
				279	const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				280	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
Laurence Lundblade	f280195	2018-12-17 10:40:29 -0800	[diff] [blame]	281	const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	282	uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				283	} else {
				284	// The normal case
				285	uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
				286	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				287	}
				288	uHalfSign = uDoubleSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	289
				290
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	291	// Put the 3 values in the right place for a half precision
				292	const uint16_t uHalfPrecision = uHalfSignificand \|
				293	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				294	(uHalfSign << HALF_SIGN_SHIFT);
				295	return uHalfPrecision;
				296	}
				297
				298
				299	// Public function; see ieee754.h
				300	float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
				301	{
				302	// Pull out the three parts of the half-precision float
				303	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				304	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				305	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	306
				307
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	308	// Make the three parts of the single-precision number
				309	uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
				310	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				311	// 0 or subnormal
				312	if(uHalfSignificand) {
				313	// Subnormal case
				314	uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
				315	// A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
				316	uSingleSignificand = uHalfSignificand;
				317	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				318	do {
				319	uSingleSignificand <<= 1;
				320	uSingleBiasedExponent--;
				321	} while ((uSingleSignificand & 0x400) == 0);
				322	uSingleSignificand &= HALF_SIGNIFICAND_MASK;
				323	uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				324	} else {
				325	// Just zero
				326	uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
				327	uSingleSignificand = 0;
				328	}
				329	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				330	// NaN or Inifinity
				331	uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
				332	if(uHalfSignificand) {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	333	// NaN
				334	// First preserve the NaN payload from half to single
				335	uSingleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				336	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				337	// Next, set qNaN if needed since half qNaN bit is not copied above
				338	uSingleSignificand \|= SINGLE_QUIET_NAN_BIT;
				339	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	340	} else {
				341	// Infinity
				342	uSingleSignificand = 0;
				343	}
				344	} else {
				345	// Normal number
				346	uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
				347	uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				348	}
				349	uSingleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	350
				351
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	352	// Shift the three parts of the single precision into place
				353	const uint32_t uSinglePrecision = uSingleSignificand \|
				354	(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) \|
				355	(uSingleSign << SINGLE_SIGN_SHIFT);
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	356
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	357	return CopyUint32ToFloat(uSinglePrecision);
				358	}
				359
				360
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	361	// Public function; see ieee754.h
				362	double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
				363	{
				364	// Pull out the three parts of the half-precision float
				365	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				366	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				367	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	368
				369
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	370	// Make the three parts of hte single-precision number
				371	uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
				372	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				373	// 0 or subnormal
				374	uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
				375	if(uHalfSignificand) {
				376	// Subnormal case
				377	uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
				378	// A half-precision subnormal can always be converted to a normal double-precision float because the ranges line up
				379	uDoubleSignificand = uHalfSignificand;
				380	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				381	do {
				382	uDoubleSignificand <<= 1;
				383	uDoubleBiasedExponent--;
				384	} while ((uDoubleSignificand & 0x400) == 0);
				385	uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
				386	uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				387	} else {
				388	// Just zero
				389	uDoubleSignificand = 0;
				390	}
				391	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				392	// NaN or Inifinity
				393	uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
				394	if(uHalfSignificand) {
				395	// NaN
				396	// First preserve the NaN payload from half to single
				397	uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				398	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				399	// Next, set qNaN if needed since half qNaN bit is not copied above
				400	uDoubleSignificand \|= DOUBLE_QUIET_NAN_BIT;
				401	}
				402	} else {
				403	// Infinity
				404	uDoubleSignificand = 0;
				405	}
				406	} else {
				407	// Normal number
				408	uDoubleBiasedExponent = nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS;
				409	uDoubleSignificand = (uint64_t)uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				410	}
				411	uDoubleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	412
				413
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	414	// Shift the 3 parts into place as a double-precision
				415	const uint64_t uDouble = uDoubleSignificand \|
				416	(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) \|
				417	(uDoubleSign << DOUBLE_SIGN_SHIFT);
				418	return CopyUint64ToDouble(uDouble);
				419	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	420
				421
				422	// Public function; see ieee754.h
				423	IEEE754_union IEEE754_FloatToSmallest(float f)
				424	{
				425	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	426
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	427	// Pull the neeed two parts out of the single-precision float
				428	const uint32_t uSingle = CopyFloatToUint32(f);
				429	const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				430	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	431
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	432	// Bit mask that is the significand bits that would be lost when converting
				433	// from single-precision to half-precision
				434	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				435
				436	// Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
				437	if(uSingle == 0) {
				438	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	439	result.uSize = IEEE754_UNION_IS_HALF;
				440	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	441	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				442	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	443	result.uSize = IEEE754_UNION_IS_HALF;
				444	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	445	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				446	// Normal number in exponent range and precision won't be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	447	result.uSize = IEEE754_UNION_IS_HALF;
				448	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	449	} else {
				450	// Subnormal, exponent out of range, or precision will be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	451	result.uSize = IEEE754_UNION_IS_SINGLE;
				452	result.uValue = uSingle;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	453	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	454
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	455	return result;
				456	}
				457
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	458	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	459	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				460	{
				461	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	462
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	463	// Pull the needed two parts out of the double-precision float
				464	const uint64_t uDouble = CopyDoubleToUint64(d);
				465	const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				466	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	467
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	468	// Masks to check whether dropped significand bits are zero or not
				469	const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				470	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	471
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	472	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	473	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	474	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	475	result.uSize = IEEE754_UNION_IS_HALF;
				476	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	477	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				478	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	479	result.uSize = IEEE754_UNION_IS_HALF;
				480	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	481	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
				482	// Can convert to half without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	483	result.uSize = IEEE754_UNION_IS_HALF;
				484	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	485	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				486	// Can convert to single without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	487	result.uSize = IEEE754_UNION_IS_SINGLE;
				488	result.uValue = CopyFloatToUint32((float)d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	489	} else {
				490	// Can't convert without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	491	result.uSize = IEEE754_UNION_IS_DOUBLE;
				492	result.uValue = uDouble;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	493	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	494
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	495	return result;
				496	}
				497