Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: e221f3ddeb8085faf8f7aaff5dd65537fd1d53c0 [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	2
Laurence Lundblade	d92a616	2018-11-01 11:38:35 +0700	[diff] [blame]	3	Copyright (c) 2018, Laurence Lundblade.
				4	All rights reserved.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	5
Laurence Lundblade	d92a616	2018-11-01 11:38:35 +0700	[diff] [blame]	6	Redistribution and use in source and binary forms, with or without
				7	modification, are permitted provided that the following conditions are
				8	met:
				9	* Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11	* Redistributions in binary form must reproduce the above
				12	copyright notice, this list of conditions and the following
				13	disclaimer in the documentation and/or other materials provided
				14	with the distribution.
				15	* Neither the name of The Linux Foundation nor the names of its
				16	contributors, nor the name "Laurence Lundblade" may be used to
				17	endorse or promote products derived from this software without
				18	specific prior written permission.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	19
Laurence Lundblade	d92a616	2018-11-01 11:38:35 +0700	[diff] [blame]	20	THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
				21	WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
				22	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
				23	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
				24	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				25	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				26	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
				27	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				28	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
				29	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
				30	IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	31	==============================================================================*/
				32
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	33	//
				34	// ieee754.c
				35	// Indefinite
				36	//
				37	// Created by Laurence Lundblade on 7/23/18.
				38	// Copyright © 2018 Laurence Lundblade. All rights reserved.
				39	//
				40
				41	#include "ieee754.h"
				42	#include <string.h> // For memcpy()
				43
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	44
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	45	/*
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	46	This code is written for clarity and verifiability, not for size, on the assumption
				47	that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	48	job and the resulting object code is smaller from combining code for the many different
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	49	cases (normal, subnormal, infinity, zero...) for the conversions.
				50
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	51	Dead stripping is also really helpful to get code size down when floating point
				52	encoding is not needed.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	53
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	54	This code works solely using shifts and masks and thus has no dependency on
				55	any math libraries. It can even work if the CPU doesn't have any floating
				56	point support, though that isn't the most useful thing to do.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	57
				58	The memcpy() dependency is only for CopyFloatToUint32() and friends, which are
				59	needed only to avoid type punning when converting the actual float bits to
				60	an unsigned value so the bit shifts and masks can work.
				61	*/
				62
				63	/*
				64	The references used to write this code:
				65
				66	- IEEE 754-2008, particularly section 3.6 and 6.2.1
				67
				68	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
				69
				70	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	71	*/
				72
				73
				74	// ----- Half Precsion -----------
				75	#define HALF_NUM_SIGNIFICAND_BITS (10)
				76	#define HALF_NUM_EXPONENT_BITS (5)
				77	#define HALF_NUM_SIGN_BITS (1)
				78
				79	#define HALF_SIGNIFICAND_SHIFT (0)
				80	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				81	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				82
				83	#define HALF_SIGNIFICAND_MASK (0x3ff) // The lower 10 bits // 0x03ff
				84	#define HALF_EXPONENT_MASK (0x1f << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				85	#define HALF_SIGN_MASK (0x01 << HALF_SIGN_SHIFT) // // 0x80001 bit of sign
				86	#define HALF_QUIET_NAN_BIT (0x01 << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
				87
				88	/* Biased Biased Unbiased Use
				89	0x00 0 -15 0 and subnormal
				90	0x01 1 -14 Smallest normal exponent
				91	0x1e 30 15 Largest normal exponent
				92	0x1F 31 16 NaN and Infinity */
				93	#define HALF_EXPONENT_BIAS (15)
				94	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				95	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				96	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				97	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				98
				99
				100	// ------ Single Precision --------
				101	#define SINGLE_NUM_SIGNIFICAND_BITS (23)
				102	#define SINGLE_NUM_EXPONENT_BITS (8)
				103	#define SINGLE_NUM_SIGN_BITS (1)
				104
				105	#define SINGLE_SIGNIFICAND_SHIFT (0)
				106	#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
				107	#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
				108
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	109	#define SINGLE_SIGNIFICAND_MASK (0x7fffffUL) // The lower 23 bits
				110	#define SINGLE_EXPONENT_MASK (0xffUL << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
				111	#define SINGLE_SIGN_MASK (0x01UL << SINGLE_SIGN_SHIFT) // 1 bit of sign
				112	#define SINGLE_QUIET_NAN_BIT (0x01UL << (SINGLE_NUM_SIGNIFICAND_BITS-1))
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	113
				114	/* Biased Biased Unbiased Use
				115	0x0000 0 -127 0 and subnormal
				116	0x0001 1 -126 Smallest normal exponent
				117	0x7f 127 0 1
				118	0xfe 254 127 Largest normal exponent
				119	0xff 255 128 NaN and Infinity */
				120	#define SINGLE_EXPONENT_BIAS (127)
				121	#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
				122	#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
				123	#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
				124	#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
				125
				126
				127	// --------- Double Precision ----------
				128	#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
				129	#define DOUBLE_NUM_EXPONENT_BITS (11)
				130	#define DOUBLE_NUM_SIGN_BITS (1)
				131
				132	#define DOUBLE_SIGNIFICAND_SHIFT (0)
				133	#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
				134	#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
				135
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	136	#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
				137	#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
				138	#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
				139	#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
				140
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	141
				142	/* Biased Biased Unbiased Use
				143	0x00000000 0 -1023 0 and subnormal
				144	0x00000001 1 -1022 Smallest normal exponent
				145	0x000007fe 2046 1023 Largest normal exponent
				146	0x000007ff 2047 1024 NaN and Infinity */
				147	#define DOUBLE_EXPONENT_BIAS (1023)
				148	#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // unbiased
				149	#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // unbiased
				150	#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // unbiased
				151	#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // unbiased
				152
				153
				154
				155	/*
				156	Convenient functions to avoid type punning, compiler warnings and such
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	157	The optimizer reduces them to a simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	158	This is a crusty corner of C. It shouldn't be this hard.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	159
				160	These are also in UsefulBuf.h under a different name. They are copied
				161	here because to avoid a dependency on UsefulBuf.h. There is no
				162	object code size impact because these always optimze down to a
				163	simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	164	*/
				165	static inline uint32_t CopyFloatToUint32(float f)
				166	{
				167	uint32_t u32;
				168	memcpy(&u32, &f, sizeof(uint32_t));
				169	return u32;
				170	}
				171
				172	static inline uint64_t CopyDoubleToUint64(double d)
				173	{
				174	uint64_t u64;
				175	memcpy(&u64, &d, sizeof(uint64_t));
				176	return u64;
				177	}
				178
/*
 Return the float whose object representation is the bits in u32.

 The bytes are copied with memcpy() rather than read through a cast
 pointer, so no aliasing rule is violated.
 NOTE(review): assumes sizeof(float) == sizeof(uint32_t) -- confirm for
 any new target.
 */
static inline float CopyUint32ToFloat(uint32_t u32)
{
    float f;
    memcpy(&f, &u32, sizeof f);
    return f;
}
				185
				186
				187
				188	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	189	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	190	{
				191	// Pull the three parts out of the single-precision float
				192	const uint32_t uSingle = CopyFloatToUint32(f);
				193	const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				194	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				195	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				196
				197
				198	// Now convert the three parts to half-precision.
				199	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				200	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				201	// +/- Infinity and NaNs -- single biased exponent is 0xff
				202	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				203	if(!uSingleSignificand) {
				204	// Infinity
				205	uHalfSignificand = 0;
				206	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	207	// Copy the LBSs of the NaN payload that will fit from the single to the half
				208	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				209	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				210	// It's a qNaN; copy the qNaN bit
				211	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	212	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	213	// It's a sNaN; make sure the significand is not zero so it stays a NaN
				214	// This is needed because not all significand bits are copied from single
				215	if(!uHalfSignificand) {
				216	// Set the LSB. This is what wikipedia shows for sNAN.
				217	uHalfSignificand \|= 0x01;
				218	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	219	}
				220	}
				221	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
				222	// 0 or a subnormal number -- singled biased exponent is 0
				223	uHalfBiasedExponent = 0;
				224	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				225	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
				226	// Exponent is too large to express in half-precision; round up to infinity
				227	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				228	uHalfSignificand = 0;
				229	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
				230	// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
				231	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				232	// Difference between single normal exponent and the base exponent of a half subnormal
				233	const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
				234	// Also have to shift the significand by the difference in number of bits between a single and a half significand
				235	const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				236	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				237	const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
				238	uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				239	} else {
				240	// The normal case
				241	uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
				242	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				243	}
				244	uHalfSign = uSingleSign;
				245
				246	// Put the 3 values in the right place for a half precision
				247	const uint16_t uHalfPrecision = uHalfSignificand \|
				248	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				249	(uHalfSign << HALF_SIGN_SHIFT);
				250	return uHalfPrecision;
				251	}
				252
				253
				254	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	255	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	256	{
				257	// Pull the three parts out of the double-precision float
				258	const uint64_t uDouble = CopyDoubleToUint64(d);
				259	const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				260	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				261	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				262
				263
				264	// Now convert the three parts to half-precision.
				265	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				266	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				267	// +/- Infinity and NaNs -- single biased exponent is 0xff
				268	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				269	if(!uDoubleSignificand) {
				270	// Infinity
				271	uHalfSignificand = 0;
				272	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	273	// Copy the LBSs of the NaN payload that will fit from the double to the half
				274	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				275	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				276	// It's a qNaN; copy the qNaN bit
				277	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	278	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	279	// It's an sNaN; make sure the significand is not zero so it stays a NaN
				280	// This is needed because not all significand bits are copied from single
				281	if(!uHalfSignificand) {
				282	// Set the LSB. This is what wikipedia shows for sNAN.
				283	uHalfSignificand \|= 0x01;
				284	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	285	}
				286	}
				287	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
				288	// 0 or a subnormal number -- double biased exponent is 0
				289	uHalfBiasedExponent = 0;
				290	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				291	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
				292	// Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
				293	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				294	uHalfSignificand = 0;
				295	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
				296	// Exponent is too small to express in half-precision; round down to zero
				297	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				298	// Difference between double normal exponent and the base exponent of a half subnormal
				299	const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
				300	// Also have to shift the significand by the difference in number of bits between a double and a half significand
				301	const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				302	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				303	const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01L << DOUBLE_NUM_SIGNIFICAND_BITS);
				304	uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				305	} else {
				306	// The normal case
				307	uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
				308	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				309	}
				310	uHalfSign = uDoubleSign;
				311
				312
				313	// Put the 3 values in the right place for a half precision
				314	const uint16_t uHalfPrecision = uHalfSignificand \|
				315	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				316	(uHalfSign << HALF_SIGN_SHIFT);
				317	return uHalfPrecision;
				318	}
				319
				320
				321	// Public function; see ieee754.h
				322	float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
				323	{
				324	// Pull out the three parts of the half-precision float
				325	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				326	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				327	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
				328
				329
				330	// Make the three parts of the single-precision number
				331	uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
				332	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				333	// 0 or subnormal
				334	if(uHalfSignificand) {
				335	// Subnormal case
				336	uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
				337	// A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
				338	uSingleSignificand = uHalfSignificand;
				339	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				340	do {
				341	uSingleSignificand <<= 1;
				342	uSingleBiasedExponent--;
				343	} while ((uSingleSignificand & 0x400) == 0);
				344	uSingleSignificand &= HALF_SIGNIFICAND_MASK;
				345	uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				346	} else {
				347	// Just zero
				348	uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
				349	uSingleSignificand = 0;
				350	}
				351	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				352	// NaN or Inifinity
				353	uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
				354	if(uHalfSignificand) {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	355	// NaN
				356	// First preserve the NaN payload from half to single
				357	uSingleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				358	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				359	// Next, set qNaN if needed since half qNaN bit is not copied above
				360	uSingleSignificand \|= SINGLE_QUIET_NAN_BIT;
				361	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	362	} else {
				363	// Infinity
				364	uSingleSignificand = 0;
				365	}
				366	} else {
				367	// Normal number
				368	uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
				369	uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				370	}
				371	uSingleSign = uHalfSign;
				372
				373
				374	// Shift the three parts of the single precision into place
				375	const uint32_t uSinglePrecision = uSingleSignificand \|
				376	(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) \|
				377	(uSingleSign << SINGLE_SIGN_SHIFT);
				378
				379	return CopyUint32ToFloat(uSinglePrecision);
				380	}
				381
				382
Laurence Lundblade	781fd82	2018-10-01 09:37:52 -0700	[diff] [blame]	383	/*
				384	double IEEE754_HalfToDouble(uint16_t uHalfPrecision) is not needed
				385	*/
				386
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	387
				388
				389	// Public function; see ieee754.h
				390	IEEE754_union IEEE754_FloatToSmallest(float f)
				391	{
				392	IEEE754_union result;
				393
				394	// Pull the neeed two parts out of the single-precision float
				395	const uint32_t uSingle = CopyFloatToUint32(f);
				396	const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				397	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				398
				399	// Bit mask that is the significand bits that would be lost when converting
				400	// from single-precision to half-precision
				401	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				402
				403	// Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
				404	if(uSingle == 0) {
				405	// Value is 0.0000, not a a subnormal
				406	result.uTag = IEEE754_UNION_IS_HALF;
				407	result.u16 = IEEE754_FloatToHalf(f);
				408	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				409	// NaN, +/- infinity
				410	result.uTag = IEEE754_UNION_IS_HALF;
				411	result.u16 = IEEE754_FloatToHalf(f);
				412	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				413	// Normal number in exponent range and precision won't be lost
				414	result.uTag = IEEE754_UNION_IS_HALF;
				415	result.u16 = IEEE754_FloatToHalf(f);
				416	} else {
				417	// Subnormal, exponent out of range, or precision will be lost
				418	result.uTag = IEEE754_UNION_IS_SINGLE;
				419	result.u32 = uSingle;
				420	}
				421
				422	return result;
				423	}
				424
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	425	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	426	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				427	{
				428	IEEE754_union result;
				429
				430	// Pull the needed two parts out of the double-precision float
				431	const uint64_t uDouble = CopyDoubleToUint64(d);
				432	const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				433	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				434
				435	// Masks to check whether dropped significand bits are zero or not
				436	const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				437	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
				438
				439	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	440	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	441	// Value is 0.0000, not a a subnormal
				442	result.uTag = IEEE754_UNION_IS_HALF;
				443	result.u16 = IEEE754_DoubleToHalf(d);
				444	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				445	// NaN, +/- infinity
				446	result.uTag = IEEE754_UNION_IS_HALF;
				447	result.u16 = IEEE754_DoubleToHalf(d);
				448	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
				449	// Can convert to half without precision loss
				450	result.uTag = IEEE754_UNION_IS_HALF;
				451	result.u16 = IEEE754_DoubleToHalf(d);
				452	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				453	// Can convert to single without precision loss
				454	result.uTag = IEEE754_UNION_IS_SINGLE;
				455	result.u32 = CopyFloatToUint32((float)d);
				456	} else {
				457	// Can't convert without precision loss
				458	result.uTag = IEEE754_UNION_IS_DOUBLE;
				459	result.u64 = uDouble;
				460	}
				461
				462	return result;
				463	}
				464