Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: c52f6eb2d3503bdd5789d655b4c3f7cad07e2783 [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	2
Laurence Lundblade	d92a616	2018-11-01 11:38:35 +0700	[diff] [blame]	3	Copyright (c) 2018, Laurence Lundblade.
				4	All rights reserved.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	5
Laurence Lundblade	0dbc917	2018-11-01 14:17:21 +0700	[diff] [blame]	6	Redistribution and use in source and binary forms, with or without
				7	modification, are permitted provided that the following conditions are
				8	met:
				9	* Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11	* Redistributions in binary form must reproduce the above
				12	copyright notice, this list of conditions and the following
				13	disclaimer in the documentation and/or other materials provided
				14	with the distribution.
				15	* The name "Laurence Lundblade" may not be used to
				16	endorse or promote products derived from this software without
				17	specific prior written permission.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	18
Laurence Lundblade	0dbc917	2018-11-01 14:17:21 +0700	[diff] [blame]	19	THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
				20	WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
				21	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
				22	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
				23	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				24	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				25	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
				26	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				27	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
				28	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
				29	IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	30	==============================================================================*/
				31
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	32	//
				33	// ieee754.c
				34	// Indefinite
				35	//
				36	// Created by Laurence Lundblade on 7/23/18.
				37	// Copyright © 2018 Laurence Lundblade. All rights reserved.
				38	//
				39
				40	#include "ieee754.h"
				41	#include <string.h> // For memcpy()
				42
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	43
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	44	/*
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	45	This code is written for clarity and verifiability, not for size, on the assumption
				46	that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	47	job and the resulting object code is smaller from combining code for the many different
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	48	cases (normal, subnormal, infinity, zero...) for the conversions.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	49
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	50	Dead stripping is also really helpful to get code size down when floating point
				51	encoding is not needed.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	52
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	53	This code works solely using shifts and masks and thus has no dependency on
				54	any math libraries. It can even work if the CPU doesn't have any floating
				55	point support, though that isn't the most useful thing to do.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	56
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	57	The memcpy() dependency is only for CopyFloatToUint32() and friends which only
				58	is needed to avoid type punning when converting the actual float bits to
				59	an unsigned value so the bit shifts and masks can work.
				60	*/
				61
				62	/*
				63	The references used to write this code:
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	64
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	65	- IEEE 754-2008, particularly section 3.6 and 6.2.1
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	66
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	67	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	68
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	69	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	70	*/
				71
				72
				73	// ----- Half Precsion -----------
				74	#define HALF_NUM_SIGNIFICAND_BITS (10)
				75	#define HALF_NUM_EXPONENT_BITS (5)
				76	#define HALF_NUM_SIGN_BITS (1)
				77
				78	#define HALF_SIGNIFICAND_SHIFT (0)
				79	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				80	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				81
				82	#define HALF_SIGNIFICAND_MASK (0x3ff) // The lower 10 bits // 0x03ff
				83	#define HALF_EXPONENT_MASK (0x1f << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				84	#define HALF_SIGN_MASK (0x01 << HALF_SIGN_SHIFT) // // 0x80001 bit of sign
				85	#define HALF_QUIET_NAN_BIT (0x01 << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
				86
				87	/* Biased Biased Unbiased Use
				88	0x00 0 -15 0 and subnormal
				89	0x01 1 -14 Smallest normal exponent
				90	0x1e 30 15 Largest normal exponent
				91	0x1F 31 16 NaN and Infinity */
				92	#define HALF_EXPONENT_BIAS (15)
				93	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				94	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				95	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				96	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				97
				98
				99	// ------ Single Precision --------
				100	#define SINGLE_NUM_SIGNIFICAND_BITS (23)
				101	#define SINGLE_NUM_EXPONENT_BITS (8)
				102	#define SINGLE_NUM_SIGN_BITS (1)
				103
				104	#define SINGLE_SIGNIFICAND_SHIFT (0)
				105	#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
				106	#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
				107
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	108	#define SINGLE_SIGNIFICAND_MASK (0x7fffffUL) // The lower 23 bits
				109	#define SINGLE_EXPONENT_MASK (0xffUL << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
				110	#define SINGLE_SIGN_MASK (0x01UL << SINGLE_SIGN_SHIFT) // 1 bit of sign
				111	#define SINGLE_QUIET_NAN_BIT (0x01UL << (SINGLE_NUM_SIGNIFICAND_BITS-1))
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	112
				113	/* Biased Biased Unbiased Use
				114	0x0000 0 -127 0 and subnormal
				115	0x0001 1 -126 Smallest normal exponent
				116	0x7f 127 0 1
				117	0xfe 254 127 Largest normal exponent
				118	0xff 255 128 NaN and Infinity */
				119	#define SINGLE_EXPONENT_BIAS (127)
				120	#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
				121	#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
				122	#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
				123	#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
				124
				125
				126	// --------- Double Precision ----------
				127	#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
				128	#define DOUBLE_NUM_EXPONENT_BITS (11)
				129	#define DOUBLE_NUM_SIGN_BITS (1)
				130
				131	#define DOUBLE_SIGNIFICAND_SHIFT (0)
				132	#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
				133	#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
				134
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	135	#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
				136	#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
				137	#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
				138	#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
				139
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	140
				141	/* Biased Biased Unbiased Use
				142	0x00000000 0 -1023 0 and subnormal
				143	0x00000001 1 -1022 Smallest normal exponent
				144	0x000007fe 2046 1023 Largest normal exponent
				145	0x000007ff 2047 1024 NaN and Infinity */
				146	#define DOUBLE_EXPONENT_BIAS (1023)
				147	#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // unbiased
				148	#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // unbiased
				149	#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // unbiased
				150	#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // unbiased
				151
				152
				153
				154	/*
				155	Convenient functions to avoid type punning, compiler warnings and such
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	156	The optimizer reduces them to a simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	157	This is a crusty corner of C. It shouldn't be this hard.
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	158
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	159	These are also in UsefulBuf.h under a different name. They are copied
Laurence Lundblade	3df8c7e	2018-11-02 13:12:41 +0700	[diff] [blame]	160	here to avoid a dependency on UsefulBuf.h. There is no
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	161	object code size impact because these always optimze down to a
				162	simple assignment.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	163	*/
				164	static inline uint32_t CopyFloatToUint32(float f)
				165	{
				166	uint32_t u32;
				167	memcpy(&u32, &f, sizeof(uint32_t));
				168	return u32;
				169	}
				170
				171	static inline uint64_t CopyDoubleToUint64(double d)
				172	{
				173	uint64_t u64;
				174	memcpy(&u64, &d, sizeof(uint64_t));
				175	return u64;
				176	}
				177
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	178	static inline float CopyUint32ToFloat(uint32_t u32)
				179	{
				180	float f;
				181	memcpy(&f, &u32, sizeof(uint32_t));
				182	return f;
				183	}
				184
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	185	static inline double CopyUint64ToDouble(uint64_t u64)
				186	{
				187	double d;
				188	memcpy(&d, &u64, sizeof(uint64_t));
				189	return d;
				190	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	191
				192
				193	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	194	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	195	{
				196	// Pull the three parts out of the single-precision float
				197	const uint32_t uSingle = CopyFloatToUint32(f);
				198	const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				199	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				200	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	201
				202
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	203	// Now convert the three parts to half-precision.
				204	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				205	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				206	// +/- Infinity and NaNs -- single biased exponent is 0xff
				207	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				208	if(!uSingleSignificand) {
				209	// Infinity
				210	uHalfSignificand = 0;
				211	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	212	// Copy the LBSs of the NaN payload that will fit from the single to the half
				213	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				214	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				215	// It's a qNaN; copy the qNaN bit
				216	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	217	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	218	// It's a sNaN; make sure the significand is not zero so it stays a NaN
				219	// This is needed because not all significand bits are copied from single
				220	if(!uHalfSignificand) {
				221	// Set the LSB. This is what wikipedia shows for sNAN.
				222	uHalfSignificand \|= 0x01;
				223	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	224	}
				225	}
				226	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
				227	// 0 or a subnormal number -- singled biased exponent is 0
				228	uHalfBiasedExponent = 0;
				229	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				230	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
				231	// Exponent is too large to express in half-precision; round up to infinity
				232	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				233	uHalfSignificand = 0;
				234	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
				235	// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
				236	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				237	// Difference between single normal exponent and the base exponent of a half subnormal
				238	const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
				239	// Also have to shift the significand by the difference in number of bits between a single and a half significand
				240	const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				241	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				242	const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
				243	uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				244	} else {
				245	// The normal case
				246	uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
				247	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				248	}
				249	uHalfSign = uSingleSign;
				250
				251	// Put the 3 values in the right place for a half precision
				252	const uint16_t uHalfPrecision = uHalfSignificand \|
				253	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				254	(uHalfSign << HALF_SIGN_SHIFT);
				255	return uHalfPrecision;
				256	}
				257
				258
				259	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	260	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	261	{
				262	// Pull the three parts out of the double-precision float
				263	const uint64_t uDouble = CopyDoubleToUint64(d);
				264	const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				265	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				266	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				267
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	268
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	269	// Now convert the three parts to half-precision.
				270	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				271	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				272	// +/- Infinity and NaNs -- single biased exponent is 0xff
				273	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				274	if(!uDoubleSignificand) {
				275	// Infinity
				276	uHalfSignificand = 0;
				277	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	278	// Copy the LBSs of the NaN payload that will fit from the double to the half
				279	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				280	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				281	// It's a qNaN; copy the qNaN bit
				282	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	283	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	284	// It's an sNaN; make sure the significand is not zero so it stays a NaN
				285	// This is needed because not all significand bits are copied from single
				286	if(!uHalfSignificand) {
				287	// Set the LSB. This is what wikipedia shows for sNAN.
				288	uHalfSignificand \|= 0x01;
				289	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	290	}
				291	}
				292	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
				293	// 0 or a subnormal number -- double biased exponent is 0
				294	uHalfBiasedExponent = 0;
				295	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				296	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
				297	// Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
				298	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				299	uHalfSignificand = 0;
				300	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
				301	// Exponent is too small to express in half-precision; round down to zero
				302	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				303	// Difference between double normal exponent and the base exponent of a half subnormal
				304	const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
				305	// Also have to shift the significand by the difference in number of bits between a double and a half significand
				306	const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				307	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
Laurence Lundblade	f280195	2018-12-17 10:40:29 -0800	[diff] [blame]	308	const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	309	uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				310	} else {
				311	// The normal case
				312	uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
				313	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				314	}
				315	uHalfSign = uDoubleSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	316
				317
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	318	// Put the 3 values in the right place for a half precision
				319	const uint16_t uHalfPrecision = uHalfSignificand \|
				320	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				321	(uHalfSign << HALF_SIGN_SHIFT);
				322	return uHalfPrecision;
				323	}
				324
				325
				326	// Public function; see ieee754.h
				327	float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
				328	{
				329	// Pull out the three parts of the half-precision float
				330	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				331	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				332	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	333
				334
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	335	// Make the three parts of the single-precision number
				336	uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
				337	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				338	// 0 or subnormal
				339	if(uHalfSignificand) {
				340	// Subnormal case
				341	uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
				342	// A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
				343	uSingleSignificand = uHalfSignificand;
				344	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				345	do {
				346	uSingleSignificand <<= 1;
				347	uSingleBiasedExponent--;
				348	} while ((uSingleSignificand & 0x400) == 0);
				349	uSingleSignificand &= HALF_SIGNIFICAND_MASK;
				350	uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				351	} else {
				352	// Just zero
				353	uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
				354	uSingleSignificand = 0;
				355	}
				356	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				357	// NaN or Inifinity
				358	uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
				359	if(uHalfSignificand) {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	360	// NaN
				361	// First preserve the NaN payload from half to single
				362	uSingleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				363	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				364	// Next, set qNaN if needed since half qNaN bit is not copied above
				365	uSingleSignificand \|= SINGLE_QUIET_NAN_BIT;
				366	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	367	} else {
				368	// Infinity
				369	uSingleSignificand = 0;
				370	}
				371	} else {
				372	// Normal number
				373	uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
				374	uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				375	}
				376	uSingleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	377
				378
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	379	// Shift the three parts of the single precision into place
				380	const uint32_t uSinglePrecision = uSingleSignificand \|
				381	(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) \|
				382	(uSingleSign << SINGLE_SIGN_SHIFT);
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	383
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	384	return CopyUint32ToFloat(uSinglePrecision);
				385	}
				386
				387
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	388	// Public function; see ieee754.h
				389	double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
				390	{
				391	// Pull out the three parts of the half-precision float
				392	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				393	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				394	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	395
				396
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	397	// Make the three parts of hte single-precision number
				398	uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
				399	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				400	// 0 or subnormal
				401	uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
				402	if(uHalfSignificand) {
				403	// Subnormal case
				404	uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
				405	// A half-precision subnormal can always be converted to a normal double-precision float because the ranges line up
				406	uDoubleSignificand = uHalfSignificand;
				407	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				408	do {
				409	uDoubleSignificand <<= 1;
				410	uDoubleBiasedExponent--;
				411	} while ((uDoubleSignificand & 0x400) == 0);
				412	uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
				413	uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				414	} else {
				415	// Just zero
				416	uDoubleSignificand = 0;
				417	}
				418	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				419	// NaN or Inifinity
				420	uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
				421	if(uHalfSignificand) {
				422	// NaN
				423	// First preserve the NaN payload from half to single
				424	uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				425	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				426	// Next, set qNaN if needed since half qNaN bit is not copied above
				427	uDoubleSignificand \|= DOUBLE_QUIET_NAN_BIT;
				428	}
				429	} else {
				430	// Infinity
				431	uDoubleSignificand = 0;
				432	}
				433	} else {
				434	// Normal number
				435	uDoubleBiasedExponent = nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS;
				436	uDoubleSignificand = (uint64_t)uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				437	}
				438	uDoubleSign = uHalfSign;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	439
				440
Laurence Lundblade	67bd551	2018-11-02 21:44:06 +0700	[diff] [blame]	441	// Shift the 3 parts into place as a double-precision
				442	const uint64_t uDouble = uDoubleSignificand \|
				443	(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) \|
				444	(uDoubleSign << DOUBLE_SIGN_SHIFT);
				445	return CopyUint64ToDouble(uDouble);
				446	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	447
				448
				449	// Public function; see ieee754.h
				450	IEEE754_union IEEE754_FloatToSmallest(float f)
				451	{
				452	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	453
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	454	// Pull the neeed two parts out of the single-precision float
				455	const uint32_t uSingle = CopyFloatToUint32(f);
				456	const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				457	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	458
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	459	// Bit mask that is the significand bits that would be lost when converting
				460	// from single-precision to half-precision
				461	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				462
				463	// Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
				464	if(uSingle == 0) {
				465	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	466	result.uSize = IEEE754_UNION_IS_HALF;
				467	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	468	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				469	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	470	result.uSize = IEEE754_UNION_IS_HALF;
				471	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	472	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				473	// Normal number in exponent range and precision won't be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	474	result.uSize = IEEE754_UNION_IS_HALF;
				475	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	476	} else {
				477	// Subnormal, exponent out of range, or precision will be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	478	result.uSize = IEEE754_UNION_IS_SINGLE;
				479	result.uValue = uSingle;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	480	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	481
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	482	return result;
				483	}
				484
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	485	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	486	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				487	{
				488	IEEE754_union result;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	489
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	490	// Pull the needed two parts out of the double-precision float
				491	const uint64_t uDouble = CopyDoubleToUint64(d);
				492	const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				493	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	494
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	495	// Masks to check whether dropped significand bits are zero or not
				496	const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				497	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	498
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	499	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	500	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	501	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	502	result.uSize = IEEE754_UNION_IS_HALF;
				503	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	504	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				505	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	506	result.uSize = IEEE754_UNION_IS_HALF;
				507	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	508	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
				509	// Can convert to half without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	510	result.uSize = IEEE754_UNION_IS_HALF;
				511	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	512	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				513	// Can convert to single without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	514	result.uSize = IEEE754_UNION_IS_SINGLE;
				515	result.uValue = CopyFloatToUint32((float)d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	516	} else {
				517	// Can't convert without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	518	result.uSize = IEEE754_UNION_IS_DOUBLE;
				519	result.uValue = uDouble;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	520	}
Laurence Lundblade	3aee3a3	2018-12-17 16:17:45 -0800	[diff] [blame]	521
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	522	return result;
				523	}
				524