Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: 12877bdf63f61b29af86c80e40badcfe208e6d4c [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
				2	Copyright 2018 Laurence Lundblade
				3
				4	Permission is hereby granted, free of charge, to any person obtaining
				5	a copy of this software and associated documentation files (the
				6	"Software"), to deal in the Software without restriction, including
				7	without limitation the rights to use, copy, modify, merge, publish,
				8	distribute, sublicense, and/or sell copies of the Software, and to
				9	permit persons to whom the Software is furnished to do so, subject to
				10	the following conditions:
				11
				12	The above copyright notice and this permission notice shall be included
				13	in all copies or substantial portions of the Software.
				14
				15	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
				16	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
				17	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
				18	NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
				19	BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
				20	ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				21	CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	SOFTWARE.
				23
				24	(This is the MIT license)
				25	==============================================================================*/
				26
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	27	//
				28	// ieee754.c
				29	// Indefinite
				30	//
				31	// Created by Laurence Lundblade on 7/23/18.
				32	// Copyright © 2018 Laurence Lundblade. All rights reserved.
				33	//
				34
				35	#include "ieee754.h"
				36	#include <string.h> // For memcpy()
				37
				38	/*
				39
				40	https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
				41
				42	These values come from IEEE 754-2008 section 3.6
				43
				44	This code is written for clarity and verifiability, not for size, on the assumption
				45	that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
				46	job and the resulting object code is smaller from combing code for the many different
				47	cases (normal, subnormal, infinity, zero...) for the conversions.
				48
				49	Dead stripping is also really helpful to get code size down.
				50
				51	*/
				52
				53
				54	// ----- Half Precsion -----------
				55	#define HALF_NUM_SIGNIFICAND_BITS (10)
				56	#define HALF_NUM_EXPONENT_BITS (5)
				57	#define HALF_NUM_SIGN_BITS (1)
				58
				59	#define HALF_SIGNIFICAND_SHIFT (0)
				60	#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				61	#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
				62
				63	#define HALF_SIGNIFICAND_MASK (0x3ff) // The lower 10 bits // 0x03ff
				64	#define HALF_EXPONENT_MASK (0x1f << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
				65	#define HALF_SIGN_MASK (0x01 << HALF_SIGN_SHIFT) // // 0x80001 bit of sign
				66	#define HALF_QUIET_NAN_BIT (0x01 << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
				67
				68	/* Biased Biased Unbiased Use
				69	0x00 0 -15 0 and subnormal
				70	0x01 1 -14 Smallest normal exponent
				71	0x1e 30 15 Largest normal exponent
				72	0x1F 31 16 NaN and Infinity */
				73	#define HALF_EXPONENT_BIAS (15)
				74	#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				75	#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				76	#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				77	#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				78
				79
				80	// ------ Single Precision --------
				81	#define SINGLE_NUM_SIGNIFICAND_BITS (23)
				82	#define SINGLE_NUM_EXPONENT_BITS (8)
				83	#define SINGLE_NUM_SIGN_BITS (1)
				84
				85	#define SINGLE_SIGNIFICAND_SHIFT (0)
				86	#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
				87	#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)
				88
				89	#define SINGLE_SIGNIFICAND_MASK (0x7fffff) // The lower 23 bits
				90	#define SINGLE_EXPONENT_MASK (0xff << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
				91	#define SINGLE_SIGN_MASK (0x01 << SINGLE_SIGN_SHIFT) // 1 bit of sign
				92
				93	/* Biased Biased Unbiased Use
				94	0x0000 0 -127 0 and subnormal
				95	0x0001 1 -126 Smallest normal exponent
				96	0x7f 127 0 1
				97	0xfe 254 127 Largest normal exponent
				98	0xff 255 128 NaN and Infinity */
				99	#define SINGLE_EXPONENT_BIAS (127)
				100	#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
				101	#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
				102	#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
				103	#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
				104
				105
				106	// --------- Double Precision ----------
				107	#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
				108	#define DOUBLE_NUM_EXPONENT_BITS (11)
				109	#define DOUBLE_NUM_SIGN_BITS (1)
				110
				111	#define DOUBLE_SIGNIFICAND_SHIFT (0)
				112	#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
				113	#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
				114
				115	#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffLL) // The lower 52 bits
				116	#define DOUBLE_EXPONENT_MASK (0x7ffLL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
				117	#define DOUBLE_SIGN_MASK (0x01LL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
				118
				119	/* Biased Biased Unbiased Use
				120	0x00000000 0 -1023 0 and subnormal
				121	0x00000001 1 -1022 Smallest normal exponent
				122	0x000007fe 2046 1023 Largest normal exponent
				123	0x000007ff 2047 1024 NaN and Infinity */
				124	#define DOUBLE_EXPONENT_BIAS (1023)
				125	#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // unbiased
				126	#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // unbiased
				127	#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // unbiased
				128	#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // unbiased
				129
				130
				131
				132	/*
				133	Convenient functions to avoid type punning, compiler warnings and such
				134	The optimizer reduces them to a simple assignment
				135	This is a crusty corner of C. It shouldn't be this hard.
				136	*/
				137	static inline uint32_t CopyFloatToUint32(float f)
				138	{
				139	uint32_t u32;
				140	memcpy(&u32, &f, sizeof(uint32_t));
				141	return u32;
				142	}
				143
				144	static inline uint64_t CopyDoubleToUint64(double d)
				145	{
				146	uint64_t u64;
				147	memcpy(&u64, &d, sizeof(uint64_t));
				148	return u64;
				149	}
				150
				151	static inline double CopyUint64ToDouble(uint64_t u64)
				152	{
				153	double d;
				154	memcpy(&d, &u64, sizeof(uint64_t));
				155	return d;
				156	}
				157
				158	static inline float CopyUint32ToFloat(uint32_t u32)
				159	{
				160	float f;
				161	memcpy(&f, &u32, sizeof(uint32_t));
				162	return f;
				163	}
				164
				165
				166
				167	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	168	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	169	{
				170	// Pull the three parts out of the single-precision float
				171	const uint32_t uSingle = CopyFloatToUint32(f);
				172	const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				173	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				174	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				175
				176
				177	// Now convert the three parts to half-precision.
				178	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				179	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				180	// +/- Infinity and NaNs -- single biased exponent is 0xff
				181	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				182	if(!uSingleSignificand) {
				183	// Infinity
				184	uHalfSignificand = 0;
				185	} else {
				186	// NaN; significand has to be non-zero
				187	if(!(uSingleSignificand & HALF_SIGNIFICAND_MASK)) {
				188	// NaN payload bits that can't be carried; convert to a quite NaN
				189	// since this has to be non-zero to still be a NaN
				190	uHalfSignificand = HALF_QUIET_NAN_BIT; // standard qNaN;
				191	} else {
				192	// The LSBs are preserved, but not the MSBs
				193	// This preservation allows some limited form of NaN payloads / boxing
				194	// Would be good to find out what other implementations do for
				195	// this kind of conversion of NaN
				196	uHalfSignificand = uSingleSignificand & HALF_SIGNIFICAND_MASK;
				197	}
				198	}
				199	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
				200	// 0 or a subnormal number -- singled biased exponent is 0
				201	uHalfBiasedExponent = 0;
				202	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				203	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
				204	// Exponent is too large to express in half-precision; round up to infinity
				205	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				206	uHalfSignificand = 0;
				207	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
				208	// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
				209	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				210	// Difference between single normal exponent and the base exponent of a half subnormal
				211	const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
				212	// Also have to shift the significand by the difference in number of bits between a single and a half significand
				213	const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				214	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				215	const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
				216	uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				217	} else {
				218	// The normal case
				219	uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
				220	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				221	}
				222	uHalfSign = uSingleSign;
				223
				224	// Put the 3 values in the right place for a half precision
				225	const uint16_t uHalfPrecision = uHalfSignificand \|
				226	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				227	(uHalfSign << HALF_SIGN_SHIFT);
				228	return uHalfPrecision;
				229	}
				230
				231
				232	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	233	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	234	{
				235	// Pull the three parts out of the double-precision float
				236	const uint64_t uDouble = CopyDoubleToUint64(d);
				237	const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				238	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				239	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				240
				241
				242	// Now convert the three parts to half-precision.
				243	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				244	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				245	// +/- Infinity and NaNs -- single biased exponent is 0xff
				246	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				247	if(!uDoubleSignificand) {
				248	// Infinity
				249	uHalfSignificand = 0;
				250	} else {
				251	// NaN; significand has to be non-zero
				252	if(!(uDoubleSignificand & HALF_SIGNIFICAND_MASK)) {
				253	// NaN payload bits that can't be carried; convert to a quite NaN
				254	// since this has to be non-zero to still be a NaN
				255	uHalfSignificand = HALF_QUIET_NAN_BIT; // standard qNaN;
				256	} else {
				257	// The LSBs are preserved, but not the MSBs
				258	// This preservation allows some limited form of NaN payloads / boxing
				259	// Would be good to find out what other implementations do for
				260	// this kind of conversion of NaN
				261	uHalfSignificand = uDoubleSignificand & HALF_SIGNIFICAND_MASK;
				262	}
				263	}
				264	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
				265	// 0 or a subnormal number -- double biased exponent is 0
				266	uHalfBiasedExponent = 0;
				267	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				268	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
				269	// Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
				270	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				271	uHalfSignificand = 0;
				272	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
				273	// Exponent is too small to express in half-precision; round down to zero
				274	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				275	// Difference between double normal exponent and the base exponent of a half subnormal
				276	const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
				277	// Also have to shift the significand by the difference in number of bits between a double and a half significand
				278	const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				279	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				280	const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01L << DOUBLE_NUM_SIGNIFICAND_BITS);
				281	uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				282	} else {
				283	// The normal case
				284	uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
				285	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				286	}
				287	uHalfSign = uDoubleSign;
				288
				289
				290	// Put the 3 values in the right place for a half precision
				291	const uint16_t uHalfPrecision = uHalfSignificand \|
				292	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				293	(uHalfSign << HALF_SIGN_SHIFT);
				294	return uHalfPrecision;
				295	}
				296
				297
				298	// Public function; see ieee754.h
				299	float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
				300	{
				301	// Pull out the three parts of the half-precision float
				302	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				303	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				304	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
				305
				306
				307	// Make the three parts of the single-precision number
				308	uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
				309	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				310	// 0 or subnormal
				311	if(uHalfSignificand) {
				312	// Subnormal case
				313	uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
				314	// A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
				315	uSingleSignificand = uHalfSignificand;
				316	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				317	do {
				318	uSingleSignificand <<= 1;
				319	uSingleBiasedExponent--;
				320	} while ((uSingleSignificand & 0x400) == 0);
				321	uSingleSignificand &= HALF_SIGNIFICAND_MASK;
				322	uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				323	} else {
				324	// Just zero
				325	uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
				326	uSingleSignificand = 0;
				327	}
				328	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				329	// NaN or Inifinity
				330	uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
				331	if(uHalfSignificand) {
				332	// Preserve NaN payload for NaN boxing
				333	uSingleSignificand = uHalfSignificand;
				334	} else {
				335	// Infinity
				336	uSingleSignificand = 0;
				337	}
				338	} else {
				339	// Normal number
				340	uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
				341	uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				342	}
				343	uSingleSign = uHalfSign;
				344
				345
				346	// Shift the three parts of the single precision into place
				347	const uint32_t uSinglePrecision = uSingleSignificand \|
				348	(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) \|
				349	(uSingleSign << SINGLE_SIGN_SHIFT);
				350
				351	return CopyUint32ToFloat(uSinglePrecision);
				352	}
				353
				354
				355	// Public function; see ieee754.h
				356	double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
				357	{
				358	// Pull out the three parts of the half-precision float
				359	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				360	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				361	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
				362
				363
				364	// Make the three parts of hte single-precision number
				365	uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
				366	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				367	// 0 or subnormal
				368	uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
				369	if(uHalfSignificand) {
				370	// Subnormal case
				371	uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
				372	// A half-precision subnormal can always be converted to a normal double-precision float because the ranges line up
				373	uDoubleSignificand = uHalfSignificand;
				374	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				375	do {
				376	uDoubleSignificand <<= 1;
				377	uDoubleBiasedExponent--;
				378	} while ((uDoubleSignificand & 0x400) == 0);
				379	uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
				380	uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				381	} else {
				382	// Just zero
				383	uDoubleSignificand = 0;
				384	}
				385	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				386	// NaN or Inifinity
				387	uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
				388	if(uHalfSignificand) {
				389	// Preserve NaN payload for NaN boxing
				390	uDoubleSignificand = uHalfSignificand;
				391	} else {
				392	// Infinity
				393	uDoubleSignificand = 0;
				394	}
				395	} else {
				396	// Normal number
				397	uDoubleBiasedExponent = nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS;
				398	uDoubleSignificand = (uint64_t)uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				399	}
				400	uDoubleSign = uHalfSign;
				401
				402
				403	// Shift the 3 parts into place as a double-precision
				404	const uint64_t uDouble = uDoubleSignificand \|
				405	(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) \|
				406	(uDoubleSign << DOUBLE_SIGN_SHIFT);
				407	return CopyUint64ToDouble(uDouble);
				408	}
				409
				410
				411	// Public function; see ieee754.h
				412	IEEE754_union IEEE754_FloatToSmallest(float f)
				413	{
				414	IEEE754_union result;
				415
				416	// Pull the neeed two parts out of the single-precision float
				417	const uint32_t uSingle = CopyFloatToUint32(f);
				418	const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				419	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				420
				421	// Bit mask that is the significand bits that would be lost when converting
				422	// from single-precision to half-precision
				423	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				424
				425	// Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
				426	if(uSingle == 0) {
				427	// Value is 0.0000, not a a subnormal
				428	result.uTag = IEEE754_UNION_IS_HALF;
				429	result.u16 = IEEE754_FloatToHalf(f);
				430	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				431	// NaN, +/- infinity
				432	result.uTag = IEEE754_UNION_IS_HALF;
				433	result.u16 = IEEE754_FloatToHalf(f);
				434	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				435	// Normal number in exponent range and precision won't be lost
				436	result.uTag = IEEE754_UNION_IS_HALF;
				437	result.u16 = IEEE754_FloatToHalf(f);
				438	} else {
				439	// Subnormal, exponent out of range, or precision will be lost
				440	result.uTag = IEEE754_UNION_IS_SINGLE;
				441	result.u32 = uSingle;
				442	}
				443
				444	return result;
				445	}
				446
				447
				448	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				449	{
				450	IEEE754_union result;
				451
				452	// Pull the needed two parts out of the double-precision float
				453	const uint64_t uDouble = CopyDoubleToUint64(d);
				454	const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				455	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				456
				457	// Masks to check whether dropped significand bits are zero or not
				458	const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				459	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
				460
				461	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame^]	462	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	463	// Value is 0.0000, not a a subnormal
				464	result.uTag = IEEE754_UNION_IS_HALF;
				465	result.u16 = IEEE754_DoubleToHalf(d);
				466	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				467	// NaN, +/- infinity
				468	result.uTag = IEEE754_UNION_IS_HALF;
				469	result.u16 = IEEE754_DoubleToHalf(d);
				470	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
				471	// Can convert to half without precision loss
				472	result.uTag = IEEE754_UNION_IS_HALF;
				473	result.u16 = IEEE754_DoubleToHalf(d);
				474	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				475	// Can convert to single without precision loss
				476	result.uTag = IEEE754_UNION_IS_SINGLE;
				477	result.u32 = CopyFloatToUint32((float)d);
				478	} else {
				479	// Can't convert without precision loss
				480	result.uTag = IEEE754_UNION_IS_DOUBLE;
				481	result.u64 = uDouble;
				482	}
				483
				484	return result;
				485	}
				486