/* src/ieee754.h - mirror/QCBOR - TrustedFirmware Git Browser */

 /* ==========================================================================
  * ieee754.h -- Conversion between half, double & single-precision floats
  *
  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * See BSD-3-Clause license in file named "LICENSE"
  *
  * Created on 7/23/18
  * ========================================================================== */

 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT

 #ifndef ieee754_h
 #define ieee754_h

 #include <stdint.h>


 /** @file ieee754.h
  *
  * This implements floating-point conversion between half, single and
  * double precision floating-point numbers, in particular conversion to
  * a smaller representation (e.g., double to single) that does not lose
  * precision for CBOR preferred serialization.
  *
  * This implementation works entirely with shifts and masks and does
  * not require any floating-point HW or library.
  *
  * This conforms to IEEE 754-2008, but note that it doesn't specify
  * conversions, just the encodings.
  *
  * This is complete, supporting +/- infinity, +/- zero, subnormals and
  * NaN payloads. NaN payloads are converted to a smaller size by
  * dropping the rightmost bits if they are zero and shifting to the
  * right. If the rightmost bits are not zero the conversion is not
  * performed. When converting from smaller to larger, the payload is
  * shifted left and zero-padded. This is what is specified by CBOR
  * preferred serialization and what modern HW conversion instructions
  * do. CBOR CDE handling for NaN is not clearly specified, but upcoming
  * documents may clarify this.
  *
  * There is no special handling of signaling and quiet NaNs. It
  * probably isn't necessary to transmit these special NaNs as their
  * purpose is more for propagating errors up through some calculation.
  * In many cases the handling of the NaN payload will work for
  * signaling and quiet NaNs.
  *
  * A previous version of this was usable as a general library for
  * conversion. This version is reduced to what is needed for CBOR.
  */


 /**
  * @brief Convert half-precision float to double-precision float.
  *
  * @param[in] uHalfPrecision   Half-precision number to convert, given
  *                             as its IEEE 754 bit pattern.
  *
  * @returns Double-precision value.
  *
  * This is a lossless conversion because every half-precision value
  * can be represented as a double. There is no error condition.
  *
  * There is no half-precision type in C, so it is represented here as
  * a @c uint16_t. The bits of @c uHalfPrecision are as described for
  * half-precision by IEEE 754.
  */
 double
 IEEE754_HalfToDouble(uint16_t uHalfPrecision);


 /**
  * Container for a floating-point value whose precision may be half,
  * single or double. The bits are kept in a @c uint64_t that may be
  * copied to a float or double. A plain cast of uValue will usually
  * work, but may trigger compiler or static analyzer warnings;
  * UsefulBufUtil_CopyUint64ToDouble() and
  * UsefulBufUtil_CopyUint32ToFloat() avoid those warnings (and do not
  * generate any extra code).
  */
 typedef struct {
    /* Size of uValue */
    enum {
       IEEE754_UNION_IS_HALF   = 2,
       IEEE754_UNION_IS_SINGLE = 4,
       IEEE754_UNION_IS_DOUBLE = 8
    } uSize;
    /* The half, single or double-precision value */
    uint64_t uValue;
 } IEEE754_union;


 /**
  * @brief Convert a double to either single or half-precision.
  *
  * @param[in] d                    The value to convert.
  * @param[in] bAllowHalfPrecision  If true, convert to either half or
  *                                 single precision.
  *                                 NOTE(review): presumably only
  *                                 single precision is attempted when
  *                                 false -- confirm against the
  *                                 implementation.
  *
  * @returns Unconverted value, or value converted to single or
  *          half-precision. The @c uSize member of the returned
  *          @ref IEEE754_union gives the size of the value held in
  *          @c uValue.
  *
  * This always succeeds. If the value cannot be converted without the
  * loss of precision, it is not converted.
  *
  * This handles all subnormals and NaN payloads.
  */
 IEEE754_union
 IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);


 /**
  * @brief Convert a single-precision float to half-precision.
  *
  * @param[in] f  The value to convert.
  *
  * @returns Either unconverted value or value converted to
  *          half-precision. The @c uSize member of the returned
  *          @ref IEEE754_union gives the size of the value held in
  *          @c uValue.
  *
  * This always succeeds. If the value cannot be converted without the
  * loss of precision, it is not converted.
  *
  * This handles all subnormals and NaN payloads.
  */
 IEEE754_union
 IEEE754_SingleToHalf(float f);


 #endif /* ieee754_h */

 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
	/* ==========================================================================
	* ieee754.h -- Conversion between half, double & single-precision floats
	*
	* Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
	*
	* SPDX-License-Identifier: BSD-3-Clause
	*
	* See BSD-3-Clause license in file named "LICENSE"
	*
	* Created on 7/23/18
	* ========================================================================== */

	#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

	#ifndef ieee754_h
	#define ieee754_h

	#include <stdint.h>


	/** @file ieee754.h
	*
	* This implements floating-point conversion between half, single and
	* double precision floating-point numbers, in particular conversion to
	* a smaller representation (e.g., double to single) that does not lose
	* precision for CBOR preferred serialization.
	*
	* This implementation works entirely with shifts and masks and does
	* not require any floating-point HW or library.
	*
	* This conforms to IEEE 754-2008, but note that it doesn't specify
	* conversions, just the encodings.
	*
	* This is complete, supporting +/- infinity, +/- zero, subnormals and
	* NaN payloads. NaN payloads are converted to a smaller size by
	* dropping the rightmost bits if they are zero and shifting to the
	* right. If the rightmost bits are not zero the conversion is not
	* performed. When converting from smaller to larger, the payload is
	* shifted left and zero-padded. This is what is specified by CBOR
	* preferred serialization and what modern HW conversion instructions
	* do. CBOR CDE handling for NaN is not clearly specified, but upcoming
	* documents may clarify this.
	*
	* There is no special handling of signaling and quiet NaNs. It
	* probably isn't necessary to transmit these special NaNs as their
	* purpose is more for propagating errors up through some calculation.
	* In many cases the handling of the NaN payload will work for
	* signaling and quiet NaNs.
	*
	* A previous version of this was usable as a general library for
	* conversion. This version is reduced to what is needed for CBOR.
	*/


	/**
	* @brief Convert half-precision float to double-precision float.
	*
	* @param[in] uHalfPrecision Half-precision number to convert, given
	* as its IEEE 754 bit pattern.
	*
	* @returns Double-precision value.
	*
	* This is a lossless conversion because every half-precision value
	* can be represented as a double. There is no error condition.
	*
	* There is no half-precision type in C, so it is represented here as
	* a @c uint16_t. The bits of @c uHalfPrecision are as described for
	* half-precision by IEEE 754.
	*/
	double
	IEEE754_HalfToDouble(uint16_t uHalfPrecision);


	/**
	* Container for a floating-point value whose precision may be half,
	* single or double. The bits are kept in a @c uint64_t that may be
	* copied to a float or double. A plain cast of uValue will usually
	* work, but may trigger compiler or static analyzer warnings;
	* UsefulBufUtil_CopyUint64ToDouble() and
	* UsefulBufUtil_CopyUint32ToFloat() avoid those warnings (and do not
	* generate any extra code).
	*/
	typedef struct {
	   /* Size of uValue */
	   enum {
	      IEEE754_UNION_IS_HALF   = 2,
	      IEEE754_UNION_IS_SINGLE = 4,
	      IEEE754_UNION_IS_DOUBLE = 8
	   } uSize;
	   /* The half, single or double-precision value */
	   uint64_t uValue;
	} IEEE754_union;


	/**
	* @brief Convert a double to either single or half-precision.
	*
	* @param[in] d The value to convert.
	* @param[in] bAllowHalfPrecision If true, convert to either half or
	* single precision.
	* NOTE(review): presumably only single precision is attempted when
	* false -- confirm against the implementation.
	*
	* @returns Unconverted value, or value converted to single or
	* half-precision. The @c uSize member of the returned
	* @ref IEEE754_union gives the size of the value held in @c uValue.
	*
	* This always succeeds. If the value cannot be converted without the
	* loss of precision, it is not converted.
	*
	* This handles all subnormals and NaN payloads.
	*/
	IEEE754_union
	IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);


	/**
	* @brief Convert a single-precision float to half-precision.
	*
	* @param[in] f The value to convert.
	*
	* @returns Either unconverted value or value converted to
	* half-precision. The @c uSize member of the returned
	* @ref IEEE754_union gives the size of the value held in @c uValue.
	*
	* This always succeeds. If the value cannot be converted without the
	* loss of precision, it is not converted.
	*
	* This handles all subnormals and NaN payloads.
	*/
	IEEE754_union
	IEEE754_SingleToHalf(float f);


	#endif /* ieee754_h */

	#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */