Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

2024-01-07 19:17:52 -0700

[diff] [blame]

1

/* ==========================================================================

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

2

* ieee754.c -- floating-point conversion for half, double & single-precision

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

*

*

* SPDX-License-Identifier: BSD-3-Clause

8

*

Laurence Lundblade

e8f5816

2024-08-22 10:30:08 -0700

[diff] [blame]

9

* See BSD-3-Clause license in file named "LICENSE"

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

10

*

11

* Created on 7/23/18

12

* ========================================================================== */

Laurence Lundblade

cc2ed34

2018-09-22 17:29:55 -0700

[diff] [blame]

13

Máté Tóth-Pál

ef5f07a

2021-09-17 19:31:37 +0200

[diff] [blame]

14

#include "qcbor/qcbor_common.h"

15

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

16

#include "ieee754.h"

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

17

#include <string.h> /* For memcpy() */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

18

Laurence Lundblade

8db3d3e

2018-09-29 11:46:37 -0700

[diff] [blame]

19

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

20

/*

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

21

* This code has long lines and is easier to read because of

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

22

* them. Some coding guidelines prefer 80 column lines (can they not

23

* afford big displays?).

24

*

25

* This code works solely using shifts and masks and thus has no

26

* dependency on any math libraries. It can even work if the CPU

27

* doesn't have any floating-point support, though that isn't the most

28

* useful thing to do.

29

*

30

* The memcpy() dependency is only for CopyFloatToUint32() and friends

31

* which only is needed to avoid type punning when converting the

32

* actual float bits to an unsigned value so the bit shifts and masks

33

* can work.

34

*

35

* The references used to write this code:

36

*

37

* IEEE 754-2008, particularly section 3.6 and 6.2.1

38

*

39

* https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages

40

*

41

* https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values

42

*

43

* https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules

44

*

45

* https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be

46

*

47

* IEEE754_FloatToDouble(uint32_t uFloat) was created but is not

48

* needed. It can be retrieved from github history if needed.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

52

53

54

/* ----- Half Precsion ----------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

55

#define HALF_NUM_SIGNIFICAND_BITS (10)

56

#define HALF_NUM_EXPONENT_BITS (5)

57

#define HALF_NUM_SIGN_BITS (1)

58

59

#define HALF_SIGNIFICAND_SHIFT (0)

60

#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)

61

#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

62

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

63

#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

64

#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

65

#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

66

#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

67

68

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

69

* 0x00 0 -15 0 and subnormal

70

* 0x01 1 -14 Smallest normal exponent

71

* 0x1e 30 15 Largest normal exponent

72

* 0x1F 31 16 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

73

#define HALF_EXPONENT_BIAS (15)

74

#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased

75

#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased

76

#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased

77

#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased

78

79

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

80

/* ------ Single-Precision -------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

81

#define SINGLE_NUM_SIGNIFICAND_BITS (23)

82

#define SINGLE_NUM_EXPONENT_BITS (8)

83

#define SINGLE_NUM_SIGN_BITS (1)

84

85

#define SINGLE_SIGNIFICAND_SHIFT (0)

86

#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)

87

#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

88

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

89

#define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits

90

#define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent

91

#define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign

92

#define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

93

94

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

95

* 0x0000 0 -127 0 and subnormal

96

* 0x0001 1 -126 Smallest normal exponent

97

* 0x7f 127 0 1

98

* 0xfe 254 127 Largest normal exponent

99

* 0xff 255 128 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

100

#define SINGLE_EXPONENT_BIAS (127)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

101

#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)

102

#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)

103

#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)

104

#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

105

106

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

107

/* --------- Double-Precision ---------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

108

#define DOUBLE_NUM_SIGNIFICAND_BITS (52)

109

#define DOUBLE_NUM_EXPONENT_BITS (11)

110

#define DOUBLE_NUM_SIGN_BITS (1)

111

112

#define DOUBLE_SIGNIFICAND_SHIFT (0)

113

#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)

114

#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

115

Laurence Lundblade

8db3d3e

2018-09-29 11:46:37 -0700

[diff] [blame]

116

#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits

117

#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent

118

#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign

119

#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))

120

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

121

122

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

123

* 0x00000000 0 -1023 0 and subnormal

124

* 0x00000001 1 -1022 Smallest normal exponent

125

* 0x000007fe 2046 1023 Largest normal exponent

126

* 0x000007ff 2047 1024 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

127

#define DOUBLE_EXPONENT_BIAS (1023)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

128

#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)

129

#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)

130

#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)

131

#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)

132

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

/*

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

137

* Convenient functions to avoid type punning, compiler warnings and

138

* such. The optimizer reduces them to a simple assignment. This is a

139

* crusty corner of C. It shouldn't be this hard.

140

*

141

* These are also in UsefulBuf.h under a different name. They are copied

142

* here to avoid a dependency on UsefulBuf.h. There is no object code

143

* size impact because these always optimze down to a simple assignment.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

144

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

145

static inline uint32_t

146

CopyFloatToUint32(float f)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

147

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

148

uint32_t u32;

149

memcpy(&u32, &f, sizeof(uint32_t));

150

return u32;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

151

}

152

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

153

static inline uint64_t

154

CopyDoubleToUint64(double d)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

155

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

156

uint64_t u64;

157

memcpy(&u64, &d, sizeof(uint64_t));

158

return u64;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

159

}

160

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

161

162

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

163

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

164

static inline double

165

CopyUint64ToDouble(uint64_t u64)

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

166

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

167

double d;

168

memcpy(&d, &u64, sizeof(uint64_t));

return d;

}

static inline float

CopyUint32ToSingle(uint32_t u32)

174

{

175

float f;

176

memcpy(&f, &u32, sizeof(uint32_t));

177

return f;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

178

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

179

180

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

181

182

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

183

/**

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

184

* @brief Assemble sign, significand and exponent into double precision float.

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

185

*

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

186

* @param[in] nIsNegative 0 if positive, 1 if negative.

187

* @pararm[in] uDoubleSignificand Bits of the significand.

188

* @param[in] nDoubleUnBiasedExponent Exponent.

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

189

*

190

* This returns the bits for a single-precision float, a binary64

191

* as specified in IEEE754.

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

192

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

193

static double

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

194

IEEE754_AssembleDouble(int nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

195

uint64_t uDoubleSignificand,

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

196

int nDoubleUnBiasedExponent)

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

197

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

198

uint64_t uDoubleBiasedExponent;

199

200

uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);

201

202

return CopyUint64ToDouble(uDoubleSignificand |

203

(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

204

((uint64_t)nIsNegative << DOUBLE_SIGN_SHIFT));

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

205

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

206

207

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

208

/* Public function; see ieee754.h */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

209

double

210

IEEE754_HalfToDouble(uint16_t uHalfPrecision)

211

{

212

uint64_t uDoubleSignificand;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

213

int nDoubleUnBiasedExponent;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

214

double dResult;

215

216

/* Pull out the three parts of the half-precision float. Do all

217

* the work in 64 bits because that is what the end result is. It

218

* may give smaller code size and will keep static analyzers

219

* happier.

220

*/

221

const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;

222

const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

223

const int nHalfUnBiasedExponent = (int)uHalfBiasedExponent - HALF_EXPONENT_BIAS;

224

const int nIsNegative = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

225

226

if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {

227

/* 0 or subnormal */

228

if(uHalfSignificand) {

229

/* --- SUBNORMAL --- */

230

/* A half-precision subnormal can always be converted to a

231

* normal double-precision float because the ranges line up.

232

* The exponent of a subnormal starts out at the min exponent

233

* for a normal. As the sub normal significand bits are

234

* shifted, left to normalize, the exponent is

235

* decremented. Shifting continues until fully normalized.

236

*/

237

nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;

238

uDoubleSignificand = uHalfSignificand;

239

do {

240

uDoubleSignificand <<= 1;

241

nDoubleUnBiasedExponent--;

242

} while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);

243

/* A normal has an implied 1 in the most significant

244

* position that a subnormal doesn't. */

245

uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;

246

/* Must shift into place for a double significand */

247

uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;

248

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

249

dResult = IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

250

uDoubleSignificand,

251

nDoubleUnBiasedExponent);

252

} else {

253

/* --- ZERO --- */

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

254

dResult = IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

255

0,

256

DOUBLE_EXPONENT_ZERO);

257

}

258

} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {

259

/* NaN or Inifinity */

260

if(uHalfSignificand) {

261

/* --- NaN --- */

262

/* Half-precision payloads always fit into double precision

263

* payloads. They are shifted left the same as a normal

264

* number significand.

265

*/

266

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

267

dResult = IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

268

uDoubleSignificand,

269

DOUBLE_EXPONENT_INF_OR_NAN);

270

} else {

271

/* --- INFINITY --- */

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

272

dResult = IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

273

0,

274

DOUBLE_EXPONENT_INF_OR_NAN);

275

}

276

} else {

277

/* --- NORMAL NUMBER --- */

278

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

279

dResult = IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

280

uDoubleSignificand,

281

nHalfUnBiasedExponent);

}

return dResult;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

290

*

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

291

* @param[in] nIsNegative 0 if positive, 1 if negative

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

292

* @pararm[in] uHalfSignificand Bits of the significand

293

* @param[in] nHalfUnBiasedExponent Exponent

294

*

295

* This returns the bits for a single-precision float, a binary32 as

296

* specified in IEEE754. It is returned as a uint64_t rather than a

297

* uint32_t or a float for convenience of usage.

298

*/

299

static uint32_t

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

300

IEEE754_AssembleHalf(int nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

301

uint32_t uHalfSignificand,

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

302

int nHalfUnBiasedExponent)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

303

{

304

uint32_t uHalfUnbiasedExponent;

305

306

uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);

307

308

return uHalfSignificand |

309

(uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

310

((uint32_t)nIsNegative << HALF_SIGN_SHIFT);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

}

/* Public function; see ieee754.h */

315

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

316

IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

317

{

318

IEEE754_union result;

319

uint32_t uDroppedBits;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

320

int nExponentDifference;

321

int nShiftAmount;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

322

uint32_t uHalfSignificand;

323

324

/* Pull the three parts out of the double-precision float Most work

325

* is done with uint32_t which helps avoid integer promotions and

326

* static analyzer complaints.

327

*/

328

const uint32_t uSingle = CopyFloatToUint32(f);

329

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

330

const int nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

331

const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

332

const int nIsNegative = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

333

334

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

335

if(uSingleSignificand == 0) {

336

/* --- IS ZERO --- */

337

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

338

result.uValue = IEEE754_AssembleHalf(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

0,

HALF_EXPONENT_ZERO);

} else {

/* --- IS SINGLE SUBNORMAL --- */

343

/* The largest single subnormal is slightly less than the

344

* largest single normal which is 2^-149 or

345

* 2.2040517676619426e-38. The smallest half subnormal is

346

* 2^-14 or 5.9604644775390625E-8. There is no overlap so

347

* single subnormals can't be converted to halfs of any sort.

348

*/

349

result.uSize = IEEE754_UNION_IS_SINGLE;

350

result.uValue = uSingle;

351

}

352

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

353

if(uSingleSignificand == 0) {

354

/* ---- IS INFINITY ---- */

355

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

356

result.uValue = IEEE754_AssembleHalf(nIsNegative, 0, HALF_EXPONENT_INF_OR_NAN);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

357

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

358

if(bNoNaNPayload) {

359

/* --- REQUIRE CANNONICAL NAN --- */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

360

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

361

result.uValue = IEEE754_AssembleHalf(nIsNegative,

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

362

HALF_QUIET_NAN_BIT,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

363

HALF_EXPONENT_INF_OR_NAN);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

364

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

365

/* The NaN can only be converted if no payload bits are lost

366

* per RFC 8949 section 4.1 that defines Preferred

367

* Serializaton. Note that Deterministically Encode CBOR in

368

* section 4.2 allows for some variation of this rule, but at

369

* the moment this implementation is of Preferred

370

* Serialization, not CDE. As of December 2023, we are also

371

* expecting an update to CDE. This code may need to be

372

* updated for CDE.

373

*/

374

uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);

375

if(uDroppedBits == 0) {

376

/* --- IS CONVERTABLE NAN --- */

377

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

378

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

379

result.uValue = IEEE754_AssembleHalf(nIsNegative,

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

380

uHalfSignificand,

381

HALF_EXPONENT_INF_OR_NAN);

382

383

} else {

384

/* --- IS UNCONVERTABLE NAN --- */

385

result.uSize = IEEE754_UNION_IS_SINGLE;

386

result.uValue = uSingle;

387

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

}

}

} else {

/* ---- REGULAR NUMBER ---- */

392

/* A regular single can be converted to a regular half if the

393

* single's exponent is in the smaller range of a half and if no

394

* precision is lost in the significand.

395

*/

396

if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&

397

nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&

398

(uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {

399

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

400

401

/* --- CONVERT TO HALF NORMAL --- */

402

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

403

result.uValue = IEEE754_AssembleHalf(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

404

uHalfSignificand,

405

nSingleUnbiasedExponent);

406

} else {

407

/* Unable to convert to a half normal. See if it can be

408

* converted to a half subnormal. To do that, the exponent

409

* must be in range and no precision can be lost in the

410

* signficand.

411

*

412

* This is more complicated because the number is not

413

* normalized. The signficand must be shifted proprotionally

414

* to the exponent and 1 must be added in. See

415

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

416

*

417

* Exponents -14 to -24 map to a shift of 0 to 10 of the

418

* significand. The largest value of a half subnormal has an

419

* exponent of -14. Subnormals are not normalized like

420

* normals meaning they lose precision as the numbers get

421

* smaller. Normals don't lose precision because the exponent

422

* allows all the bits of the significand to be significant.

423

*/

424

/* The exponent of the largest possible half-precision

425

* subnormal is HALF_EXPONENT_MIN (-14). Exponents larger

426

* than this are normal and handled above. We're going to

427

* shift the significand right by at least this amount.

428

*/

429

nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);

430

431

/* In addition to the shift based on the exponent's value,

432

* the single significand has to be shifted right to fit into

433

* a half-precision significand */

434

nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

435

436

/* Must add 1 in to the possible significand because there is

437

* an implied 1 for normal values and not for subnormal

438

* values. See equations here:

439

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

440

*/

441

uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

442

443

/* If only zero bits get shifted out, this can be converted

444

* to subnormal */

445

if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&

446

nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&

447

uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {

448

/* --- CONVERTABLE TO HALF SUBNORMAL --- */

449

result.uSize = IEEE754_UNION_IS_HALF;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

450

result.uValue = IEEE754_AssembleHalf(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

uHalfSignificand,

HALF_EXPONENT_ZERO);

} else {

/* --- DO NOT CONVERT --- */

455

result.uSize = IEEE754_UNION_IS_SINGLE;

456

result.uValue = uSingle;

}

}

}

return result;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

467

*

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

468

* @param[in] nIsNegative 0 if positive, 1 if negative.

469

* @pararm[in] uSingleSignificand Bits of the significand.

470

* @param[in] nSingleUnBiasedExponent Exponent.

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

471

*

472

* This returns the bits for a single-precision float, a binary32 as

473

* specified in IEEE754. It is returned as a uint64_t rather than a

474

* uint32_t or a float for convenience of usage.

475

*/

476

static uint64_t

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

477

IEEE754_AssembleSingle(int nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

478

uint64_t uSingleSignificand,

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

479

int nSingleUnBiasedExponent)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

480

{

481

uint64_t uSingleBiasedExponent;

482

483

uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);

484

485

return uSingleSignificand |

486

(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

487

((uint64_t)nIsNegative << SINGLE_SIGN_SHIFT);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

}

/**

* @brief Convert a double-precision float to single-precision.

493

*

494

* @param[in] d The value to convert.

495

*

496

* @returns Either unconverted value or value converted to single-precision.

497

*

498

* This always succeeds. If the value cannot be converted without the

499

* loss of precision, it is not converted.

500

*

501

* This handles all subnormals and NaN payloads.

502

*/

503

static IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

504

IEEE754_DoubleToSingle(const double d)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

505

{

506

IEEE754_union Result;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

507

int nExponentDifference;

508

int nShiftAmount;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

509

uint64_t uSingleSignificand;

510

uint64_t uDroppedBits;

511

512

513

/* Pull the three parts out of the double-precision float. Most

514

* work is done with uint64_t which helps avoid integer promotions

515

* and static analyzer complaints.

516

*/

517

const uint64_t uDouble = CopyDoubleToUint64(d);

518

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

519

const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

520

const int nIsNegative = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

521

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

522

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

523

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

524

if(uDoubleSignificand == 0) {

525

/* --- IS ZERO --- */

526

Result.uSize = IEEE754_UNION_IS_SINGLE;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

527

Result.uValue = IEEE754_AssembleSingle(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

528

0,

529

SINGLE_EXPONENT_ZERO);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

530

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

531

/* --- IS DOUBLE SUBNORMAL --- */

532

/* The largest double subnormal is slightly less than the

533

* largest double normal which is 2^-1022 or

534

* 2.2250738585072014e-308. The smallest single subnormal

535

* is 2^-149 or 1.401298464324817e-45. There is no

536

* overlap so double subnormals can't be converted to

537

* singles of any sort.

538

*/

539

Result.uSize = IEEE754_UNION_IS_DOUBLE;

540

Result.uValue = uDouble;

541

}

542

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

543

if(uDoubleSignificand == 0) {

544

/* ---- IS INFINITY ---- */

545

Result.uSize = IEEE754_UNION_IS_SINGLE;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

546

Result.uValue = IEEE754_AssembleSingle(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

547

0,

548

SINGLE_EXPONENT_INF_OR_NAN);

549

} else {

550

/* The NaN can only be converted if no payload bits are

551

* lost per RFC 8949 section 4.1 that defines Preferred

552

* Serializaton. Note that Deterministically Encode CBOR

553

* in section 4.2 allows for some variation of this rule,

554

* but at the moment this implementation is of Preferred

555

* Serialization, not CDE. As of December 2023, we are

556

* also expecting an update to CDE. This code may need to

557

* be updated for CDE.

558

*/

559

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

560

if(uDroppedBits == 0) {

561

/* --- IS CONVERTABLE NAN --- */

562

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

563

Result.uSize = IEEE754_UNION_IS_SINGLE;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

564

Result.uValue = IEEE754_AssembleSingle(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

565

uSingleSignificand,

566

SINGLE_EXPONENT_INF_OR_NAN);

567

} else {

568

/* --- IS UNCONVERTABLE NAN --- */

569

Result.uSize = IEEE754_UNION_IS_DOUBLE;

570

Result.uValue = uDouble;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

571

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

572

}

573

} else {

574

/* ---- REGULAR NUMBER ---- */

575

/* A regular double can be converted to a regular single if

576

* the double's exponent is in the smaller range of a single

577

* and if no precision is lost in the significand.

578

*/

579

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

580

if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&

581

nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&

582

uDroppedBits == 0) {

583

/* --- IS CONVERTABLE TO SINGLE --- */

584

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

585

Result.uSize = IEEE754_UNION_IS_SINGLE;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

586

Result.uValue = IEEE754_AssembleSingle(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

587

uSingleSignificand,

588

nDoubleUnbiasedExponent);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

589

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

590

/* Unable to convert to a single normal. See if it can be

591

* converted to a single subnormal. To do that, the

592

* exponent must be in range and no precision can be lost

593

* in the signficand.

594

*

595

* This is more complicated because the number is not

596

* normalized. The signficand must be shifted

597

* proprotionally to the exponent and 1 must be added

598

* in. See

599

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

600

*/

601

nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);

602

nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

603

uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

604

605

if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&

606

nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&

607

uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {

608

/* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */

609

Result.uSize = IEEE754_UNION_IS_SINGLE;

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

610

Result.uValue = IEEE754_AssembleSingle(nIsNegative,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

611

uSingleSignificand,

612

SINGLE_EXPONENT_ZERO);

613

} else {

614

/* --- CAN NOT BE CONVERTED --- */

615

Result.uSize = IEEE754_UNION_IS_DOUBLE;

616

Result.uValue = uDouble;

617

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

618

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

619

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

620

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

621

return Result;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

622

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

623

624

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

625

/* Public function; see ieee754.h */

626

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

627

IEEE754_DoubleToSmaller(const double d,

628

const int bAllowHalfPrecision,

629

const int bNoNanPayload)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

630

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

631

IEEE754_union result;

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

632

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

633

result = IEEE754_DoubleToSingle(d);

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

634

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

635

if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {

636

/* Cast to uint32_t is OK, because value was just successfully

637

* converted to single. */

638

float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

639

result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

640

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

641

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

642

return result;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

643

}

644

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

645

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

646

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

647

648

/**

649

* @brief Count the bits of preceision in a significand.

650

*

651

* @param[in] uSignificand The significand as uint64_t.

652

*

653

* @return The number of bits set.

654

655

* This returns 64 minus the number of zero bits on the right. It is

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

656

* is the amount of precision in the 64-bit significand passed in.

657

* When used for 52 and 23-bit significands, subtract 12 and 41

658

* to get their precision.

659

*

660

* The value returned is for a *normalized* number like the

661

* significand of a double. When used for precision for a non-normalized

662

* number like a uint64_t, further computation is required.

663

*

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

664

* If the significand is 0, then 0 is returned as the precision.

665

*/

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

666

static int

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

667

IEEE754_Private_CountPrecisionBits(uint64_t uSignificand)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

668

{

669

int nNonZeroBitsCount;

670

uint64_t uMask;

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

671

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

672

for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {

673

uMask = 0x01UL << (64 - nNonZeroBitsCount);

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

674

if(uMask & uSignificand) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

675

break;

676

}

677

}

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

678

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

679

return nNonZeroBitsCount;

}

/* Public function; see ieee754.h */

684

struct IEEE754_ToInt

685

IEEE754_DoubleToInt(const double d)

686

{

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

687

int64_t nPrecisionBits;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

688

struct IEEE754_ToInt Result;

689

uint64_t uInteger;

690

691

/* Pull the three parts out of the double-precision float. Most

692

* work is done with uint64_t which helps avoid integer promotions

693

* and static analyzer complaints.

694

*/

695

const uint64_t uDouble = CopyDoubleToUint64(d);

696

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

697

/* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

698

const int nDoubleUnbiasedExponent = (int)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

699

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

700

const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

701

702

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

703

if(uDoubleSignificand == 0) {

704

/* --- POSITIVE AND NEGATIVE ZERO --- */

705

Result.integer.un_signed = 0;

706

Result.type = IEEE754_ToInt_IS_UINT;

707

} else {

708

/* --- SUBNORMAL --- */

709

Result.type = IEEE754_ToInt_NO_CONVERSION;

710

}

711

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

712

if(uDoubleSignificand != 0) {

713

/* --- NAN --- */

714

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

715

} else {

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

716

/* --- INFINITY --- */

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

717

Result.type = IEEE754_ToInt_NO_CONVERSION;

718

}

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

719

} else if(nDoubleUnbiasedExponent < 0) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

720

/* --- Exponent out of range --- */

721

Result.type = IEEE754_ToInt_NO_CONVERSION;

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

722

} else if(nDoubleUnbiasedExponent >= 64) {

723

if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) {

724

/* Very special case for -18446744073709551616.0 */

725

Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */

726

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

727

} else {

728

/* --- Exponent out of range --- */

729

Result.type = IEEE754_ToInt_NO_CONVERSION;

730

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

731

} else {

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

732

/* Conversion only fails when the input is too large or is not a

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

733

* whole number, never because of lack of precision because

734

* 64-bit integers always have more precision than the 52-bits

735

* of a double.

736

*/

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

737

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) -

738

(64-DOUBLE_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

739

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

740

if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

741

/* --- Not a whole number --- */

742

Result.type = IEEE754_ToInt_NO_CONVERSION;

743

} else {

744

/* --- CONVERTABLE WHOLE NUMBER --- */

745

/* Add in the one that is implied in normal floats */

746

uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);

747

/* Factor in the exponent */

748

if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {

749

/* Numbers less than 2^52 with up to 52 significant bits */

750

uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;

751

} else {

752

/* Numbers greater than 2^52 with at most 52 significant bits */

753

uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;

754

}

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

755

if(bIsNegative) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

756

/* Cast safe because exponent range check above */

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

757

if(nDoubleUnbiasedExponent == 63) {

758

Result.integer.un_signed = uInteger;

759

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

760

} else {

761

Result.integer.is_signed = -((int64_t)uInteger);

762

Result.type = IEEE754_ToInt_IS_INT;

763

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

764

} else {

765

Result.integer.un_signed = uInteger;

766

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

/* Public function; see ieee754.h */

776

struct IEEE754_ToInt

777

IEEE754_SingleToInt(const float f)

778

{

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

779

int nPrecisionBits;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

780

struct IEEE754_ToInt Result;

781

uint64_t uInteger;

782

783

/* Pull the three parts out of the single-precision float. Most

784

* work is done with uint32_t which helps avoid integer promotions

785

* and static analyzer complaints.

786

*/

787

const uint32_t uSingle = CopyFloatToUint32(f);

788

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

789

/* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

790

const int nSingleUnbiasedExponent = (int)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

791

const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

792

const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK;

793

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

794

795

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

796

if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {

797

/* --- POSITIVE AND NEGATIVE ZERO --- */

798

Result.integer.un_signed = 0;

799

Result.type = IEEE754_ToInt_IS_UINT;

800

} else {

801

/* --- Subnormal --- */

802

Result.type = IEEE754_ToInt_NO_CONVERSION;

803

}

804

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

805

/* --- NAN or INFINITY --- */

806

if(uSingleleSignificand != 0) {

807

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

808

} else {

809

Result.type = IEEE754_ToInt_NO_CONVERSION;

810

}

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

811

} else if(nSingleUnbiasedExponent < 0) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

812

/* --- Exponent out of range --- */

813

Result.type = IEEE754_ToInt_NO_CONVERSION;

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

814

} else if(nSingleUnbiasedExponent >= 64) {

815

if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) {

816

/* Very special case for -18446744073709551616.0 */

817

Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */

818

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

819

} else {

820

/* --- Exponent out of range --- */

821

Result.type = IEEE754_ToInt_NO_CONVERSION;

822

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

823

} else {

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

824

/* Conversion only fails when the input is too large or is not a

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

825

* whole number, never because of lack of precision because

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

826

* 64-bit integers always have more precision than the 23 bits

827

* of a single.

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

828

*/

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

829

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) -

830

(64 - SINGLE_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

831

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

832

if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

833

/* --- Not a whole number --- */

834

Result.type = IEEE754_ToInt_NO_CONVERSION;

835

} else {

836

/* --- CONVERTABLE WHOLE NUMBER --- */

837

/* Add in the one that is implied in normal floats */

838

uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);

839

/* Factor in the exponent */

840

if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {

841

/* Numbers less than 2^23 with up to 23 significant bits */

842

uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;

843

} else {

844

/* Numbers greater than 2^23 with at most 23 significant bits*/

845

uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;

846

}

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

847

if(bIsNegative) {

848

/* Cast safe because exponent range check above */

849

if(nSingleUnbiasedExponent == 63) {

850

Result.integer.un_signed = uInteger;

851

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

852

} else {

853

Result.integer.is_signed = -((int64_t)uInteger);

854

Result.type = IEEE754_ToInt_IS_INT;

855

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

856

} else {

857

Result.integer.un_signed = uInteger;

858

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

865

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

866

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

867

/* Public function; see ieee754.h */

868

double

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

869

IEEE754_UintToDouble(const uint64_t uInt, const int nIsNegative)

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

870

{

871

int nDoubleUnbiasedExponent;

872

uint64_t uDoubleSignificand;

873

int nPrecisionBits;

874

Laurence Lundblade

2024-07-24 22:13:35 -0700

[diff] [blame]

875

if(uInt == 0) {

876

uDoubleSignificand = 0;

877

nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO;

878

879

} else {

880

/* Figure out the exponent and normalize the significand. This is

881

* done by shifting out all leading zero bits and counting them. If

882

* none are shifted out, the exponent is 63. */

883

uDoubleSignificand = uInt;

884

nDoubleUnbiasedExponent = 63;

885

while(1) {

886

if(uDoubleSignificand & 0x8000000000000000UL) {

887

break;

888

}

889

uDoubleSignificand <<= 1;

890

nDoubleUnbiasedExponent--;

891

};

892

893

/* Position significand correctly for a double. Only shift 63 bits

894

* because of the 1 that is present by implication in IEEE 754.*/

895

uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS;

896

897

/* Subtract 1 which is present by implication in IEEE 754 */

898

uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS);

899

900

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent);

901

902

if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) {

903

/* Will lose precision if converted */

904

return IEEE754_UINT_TO_DOUBLE_OOB;

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

905

}

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

906

}

907

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

908

return IEEE754_AssembleDouble(nIsNegative,

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

909

uDoubleSignificand,

910

nDoubleUnbiasedExponent);

911

}

912

Laurence Lundblade

2024-11-09 23:01:11 -0800

[diff] [blame]

913

#endif /* ! QCBOR_DISABLE_PREFERRED_FLOAT */

914

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

/* Public function; see ieee754.h */

919

int

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

920

IEEE754_DoubleHasNaNPayload(const double d)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

921

{

922

const uint64_t uDouble = CopyDoubleToUint64(d);

923

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

924

/* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */

925

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

926

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

927

928

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN &&

929

uDoubleSignificand != 0 &&

930

uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) {

return 1;

} else {

return 0;

}

}

/* Public function; see ieee754.h */

939

int

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

940

IEEE754_SingleHasNaNPayload(const float f)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

941

{

942

const uint32_t uSingle = CopyFloatToUint32(f);

943

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

944

/* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */

945

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

946

const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

947

948

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN &&

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

949

uSingleSignificand != 0 &&

950

uSingleSignificand != SINGLE_QUIET_NAN_BIT) {

Laurence Lundblade