Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

2024-01-07 19:17:52 -0700

[diff] [blame]

1

/* ==========================================================================

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

2

* ieee754.c -- floating-point conversion for half, double & single-precision

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

*

*

* SPDX-License-Identifier: BSD-3-Clause

8

*

9

* See BSD-3-Clause license in README.md

10

*

11

* Created on 7/23/18

12

* ========================================================================== */

Laurence Lundblade

cc2ed34

2018-09-22 17:29:55 -0700

[diff] [blame]

13

Máté Tóth-Pál

ef5f07a

2021-09-17 19:31:37 +0200

[diff] [blame]

14

#include "qcbor/qcbor_common.h"

15

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

16

#include "ieee754.h"

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

17

#include <string.h> /* For memcpy() */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

18

Laurence Lundblade

8db3d3e

2018-09-29 11:46:37 -0700

[diff] [blame]

19

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

20

/*

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

21

* This has long lines and is easier to read because of

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

22

* them. Some coding guidelines prefer 80 column lines (can they not

23

* afford big displays?).

24

*

25

* This code works solely using shifts and masks and thus has no

26

* dependency on any math libraries. It can even work if the CPU

27

* doesn't have any floating-point support, though that isn't the most

28

* useful thing to do.

29

*

30

* The memcpy() dependency is only for CopyFloatToUint32() and friends

31

* which only is needed to avoid type punning when converting the

32

* actual float bits to an unsigned value so the bit shifts and masks

33

* can work.

34

*

35

* The references used to write this code:

36

*

37

* IEEE 754-2008, particularly section 3.6 and 6.2.1

38

*

39

* https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages

40

*

41

* https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values

42

*

43

* https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules

44

*

45

* https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be

46

*

47

* IEEE754_FloatToDouble(uint32_t uFloat) was created but is not

48

* needed. It can be retrieved from github history if needed.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

52

53

54

/* ----- Half Precsion ----------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

55

#define HALF_NUM_SIGNIFICAND_BITS (10)

56

#define HALF_NUM_EXPONENT_BITS (5)

57

#define HALF_NUM_SIGN_BITS (1)

58

59

#define HALF_SIGNIFICAND_SHIFT (0)

60

#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)

61

#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

62

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

63

#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

64

#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

65

#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

66

#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

67

68

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

69

* 0x00 0 -15 0 and subnormal

70

* 0x01 1 -14 Smallest normal exponent

71

* 0x1e 30 15 Largest normal exponent

72

* 0x1F 31 16 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

73

#define HALF_EXPONENT_BIAS (15)

74

#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased

75

#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased

76

#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased

77

#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased

78

79

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

80

/* ------ Single-Precision -------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

81

#define SINGLE_NUM_SIGNIFICAND_BITS (23)

82

#define SINGLE_NUM_EXPONENT_BITS (8)

83

#define SINGLE_NUM_SIGN_BITS (1)

84

85

#define SINGLE_SIGNIFICAND_SHIFT (0)

86

#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)

87

#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

88

Laurence Lundblade

06350ea

2020-01-27 19:32:40 -0800

[diff] [blame]

89

#define SINGLE_SIGNIFICAND_MASK (0x7fffffU) // The lower 23 bits

90

#define SINGLE_EXPONENT_MASK (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent

91

#define SINGLE_SIGN_MASK (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign

92

#define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

93

94

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

95

* 0x0000 0 -127 0 and subnormal

96

* 0x0001 1 -126 Smallest normal exponent

97

* 0x7f 127 0 1

98

* 0xfe 254 127 Largest normal exponent

99

* 0xff 255 128 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

100

#define SINGLE_EXPONENT_BIAS (127)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

101

#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)

102

#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)

103

#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)

104

#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

105

106

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

107

/* --------- Double-Precision ---------- */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

108

#define DOUBLE_NUM_SIGNIFICAND_BITS (52)

109

#define DOUBLE_NUM_EXPONENT_BITS (11)

110

#define DOUBLE_NUM_SIGN_BITS (1)

111

112

#define DOUBLE_SIGNIFICAND_SHIFT (0)

113

#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)

114

#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

115

Laurence Lundblade

8db3d3e

2018-09-29 11:46:37 -0700

[diff] [blame]

116

#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits

117

#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent

118

#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign

119

#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))

120

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

121

122

/* Biased Biased Unbiased Use

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

123

* 0x00000000 0 -1023 0 and subnormal

124

* 0x00000001 1 -1022 Smallest normal exponent

125

* 0x000007fe 2046 1023 Largest normal exponent

126

* 0x000007ff 2047 1024 NaN and Infinity */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

127

#define DOUBLE_EXPONENT_BIAS (1023)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

128

#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)

129

#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)

130

#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)

131

#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)

132

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

/*

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

137

* Convenient functions to avoid type punning, compiler warnings and

138

* such. The optimizer reduces them to a simple assignment. This is a

139

* crusty corner of C. It shouldn't be this hard.

140

*

141

* These are also in UsefulBuf.h under a different name. They are copied

142

* here to avoid a dependency on UsefulBuf.h. There is no object code

143

* size impact because these always optimze down to a simple assignment.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

144

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

145

static inline uint32_t

146

CopyFloatToUint32(float f)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

147

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

148

uint32_t u32;

149

memcpy(&u32, &f, sizeof(uint32_t));

150

return u32;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

151

}

152

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

153

static inline uint64_t

154

CopyDoubleToUint64(double d)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

155

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

156

uint64_t u64;

157

memcpy(&u64, &d, sizeof(uint64_t));

158

return u64;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

159

}

160

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

161

162

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

163

164

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

165

static inline double

166

CopyUint64ToDouble(uint64_t u64)

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

167

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

168

double d;

169

memcpy(&d, &u64, sizeof(uint64_t));

return d;

}

static inline float

CopyUint32ToSingle(uint32_t u32)

175

{

176

float f;

177

memcpy(&f, &u32, sizeof(uint32_t));

178

return f;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

179

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

180

181

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

182

183

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

184

/**

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

185

* @brief Assemble sign, significand and exponent into double precision float.

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

186

*

187

* @param[in] uDoubleSign 0 if positive, 1 if negative

188

* @pararm[in] uDoubleSignificand Bits of the significand

189

* @param[in] nDoubleUnBiasedExponent Exponent

190

*

191

* This returns the bits for a single-precision float, a binary64

192

* as specified in IEEE754.

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

193

*/

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

194

// TODO: make the sign and exponent type int?

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

195

static double

196

IEEE754_AssembleDouble(uint64_t uDoubleSign,

197

uint64_t uDoubleSignificand,

198

int64_t nDoubleUnBiasedExponent)

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

199

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

200

uint64_t uDoubleBiasedExponent;

201

202

uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);

203

204

return CopyUint64ToDouble(uDoubleSignificand |

205

(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |

206

(uDoubleSign << DOUBLE_SIGN_SHIFT));

207

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

208

209

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

210

/* Public function; see ieee754.h */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

211

double

212

IEEE754_HalfToDouble(uint16_t uHalfPrecision)

213

{

214

uint64_t uDoubleSignificand;

215

int64_t nDoubleUnBiasedExponent;

216

double dResult;

217

218

/* Pull out the three parts of the half-precision float. Do all

219

* the work in 64 bits because that is what the end result is. It

220

* may give smaller code size and will keep static analyzers

221

* happier.

222

*/

223

const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;

224

const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;

225

const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;

226

const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;

227

228

if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {

229

/* 0 or subnormal */

230

if(uHalfSignificand) {

231

/* --- SUBNORMAL --- */

232

/* A half-precision subnormal can always be converted to a

233

* normal double-precision float because the ranges line up.

234

* The exponent of a subnormal starts out at the min exponent

235

* for a normal. As the sub normal significand bits are

236

* shifted, left to normalize, the exponent is

237

* decremented. Shifting continues until fully normalized.

238

*/

239

nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;

240

uDoubleSignificand = uHalfSignificand;

241

do {

242

uDoubleSignificand <<= 1;

243

nDoubleUnBiasedExponent--;

244

} while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);

245

/* A normal has an implied 1 in the most significant

246

* position that a subnormal doesn't. */

247

uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;

248

/* Must shift into place for a double significand */

249

uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;

250

251

dResult = IEEE754_AssembleDouble(uHalfSign,

252

uDoubleSignificand,

253

nDoubleUnBiasedExponent);

254

} else {

255

/* --- ZERO --- */

256

dResult = IEEE754_AssembleDouble(uHalfSign,

257

0,

258

DOUBLE_EXPONENT_ZERO);

259

}

260

} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {

261

/* NaN or Inifinity */

262

if(uHalfSignificand) {

263

/* --- NaN --- */

264

/* Half-precision payloads always fit into double precision

265

* payloads. They are shifted left the same as a normal

266

* number significand.

267

*/

268

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

269

dResult = IEEE754_AssembleDouble(uHalfSign,

270

uDoubleSignificand,

271

DOUBLE_EXPONENT_INF_OR_NAN);

272

} else {

273

/* --- INFINITY --- */

274

dResult = IEEE754_AssembleDouble(uHalfSign,

275

0,

276

DOUBLE_EXPONENT_INF_OR_NAN);

277

}

278

} else {

279

/* --- NORMAL NUMBER --- */

280

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

281

dResult = IEEE754_AssembleDouble(uHalfSign,

282

uDoubleSignificand,

283

nHalfUnBiasedExponent);

}

return dResult;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

292

*

293

* @param[in] uHalfSign 0 if positive, 1 if negative

294

* @pararm[in] uHalfSignificand Bits of the significand

295

* @param[in] nHalfUnBiasedExponent Exponent

296

*

297

* This returns the bits for a single-precision float, a binary32 as

298

* specified in IEEE754. It is returned as a uint64_t rather than a

299

* uint32_t or a float for convenience of usage.

300

*/

301

static uint32_t

302

IEEE754_AssembleHalf(uint32_t uHalfSign,

303

uint32_t uHalfSignificand,

304

int32_t nHalfUnBiasedExponent)

305

{

306

uint32_t uHalfUnbiasedExponent;

307

308

uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);

309

310

return uHalfSignificand |

311

(uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |

312

(uHalfSign << HALF_SIGN_SHIFT);

}

/* Public function; see ieee754.h */

317

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

318

IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

319

{

320

IEEE754_union result;

321

uint32_t uDroppedBits;

322

int32_t nExponentDifference;

323

int32_t nShiftAmount;

324

uint32_t uHalfSignificand;

325

326

/* Pull the three parts out of the double-precision float Most work

327

* is done with uint32_t which helps avoid integer promotions and

328

* static analyzer complaints.

329

*/

330

const uint32_t uSingle = CopyFloatToUint32(f);

331

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

332

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

333

const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

334

const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;

335

336

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

337

if(uSingleSignificand == 0) {

338

/* --- IS ZERO --- */

339

result.uSize = IEEE754_UNION_IS_HALF;

340

result.uValue = IEEE754_AssembleHalf(uSingleSign,

0,

HALF_EXPONENT_ZERO);

} else {

/* --- IS SINGLE SUBNORMAL --- */

345

/* The largest single subnormal is slightly less than the

346

* largest single normal which is 2^-149 or

347

* 2.2040517676619426e-38. The smallest half subnormal is

348

* 2^-14 or 5.9604644775390625E-8. There is no overlap so

349

* single subnormals can't be converted to halfs of any sort.

350

*/

351

result.uSize = IEEE754_UNION_IS_SINGLE;

352

result.uValue = uSingle;

353

}

354

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

355

if(uSingleSignificand == 0) {

356

/* ---- IS INFINITY ---- */

357

result.uSize = IEEE754_UNION_IS_HALF;

358

result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);

359

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

360

if(bNoNaNPayload) {

361

/* --- REQUIRE CANNONICAL NAN --- */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

362

result.uSize = IEEE754_UNION_IS_HALF;

363

result.uValue = IEEE754_AssembleHalf(uSingleSign,

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

364

HALF_QUIET_NAN_BIT,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

365

HALF_EXPONENT_INF_OR_NAN);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

366

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

367

/* The NaN can only be converted if no payload bits are lost

368

* per RFC 8949 section 4.1 that defines Preferred

369

* Serializaton. Note that Deterministically Encode CBOR in

370

* section 4.2 allows for some variation of this rule, but at

371

* the moment this implementation is of Preferred

372

* Serialization, not CDE. As of December 2023, we are also

373

* expecting an update to CDE. This code may need to be

374

* updated for CDE.

375

*/

376

uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);

377

if(uDroppedBits == 0) {

378

/* --- IS CONVERTABLE NAN --- */

379

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

380

result.uSize = IEEE754_UNION_IS_HALF;

381

result.uValue = IEEE754_AssembleHalf(uSingleSign,

382

uHalfSignificand,

383

HALF_EXPONENT_INF_OR_NAN);

384

385

} else {

386

/* --- IS UNCONVERTABLE NAN --- */

387

result.uSize = IEEE754_UNION_IS_SINGLE;

388

result.uValue = uSingle;

389

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

}

}

} else {

/* ---- REGULAR NUMBER ---- */

394

/* A regular single can be converted to a regular half if the

395

* single's exponent is in the smaller range of a half and if no

396

* precision is lost in the significand.

397

*/

398

if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&

399

nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&

400

(uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {

401

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

402

403

/* --- CONVERT TO HALF NORMAL --- */

404

result.uSize = IEEE754_UNION_IS_HALF;

405

result.uValue = IEEE754_AssembleHalf(uSingleSign,

406

uHalfSignificand,

407

nSingleUnbiasedExponent);

408

} else {

409

/* Unable to convert to a half normal. See if it can be

410

* converted to a half subnormal. To do that, the exponent

411

* must be in range and no precision can be lost in the

412

* signficand.

413

*

414

* This is more complicated because the number is not

415

* normalized. The signficand must be shifted proprotionally

416

* to the exponent and 1 must be added in. See

417

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

418

*

419

* Exponents -14 to -24 map to a shift of 0 to 10 of the

420

* significand. The largest value of a half subnormal has an

421

* exponent of -14. Subnormals are not normalized like

422

* normals meaning they lose precision as the numbers get

423

* smaller. Normals don't lose precision because the exponent

424

* allows all the bits of the significand to be significant.

425

*/

426

/* The exponent of the largest possible half-precision

427

* subnormal is HALF_EXPONENT_MIN (-14). Exponents larger

428

* than this are normal and handled above. We're going to

429

* shift the significand right by at least this amount.

430

*/

431

nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);

432

433

/* In addition to the shift based on the exponent's value,

434

* the single significand has to be shifted right to fit into

435

* a half-precision significand */

436

nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

437

438

/* Must add 1 in to the possible significand because there is

439

* an implied 1 for normal values and not for subnormal

440

* values. See equations here:

441

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

442

*/

443

uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

444

445

/* If only zero bits get shifted out, this can be converted

446

* to subnormal */

447

if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&

448

nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&

449

uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {

450

/* --- CONVERTABLE TO HALF SUBNORMAL --- */

451

result.uSize = IEEE754_UNION_IS_HALF;

452

result.uValue = IEEE754_AssembleHalf(uSingleSign,

uHalfSignificand,

HALF_EXPONENT_ZERO);

} else {

/* --- DO NOT CONVERT --- */

457

result.uSize = IEEE754_UNION_IS_SINGLE;

458

result.uValue = uSingle;

}

}

}

return result;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

469

*

470

* @param[in] uSingleSign 0 if positive, 1 if negative

471

* @pararm[in] uSingleSignificand Bits of the significand

472

* @param[in] nSingleUnBiasedExponent Exponent

473

*

474

* This returns the bits for a single-precision float, a binary32 as

475

* specified in IEEE754. It is returned as a uint64_t rather than a

476

* uint32_t or a float for convenience of usage.

477

*/

478

static uint64_t

479

IEEE754_AssembleSingle(uint64_t uSingleSign,

480

uint64_t uSingleSignificand,

481

int64_t nSingleUnBiasedExponent)

482

{

483

uint64_t uSingleBiasedExponent;

484

485

uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);

486

487

return uSingleSignificand |

488

(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |

489

(uSingleSign << SINGLE_SIGN_SHIFT);

}

/**

* @brief Convert a double-precision float to single-precision.

495

*

496

* @param[in] d The value to convert.

497

*

498

* @returns Either unconverted value or value converted to single-precision.

499

*

500

* This always succeeds. If the value cannot be converted without the

501

* loss of precision, it is not converted.

502

*

503

* This handles all subnormals and NaN payloads.

504

*/

505

static IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

506

IEEE754_DoubleToSingle(const double d)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

507

{

508

IEEE754_union Result;

509

int64_t nExponentDifference;

510

int64_t nShiftAmount;

511

uint64_t uSingleSignificand;

512

uint64_t uDroppedBits;

513

514

515

/* Pull the three parts out of the double-precision float. Most

516

* work is done with uint64_t which helps avoid integer promotions

517

* and static analyzer complaints.

518

*/

519

const uint64_t uDouble = CopyDoubleToUint64(d);

520

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

521

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

522

const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;

523

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

524

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

525

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

526

if(uDoubleSignificand == 0) {

527

/* --- IS ZERO --- */

528

Result.uSize = IEEE754_UNION_IS_SINGLE;

529

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

530

0,

531

SINGLE_EXPONENT_ZERO);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

532

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

533

/* --- IS DOUBLE SUBNORMAL --- */

534

/* The largest double subnormal is slightly less than the

535

* largest double normal which is 2^-1022 or

536

* 2.2250738585072014e-308. The smallest single subnormal

537

* is 2^-149 or 1.401298464324817e-45. There is no

538

* overlap so double subnormals can't be converted to

539

* singles of any sort.

540

*/

541

Result.uSize = IEEE754_UNION_IS_DOUBLE;

542

Result.uValue = uDouble;

543

}

544

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

545

if(uDoubleSignificand == 0) {

546

/* ---- IS INFINITY ---- */

547

Result.uSize = IEEE754_UNION_IS_SINGLE;

548

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

549

0,

550

SINGLE_EXPONENT_INF_OR_NAN);

551

} else {

552

/* The NaN can only be converted if no payload bits are

553

* lost per RFC 8949 section 4.1 that defines Preferred

554

* Serializaton. Note that Deterministically Encode CBOR

555

* in section 4.2 allows for some variation of this rule,

556

* but at the moment this implementation is of Preferred

557

* Serialization, not CDE. As of December 2023, we are

558

* also expecting an update to CDE. This code may need to

559

* be updated for CDE.

560

*/

561

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

562

if(uDroppedBits == 0) {

563

/* --- IS CONVERTABLE NAN --- */

564

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

565

Result.uSize = IEEE754_UNION_IS_SINGLE;

566

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

567

uSingleSignificand,

568

SINGLE_EXPONENT_INF_OR_NAN);

569

} else {

570

/* --- IS UNCONVERTABLE NAN --- */

571

Result.uSize = IEEE754_UNION_IS_DOUBLE;

572

Result.uValue = uDouble;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

573

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

574

}

575

} else {

576

/* ---- REGULAR NUMBER ---- */

577

/* A regular double can be converted to a regular single if

578

* the double's exponent is in the smaller range of a single

579

* and if no precision is lost in the significand.

580

*/

581

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

582

if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&

583

nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&

584

uDroppedBits == 0) {

585

/* --- IS CONVERTABLE TO SINGLE --- */

586

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

587

Result.uSize = IEEE754_UNION_IS_SINGLE;

588

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

589

uSingleSignificand,

590

nDoubleUnbiasedExponent);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

591

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

592

/* Unable to convert to a single normal. See if it can be

593

* converted to a single subnormal. To do that, the

594

* exponent must be in range and no precision can be lost

595

* in the signficand.

596

*

597

* This is more complicated because the number is not

598

* normalized. The signficand must be shifted

599

* proprotionally to the exponent and 1 must be added

600

* in. See

601

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

602

*/

603

nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);

604

nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

605

uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

606

607

if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&

608

nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&

609

uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {

610

/* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */

611

Result.uSize = IEEE754_UNION_IS_SINGLE;

612

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

613

uSingleSignificand,

614

SINGLE_EXPONENT_ZERO);

615

} else {

616

/* --- CAN NOT BE CONVERTED --- */

617

Result.uSize = IEEE754_UNION_IS_DOUBLE;

618

Result.uValue = uDouble;

619

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

620

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

621

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

622

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

623

return Result;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

624

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

625

626

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

627

/* Public function; see ieee754.h */

628

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

629

IEEE754_DoubleToSmaller(const double d,

630

const int bAllowHalfPrecision,

631

const int bNoNanPayload)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

632

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

633

IEEE754_union result;

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

634

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

635

result = IEEE754_DoubleToSingle(d);

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

636

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

637

if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {

638

/* Cast to uint32_t is OK, because value was just successfully

639

* converted to single. */

640

float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

641

result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

642

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

643

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

644

return result;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

645

}

646

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

647

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

648

649

/* This returns 64 minus the number of zero bits on the right. It is

650

* is the amount of precision in the 64-bit significand passed in.

651

* When used for 52 and 23-bit significands, subtract 12 and 41

652

* to get their precision.

653

*

654

* The value returned is for a *normalized* number like the

655

* significand of a double. When used for precision for a non-normalized

656

* number like a uint64_t, further computation is required.

657

*

658

* If the significand is 0, then 0 is returned as the precision.*/

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

659

static int

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

660

IEEE754_Private_CountPrecisionBits(uint64_t uSignigicand)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

661

{

662

int nNonZeroBitsCount;

663

uint64_t uMask;

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

664

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

665

for(nNonZeroBitsCount = 64; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {

666

uMask = 0x01UL << (64 - nNonZeroBitsCount);

667

if(uMask & uSignigicand) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

668

break;

669

}

670

}

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

671

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

672

return nNonZeroBitsCount;

}

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

676

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

677

/* Public function; see ieee754.h */

678

struct IEEE754_ToInt

679

IEEE754_DoubleToInt(const double d)

680

{

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

681

int64_t nPrecisionBits;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

682

struct IEEE754_ToInt Result;

683

uint64_t uInteger;

684

685

/* Pull the three parts out of the double-precision float. Most

686

* work is done with uint64_t which helps avoid integer promotions

687

* and static analyzer complaints.

688

*/

689

const uint64_t uDouble = CopyDoubleToUint64(d);

690

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

691

/* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */

692

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

693

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

694

const uint64_t bIsNegative = uDouble & DOUBLE_SIGN_MASK;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

695

696

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

697

if(uDoubleSignificand == 0) {

698

/* --- POSITIVE AND NEGATIVE ZERO --- */

699

Result.integer.un_signed = 0;

700

Result.type = IEEE754_ToInt_IS_UINT;

701

} else {

702

/* --- SUBNORMAL --- */

703

Result.type = IEEE754_ToInt_NO_CONVERSION;

704

}

705

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

706

if(uDoubleSignificand != 0) {

707

/* --- NAN --- */

708

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

709

} else {

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

710

/* --- INFINITY --- */

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

711

Result.type = IEEE754_ToInt_NO_CONVERSION;

712

}

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

713

} else if(nDoubleUnbiasedExponent < 0) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

714

/* --- Exponent out of range --- */

715

Result.type = IEEE754_ToInt_NO_CONVERSION;

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

716

} else if(nDoubleUnbiasedExponent >= 64) {

717

if(nDoubleUnbiasedExponent == 64 && uDoubleSignificand == 0 && bIsNegative) {

718

/* Very special case for -18446744073709551616.0 */

719

Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */

720

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

721

} else {

722

/* --- Exponent out of range --- */

723

Result.type = IEEE754_ToInt_NO_CONVERSION;

724

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

725

} else {

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

726

/* Conversion only fails when the input is too large or is not a

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

727

* whole number, never because of lack of precision because

728

* 64-bit integers always have more precision than the 52-bits

729

* of a double.

730

*/

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

731

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uDoubleSignificand) -

732

(64-DOUBLE_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

733

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

734

if(nPrecisionBits && nPrecisionBits > nDoubleUnbiasedExponent) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

735

/* --- Not a whole number --- */

736

Result.type = IEEE754_ToInt_NO_CONVERSION;

737

} else {

738

/* --- CONVERTABLE WHOLE NUMBER --- */

739

/* Add in the one that is implied in normal floats */

740

uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);

741

/* Factor in the exponent */

742

if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {

743

/* Numbers less than 2^52 with up to 52 significant bits */

744

uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;

745

} else {

746

/* Numbers greater than 2^52 with at most 52 significant bits */

747

uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;

748

}

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

749

if(bIsNegative) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

750

/* Cast safe because exponent range check above */

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

751

if(nDoubleUnbiasedExponent == 63) {

752

Result.integer.un_signed = uInteger;

753

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

754

} else {

755

Result.integer.is_signed = -((int64_t)uInteger);

756

Result.type = IEEE754_ToInt_IS_INT;

757

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

758

} else {

759

Result.integer.un_signed = uInteger;

760

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

/* Public function; see ieee754.h */

770

struct IEEE754_ToInt

771

IEEE754_SingleToInt(const float f)

772

{

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

773

int32_t nPrecisionBits;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

774

struct IEEE754_ToInt Result;

775

uint64_t uInteger;

776

777

/* Pull the three parts out of the single-precision float. Most

778

* work is done with uint32_t which helps avoid integer promotions

779

* and static analyzer complaints.

780

*/

781

const uint32_t uSingle = CopyFloatToUint32(f);

782

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

783

/* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */

784

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

785

const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

786

const uint64_t bIsNegative = uSingle & SINGLE_SIGN_MASK;

787

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

788

789

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

790

if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {

791

/* --- POSITIVE AND NEGATIVE ZERO --- */

792

Result.integer.un_signed = 0;

793

Result.type = IEEE754_ToInt_IS_UINT;

794

} else {

795

/* --- Subnormal --- */

796

Result.type = IEEE754_ToInt_NO_CONVERSION;

797

}

798

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

799

/* --- NAN or INFINITY --- */

800

if(uSingleleSignificand != 0) {

801

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

802

} else {

803

Result.type = IEEE754_ToInt_NO_CONVERSION;

804

}

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

805

} else if(nSingleUnbiasedExponent < 0) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

806

/* --- Exponent out of range --- */

807

Result.type = IEEE754_ToInt_NO_CONVERSION;

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

808

} else if(nSingleUnbiasedExponent >= 64) {

809

if(nSingleUnbiasedExponent == 64 && uSingleleSignificand == 0 && bIsNegative) {

810

/* Very special case for -18446744073709551616.0 */

811

Result.integer.un_signed = 0; /* No negative 0, use it to indicate 2^64 */

812

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

813

} else {

814

/* --- Exponent out of range --- */

815

Result.type = IEEE754_ToInt_NO_CONVERSION;

816

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

817

} else {

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

818

/* Conversion only fails when the input is too large or is not a

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

819

* whole number, never because of lack of precision because

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

820

* 64-bit integers always have more precision than the 23 bits

821

* of a single.

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

822

*/

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

823

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uSingleleSignificand) -

824

(64 - SINGLE_NUM_SIGNIFICAND_BITS);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

825

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

826

if(nPrecisionBits && nPrecisionBits > nSingleUnbiasedExponent) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

827

/* --- Not a whole number --- */

828

Result.type = IEEE754_ToInt_NO_CONVERSION;

829

} else {

830

/* --- CONVERTABLE WHOLE NUMBER --- */

831

/* Add in the one that is implied in normal floats */

832

uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);

833

/* Factor in the exponent */

834

if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {

835

/* Numbers less than 2^23 with up to 23 significant bits */

836

uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;

837

} else {

838

/* Numbers greater than 2^23 with at most 23 significant bits*/

839

uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;

840

}

Laurence Lundblade

2024-04-22 10:31:40 -0700

[diff] [blame]

841

if(bIsNegative) {

842

/* Cast safe because exponent range check above */

843

if(nSingleUnbiasedExponent == 63) {

844

Result.integer.un_signed = uInteger;

845

Result.type = IEEE754_ToInt_IS_65BIT_NEG;

846

} else {

847

Result.integer.is_signed = -((int64_t)uInteger);

848

Result.type = IEEE754_ToInt_IS_INT;

849

}

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

850

} else {

851

Result.integer.un_signed = uInteger;

852

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

859

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

860

861

862

/* Public function; see ieee754.h */

863

double

864

IEEE754_UintToDouble(const uint64_t uInt, const int uIsNegative)

865

{

866

int nDoubleUnbiasedExponent;

867

uint64_t uDoubleSignificand;

868

int nPrecisionBits;

869

Laurence Lundblade

0bfa3cb

2024-04-21 13:14:24 -0700

[diff] [blame]

870

if(uInt == 0) {

Laurence Lundblade

9d04d2b

2024-07-24 16:57:43 -0700

[diff] [blame]

871

uDoubleSignificand = 0;

872

nDoubleUnbiasedExponent = DOUBLE_EXPONENT_ZERO;

873

Laurence Lundblade

0bfa3cb

2024-04-21 13:14:24 -0700

[diff] [blame]

874

} else {

875

/* Figure out the exponent and normalize the significand. This is

876

* done by shifting out all leading zero bits and counting them. If

877

* none are shifted out, the exponent is 63. */

878

uDoubleSignificand = uInt;

879

nDoubleUnbiasedExponent = 63;

880

while(1) {

881

if(uDoubleSignificand & 0x8000000000000000UL) {

882

break;

883

}

884

uDoubleSignificand <<= 1;

885

nDoubleUnbiasedExponent--;

886

};

887

888

/* Position significand correctly for a double. Only shift 63 bits

889

* because of the 1 that is present by implication in IEEE 754.*/

890

uDoubleSignificand >>= 63 - DOUBLE_NUM_SIGNIFICAND_BITS;

891

892

/* Subtract 1 which is present by implication in IEEE 754 */

893

uDoubleSignificand -= 1ULL << (DOUBLE_NUM_SIGNIFICAND_BITS);

894

895

nPrecisionBits = IEEE754_Private_CountPrecisionBits(uInt) - (64 - nDoubleUnbiasedExponent);

896

897

if(nPrecisionBits > DOUBLE_NUM_SIGNIFICAND_BITS) {

898

/* Will lose precision if converted */

899

return IEEE754_UINT_TO_DOUBLE_OOB;

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

900

}

Laurence Lundblade

2024-03-23 22:37:37 -0700

[diff] [blame]

901

}

902

903

return IEEE754_AssembleDouble((uint64_t)uIsNegative,

904

uDoubleSignificand,

905

nDoubleUnbiasedExponent);

906

}

907

Laurence Lundblade

b275cdc

2020-07-12 12:34:38 -0700

[diff] [blame]

908

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

/* Public function; see ieee754.h */

913

int

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

914

IEEE754_DoubleHasNaNPayload(const double d)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

915

{

916

const uint64_t uDouble = CopyDoubleToUint64(d);

917

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

918

/* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */

919

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

920

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

921

922

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN &&

923

uDoubleSignificand != 0 &&

924

uDoubleSignificand != DOUBLE_QUIET_NAN_BIT) {

return 1;

} else {

return 0;

}

}

/* Public function; see ieee754.h */

933

int

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

934

IEEE754_SingleHasNaNPayload(const float f)

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

935

{

936

const uint32_t uSingle = CopyFloatToUint32(f);

937

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

938

/* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */

939

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

940

const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

941

942

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN &&

Laurence Lundblade

2024-07-12 11:00:20 -0700

[diff] [blame]

943

uSingleSignificand != 0 &&

944

uSingleSignificand != SINGLE_QUIET_NAN_BIT) {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

return 1;

} else {

return 0;

}

}

Laurence Lundblade