Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

2024-01-07 19:17:52 -0700

[diff] [blame]

1

/* ==========================================================================

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

2

* ieee754.c -- floating-point conversion for half, double & single-precision

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

*

*

* SPDX-License-Identifier: BSD-3-Clause

8

*

9

* See BSD-3-Clause license in README.md

10

*

11

* Created on 7/23/18

12

* ========================================================================== */

Laurence Lundblade

cc2ed34

2018-09-22 17:29:55 -0700

[diff] [blame]

13

Máté Tóth-Pál

ef5f07a

2021-09-17 19:31:37 +0200

[diff] [blame]

14

#include "qcbor/qcbor_common.h"

15

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

16

#include "ieee754.h"

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

17

#include <string.h> /* For memcpy() */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

18

Laurence Lundblade

8db3d3e

2018-09-29 11:46:37 -0700

[diff] [blame]

19

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

20

/*

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

21

* This has long lines and is easier to read because of

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

22

* them. Some coding guidelines prefer 80 column lines (can they not

23

* afford big displays?).

24

*

25

* This code works solely using shifts and masks and thus has no

26

* dependency on any math libraries. It can even work if the CPU

27

* doesn't have any floating-point support, though that isn't the most

28

* useful thing to do.

29

*

30

* The memcpy() dependency is only for CopyFloatToUint32() and friends

31

* which only is needed to avoid type punning when converting the

32

* actual float bits to an unsigned value so the bit shifts and masks

33

* can work.

34

*

35

* The references used to write this code:

36

*

37

* IEEE 754-2008, particularly section 3.6 and 6.2.1

38

*

39

* https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages

40

*

41

* https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values

42

*

43

* https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules

44

*

45

* https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be

46

*

47

* IEEE754_FloatToDouble(uint32_t uFloat) was created but is not

48

* needed. It can be retrieved from github history if needed.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

52

53

54

/* ----- Half-Precision ----------- */

/* Width of each field in bits */
#define HALF_NUM_SIGNIFICAND_BITS (10)
#define HALF_NUM_EXPONENT_BITS    (5)
#define HALF_NUM_SIGN_BITS        (1)

/* Position of the least-significant bit of each field */
#define HALF_SIGNIFICAND_SHIFT (0)
#define HALF_EXPONENT_SHIFT    (HALF_NUM_SIGNIFICAND_BITS)
#define HALF_SIGN_SHIFT        (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

/* Masks selecting each field in place; all are unsigned int */
#define HALF_SIGNIFICAND_MASK ((0x01U << HALF_NUM_SIGNIFICAND_BITS) - 1U)                         /* 0x03ff The lower 10 bits */
#define HALF_EXPONENT_MASK    (((0x01U << HALF_NUM_EXPONENT_BITS) - 1U) << HALF_EXPONENT_SHIFT)   /* 0x7c00 5 bits of exponent */
#define HALF_SIGN_MASK        (0x01U << HALF_SIGN_SHIFT)                                          /* 0x8000 1 bit of sign */
#define HALF_QUIET_NAN_BIT    (0x01U << (HALF_NUM_SIGNIFICAND_BITS - 1))                          /* 0x0200 Top bit of significand */

/*  Biased    Biased    Unbiased   Use
 *  (hex)     (dec)
 *  0x00       0        -15        0 and subnormal
 *  0x01       1        -14        Smallest normal exponent
 *  0x1e      30         15        Largest normal exponent
 *  0x1F      31         16        NaN and Infinity
 */
#define HALF_EXPONENT_BIAS        (15)
#define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)     /*  15 Unbiased */
#define HALF_EXPONENT_MIN         (1 - HALF_EXPONENT_BIAS) /* -14 Unbiased */
#define HALF_EXPONENT_ZERO        (-HALF_EXPONENT_BIAS)    /* -15 Unbiased */
#define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS + 1) /*  16 Unbiased */

78

79

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

80

/* ------ Single-Precision -------- */

/* Width of each field in bits */
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

/* Position of the least-significant bit of each field */
#define SINGLE_SIGNIFICAND_SHIFT (0)
#define SINGLE_EXPONENT_SHIFT    (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT        (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

/* Masks selecting each field in place; all are unsigned int */
#define SINGLE_SIGNIFICAND_MASK ((0x01U << SINGLE_NUM_SIGNIFICAND_BITS) - 1U)                           /* The lower 23 bits */
#define SINGLE_EXPONENT_MASK    (((0x01U << SINGLE_NUM_EXPONENT_BITS) - 1U) << SINGLE_EXPONENT_SHIFT)   /* 8 bits of exponent */
#define SINGLE_SIGN_MASK        (0x01U << SINGLE_SIGN_SHIFT)                                            /* 1 bit of sign */
#define SINGLE_QUIET_NAN_BIT    (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS - 1))                            /* Top bit of significand */

/*  Biased    Biased    Unbiased   Use
 *  (hex)     (dec)
 *  0x0000      0       -127       0 and subnormal
 *  0x0001      1       -126       Smallest normal exponent
 *  0x7f      127          0       1
 *  0xfe      254        127       Largest normal exponent
 *  0xff      255        128       NaN and Infinity
 */
#define SINGLE_EXPONENT_BIAS        (127)
#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)     /*  127 Unbiased */
#define SINGLE_EXPONENT_MIN         (1 - SINGLE_EXPONENT_BIAS) /* -126 Unbiased */
#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)    /* -127 Unbiased */
#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS + 1) /*  128 Unbiased */

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

105

106

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

107

/* --------- Double-Precision ---------- */

/* Width of each field in bits */
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

/* Position of the least-significant bit of each field */
#define DOUBLE_SIGNIFICAND_SHIFT (0)
#define DOUBLE_EXPONENT_SHIFT    (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT        (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

/* Masks selecting each field in place; all are unsigned long long */
#define DOUBLE_SIGNIFICAND_MASK ((0x01ULL << DOUBLE_NUM_SIGNIFICAND_BITS) - 1ULL)                           /* The lower 52 bits */
#define DOUBLE_EXPONENT_MASK    (((0x01ULL << DOUBLE_NUM_EXPONENT_BITS) - 1ULL) << DOUBLE_EXPONENT_SHIFT)   /* 11 bits of exponent */
#define DOUBLE_SIGN_MASK        (0x01ULL << DOUBLE_SIGN_SHIFT)                                              /* 1 bit of sign */
#define DOUBLE_QUIET_NAN_BIT    (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS - 1))                              /* Top bit of significand */


/*  Biased      Biased   Unbiased  Use
 *  (hex)       (dec)
 *  0x00000000     0     -1023     0 and subnormal
 *  0x00000001     1     -1022     Smallest normal exponent
 *  0x000007fe  2046      1023     Largest normal exponent
 *  0x000007ff  2047      1024     NaN and Infinity
 */
#define DOUBLE_EXPONENT_BIAS        (1023)
#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)     /*  1023 Unbiased */
#define DOUBLE_EXPONENT_MIN         (1 - DOUBLE_EXPONENT_BIAS) /* -1022 Unbiased */
#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)    /* -1023 Unbiased */
#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS + 1) /*  1024 Unbiased */

132

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

/*

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

137

* Convenient functions to avoid type punning, compiler warnings and

138

* such. The optimizer reduces them to a simple assignment. This is a

139

* crusty corner of C. It shouldn't be this hard.

140

*

141

* These are also in UsefulBuf.h under a different name. They are copied

142

* here to avoid a dependency on UsefulBuf.h. There is no object code

143

* size impact because these always optimize down to a simple assignment.

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

144

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

145

/* Return the raw 32 bits of a float as an unsigned integer. The bytes
 * are copied with memcpy() rather than read through a cast pointer so
 * that no type punning is involved. */
static inline uint32_t
CopyFloatToUint32(float f)
{
   uint32_t uBits;

   memcpy(&uBits, &f, sizeof(uBits));

   return uBits;
}

152

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

153

/* Return the raw 64 bits of a double as an unsigned integer. The bytes
 * are copied with memcpy() rather than read through a cast pointer so
 * that no type punning is involved. */
static inline uint64_t
CopyDoubleToUint64(double d)
{
   uint64_t uBits;

   memcpy(&uBits, &d, sizeof(uBits));

   return uBits;
}

160

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

161

162

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

163

164

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

165

/* Reinterpret 64 raw bits as a double. The bytes are copied with
 * memcpy() rather than read through a cast pointer so that no type
 * punning is involved. */
static inline double
CopyUint64ToDouble(uint64_t u64)
{
   double dValue;

   memcpy(&dValue, &u64, sizeof(u64));

   return dValue;
}

/* Reinterpret 32 raw bits as a float. The bytes are copied with
 * memcpy() rather than read through a cast pointer so that no type
 * punning is involved. */
static inline float
CopyUint32ToSingle(uint32_t u32)
{
   float fValue;

   memcpy(&fValue, &u32, sizeof(u32));

   return fValue;
}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

180

181

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

182

183

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

184

/**

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

185

* @brief Assemble sign, significand and exponent into double precision float.

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

186

*

187

* @param[in] uDoubleSign 0 if positive, 1 if negative

188

* @pararm[in] uDoubleSignificand Bits of the significand

189

* @param[in] nDoubleUnBiasedExponent Exponent

190

*

191

* This returns the bits for a single-precision float, a binary64

192

* as specified in IEEE754.

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

193

*/

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

194

static double

195

IEEE754_AssembleDouble(uint64_t uDoubleSign,

196

uint64_t uDoubleSignificand,

197

int64_t nDoubleUnBiasedExponent)

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

198

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

199

uint64_t uDoubleBiasedExponent;

200

201

uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);

202

203

return CopyUint64ToDouble(uDoubleSignificand |

204

(uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |

205

(uDoubleSign << DOUBLE_SIGN_SHIFT));

206

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

207

208

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

209

/* Public function; see ieee754.h */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

210

double

211

IEEE754_HalfToDouble(uint16_t uHalfPrecision)

212

{

213

uint64_t uDoubleSignificand;

214

int64_t nDoubleUnBiasedExponent;

215

double dResult;

216

217

/* Pull out the three parts of the half-precision float. Do all

218

* the work in 64 bits because that is what the end result is. It

219

* may give smaller code size and will keep static analyzers

220

* happier.

221

*/

222

const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;

223

const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;

224

const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;

225

const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;

226

227

if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {

228

/* 0 or subnormal */

229

if(uHalfSignificand) {

230

/* --- SUBNORMAL --- */

231

/* A half-precision subnormal can always be converted to a

232

* normal double-precision float because the ranges line up.

233

* The exponent of a subnormal starts out at the min exponent

234

* for a normal. As the sub normal significand bits are

235

* shifted, left to normalize, the exponent is

236

* decremented. Shifting continues until fully normalized.

237

*/

238

nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;

239

uDoubleSignificand = uHalfSignificand;

240

do {

241

uDoubleSignificand <<= 1;

242

nDoubleUnBiasedExponent--;

243

} while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);

244

/* A normal has an implied 1 in the most significant

245

* position that a subnormal doesn't. */

246

uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;

247

/* Must shift into place for a double significand */

248

uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;

249

250

dResult = IEEE754_AssembleDouble(uHalfSign,

251

uDoubleSignificand,

252

nDoubleUnBiasedExponent);

253

} else {

254

/* --- ZERO --- */

255

dResult = IEEE754_AssembleDouble(uHalfSign,

256

0,

257

DOUBLE_EXPONENT_ZERO);

258

}

259

} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {

260

/* NaN or Inifinity */

261

if(uHalfSignificand) {

262

/* --- NaN --- */

263

/* Half-precision payloads always fit into double precision

264

* payloads. They are shifted left the same as a normal

265

* number significand.

266

*/

267

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

268

dResult = IEEE754_AssembleDouble(uHalfSign,

269

uDoubleSignificand,

270

DOUBLE_EXPONENT_INF_OR_NAN);

271

} else {

272

/* --- INFINITY --- */

273

dResult = IEEE754_AssembleDouble(uHalfSign,

274

0,

275

DOUBLE_EXPONENT_INF_OR_NAN);

276

}

277

} else {

278

/* --- NORMAL NUMBER --- */

279

uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

280

dResult = IEEE754_AssembleDouble(uHalfSign,

281

uDoubleSignificand,

282

nHalfUnBiasedExponent);

}

return dResult;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

291

*

292

* @param[in] uHalfSign 0 if positive, 1 if negative

293

* @pararm[in] uHalfSignificand Bits of the significand

294

* @param[in] nHalfUnBiasedExponent Exponent

295

*

296

* This returns the bits for a single-precision float, a binary32 as

297

* specified in IEEE754. It is returned as a uint64_t rather than a

298

* uint32_t or a float for convenience of usage.

299

*/

300

static uint32_t

301

IEEE754_AssembleHalf(uint32_t uHalfSign,

302

uint32_t uHalfSignificand,

303

int32_t nHalfUnBiasedExponent)

304

{

305

uint32_t uHalfUnbiasedExponent;

306

307

uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);

308

309

return uHalfSignificand |

310

(uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |

311

(uHalfSign << HALF_SIGN_SHIFT);

}

/* Public function; see ieee754.h */

316

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

317

IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

318

{

319

IEEE754_union result;

320

uint32_t uDroppedBits;

321

int32_t nExponentDifference;

322

int32_t nShiftAmount;

323

uint32_t uHalfSignificand;

324

325

/* Pull the three parts out of the double-precision float Most work

326

* is done with uint32_t which helps avoid integer promotions and

327

* static analyzer complaints.

328

*/

329

const uint32_t uSingle = CopyFloatToUint32(f);

330

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

331

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

332

const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

333

const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;

334

335

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

336

if(uSingleSignificand == 0) {

337

/* --- IS ZERO --- */

338

result.uSize = IEEE754_UNION_IS_HALF;

339

result.uValue = IEEE754_AssembleHalf(uSingleSign,

0,

HALF_EXPONENT_ZERO);

} else {

/* --- IS SINGLE SUBNORMAL --- */

344

/* The largest single subnormal is slightly less than the

345

* largest single normal which is 2^-149 or

346

* 2.2040517676619426e-38. The smallest half subnormal is

347

* 2^-14 or 5.9604644775390625E-8. There is no overlap so

348

* single subnormals can't be converted to halfs of any sort.

349

*/

350

result.uSize = IEEE754_UNION_IS_SINGLE;

351

result.uValue = uSingle;

352

}

353

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

354

if(uSingleSignificand == 0) {

355

/* ---- IS INFINITY ---- */

356

result.uSize = IEEE754_UNION_IS_HALF;

357

result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);

358

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

359

if(bNoNaNPayload) {

360

/* --- REQUIRE CANNONICAL NAN --- */

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

361

result.uSize = IEEE754_UNION_IS_HALF;

362

result.uValue = IEEE754_AssembleHalf(uSingleSign,

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

363

HALF_QUIET_NAN_BIT,

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

364

HALF_EXPONENT_INF_OR_NAN);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

365

} else {

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

366

/* The NaN can only be converted if no payload bits are lost

367

* per RFC 8949 section 4.1 that defines Preferred

368

* Serializaton. Note that Deterministically Encode CBOR in

369

* section 4.2 allows for some variation of this rule, but at

370

* the moment this implementation is of Preferred

371

* Serialization, not CDE. As of December 2023, we are also

372

* expecting an update to CDE. This code may need to be

373

* updated for CDE.

374

*/

375

uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);

376

if(uDroppedBits == 0) {

377

/* --- IS CONVERTABLE NAN --- */

378

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

379

result.uSize = IEEE754_UNION_IS_HALF;

380

result.uValue = IEEE754_AssembleHalf(uSingleSign,

381

uHalfSignificand,

382

HALF_EXPONENT_INF_OR_NAN);

383

384

} else {

385

/* --- IS UNCONVERTABLE NAN --- */

386

result.uSize = IEEE754_UNION_IS_SINGLE;

387

result.uValue = uSingle;

388

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

}

}

} else {

/* ---- REGULAR NUMBER ---- */

393

/* A regular single can be converted to a regular half if the

394

* single's exponent is in the smaller range of a half and if no

395

* precision is lost in the significand.

396

*/

397

if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&

398

nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&

399

(uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {

400

uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

401

402

/* --- CONVERT TO HALF NORMAL --- */

403

result.uSize = IEEE754_UNION_IS_HALF;

404

result.uValue = IEEE754_AssembleHalf(uSingleSign,

405

uHalfSignificand,

406

nSingleUnbiasedExponent);

407

} else {

408

/* Unable to convert to a half normal. See if it can be

409

* converted to a half subnormal. To do that, the exponent

410

* must be in range and no precision can be lost in the

411

* signficand.

412

*

413

* This is more complicated because the number is not

414

* normalized. The signficand must be shifted proprotionally

415

* to the exponent and 1 must be added in. See

416

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

417

*

418

* Exponents -14 to -24 map to a shift of 0 to 10 of the

419

* significand. The largest value of a half subnormal has an

420

* exponent of -14. Subnormals are not normalized like

421

* normals meaning they lose precision as the numbers get

422

* smaller. Normals don't lose precision because the exponent

423

* allows all the bits of the significand to be significant.

424

*/

425

/* The exponent of the largest possible half-precision

426

* subnormal is HALF_EXPONENT_MIN (-14). Exponents larger

427

* than this are normal and handled above. We're going to

428

* shift the significand right by at least this amount.

429

*/

430

nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);

431

432

/* In addition to the shift based on the exponent's value,

433

* the single significand has to be shifted right to fit into

434

* a half-precision significand */

435

nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

436

437

/* Must add 1 in to the possible significand because there is

438

* an implied 1 for normal values and not for subnormal

439

* values. See equations here:

440

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

441

*/

442

uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

443

444

/* If only zero bits get shifted out, this can be converted

445

* to subnormal */

446

if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&

447

nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&

448

uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {

449

/* --- CONVERTABLE TO HALF SUBNORMAL --- */

450

result.uSize = IEEE754_UNION_IS_HALF;

451

result.uValue = IEEE754_AssembleHalf(uSingleSign,

uHalfSignificand,

HALF_EXPONENT_ZERO);

} else {

/* --- DO NOT CONVERT --- */

456

result.uSize = IEEE754_UNION_IS_SINGLE;

457

result.uValue = uSingle;

}

}

}

return result;

}

/**

* @brief Assemble sign, significand and exponent into single precision float.

468

*

469

* @param[in] uSingleSign 0 if positive, 1 if negative

470

* @pararm[in] uSingleSignificand Bits of the significand

471

* @param[in] nSingleUnBiasedExponent Exponent

472

*

473

* This returns the bits for a single-precision float, a binary32 as

474

* specified in IEEE754. It is returned as a uint64_t rather than a

475

* uint32_t or a float for convenience of usage.

476

*/

477

static uint64_t

478

IEEE754_AssembleSingle(uint64_t uSingleSign,

479

uint64_t uSingleSignificand,

480

int64_t nSingleUnBiasedExponent)

481

{

482

uint64_t uSingleBiasedExponent;

483

484

uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);

485

486

return uSingleSignificand |

487

(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |

488

(uSingleSign << SINGLE_SIGN_SHIFT);

}

/**

* @brief Convert a double-precision float to single-precision.

494

*

495

* @param[in] d The value to convert.

496

*

497

* @returns Either unconverted value or value converted to single-precision.

498

*

499

* This always succeeds. If the value cannot be converted without the

500

* loss of precision, it is not converted.

501

*

502

* This handles all subnormals and NaN payloads.

503

*/

504

static IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

505

IEEE754_DoubleToSingle(const double d)

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

506

{

507

IEEE754_union Result;

508

int64_t nExponentDifference;

509

int64_t nShiftAmount;

510

uint64_t uSingleSignificand;

511

uint64_t uDroppedBits;

512

513

514

/* Pull the three parts out of the double-precision float. Most

515

* work is done with uint64_t which helps avoid integer promotions

516

* and static analyzer complaints.

517

*/

518

const uint64_t uDouble = CopyDoubleToUint64(d);

519

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

520

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

521

const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;

522

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

523

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

524

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

525

if(uDoubleSignificand == 0) {

526

/* --- IS ZERO --- */

527

Result.uSize = IEEE754_UNION_IS_SINGLE;

528

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

529

0,

530

SINGLE_EXPONENT_ZERO);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

531

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

532

/* --- IS DOUBLE SUBNORMAL --- */

533

/* The largest double subnormal is slightly less than the

534

* largest double normal which is 2^-1022 or

535

* 2.2250738585072014e-308. The smallest single subnormal

536

* is 2^-149 or 1.401298464324817e-45. There is no

537

* overlap so double subnormals can't be converted to

538

* singles of any sort.

539

*/

540

Result.uSize = IEEE754_UNION_IS_DOUBLE;

541

Result.uValue = uDouble;

542

}

543

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

544

if(uDoubleSignificand == 0) {

545

/* ---- IS INFINITY ---- */

546

Result.uSize = IEEE754_UNION_IS_SINGLE;

547

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

548

0,

549

SINGLE_EXPONENT_INF_OR_NAN);

550

} else {

551

/* The NaN can only be converted if no payload bits are

552

* lost per RFC 8949 section 4.1 that defines Preferred

553

* Serializaton. Note that Deterministically Encode CBOR

554

* in section 4.2 allows for some variation of this rule,

555

* but at the moment this implementation is of Preferred

556

* Serialization, not CDE. As of December 2023, we are

557

* also expecting an update to CDE. This code may need to

558

* be updated for CDE.

559

*/

560

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

561

if(uDroppedBits == 0) {

562

/* --- IS CONVERTABLE NAN --- */

563

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

564

Result.uSize = IEEE754_UNION_IS_SINGLE;

565

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

566

uSingleSignificand,

567

SINGLE_EXPONENT_INF_OR_NAN);

568

} else {

569

/* --- IS UNCONVERTABLE NAN --- */

570

Result.uSize = IEEE754_UNION_IS_DOUBLE;

571

Result.uValue = uDouble;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

572

}

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

573

}

574

} else {

575

/* ---- REGULAR NUMBER ---- */

576

/* A regular double can be converted to a regular single if

577

* the double's exponent is in the smaller range of a single

578

* and if no precision is lost in the significand.

579

*/

580

uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);

581

if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&

582

nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&

583

uDroppedBits == 0) {

584

/* --- IS CONVERTABLE TO SINGLE --- */

585

uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

586

Result.uSize = IEEE754_UNION_IS_SINGLE;

587

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

588

uSingleSignificand,

589

nDoubleUnbiasedExponent);

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

590

} else {

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

591

/* Unable to convert to a single normal. See if it can be

592

* converted to a single subnormal. To do that, the

593

* exponent must be in range and no precision can be lost

594

* in the signficand.

595

*

596

* This is more complicated because the number is not

597

* normalized. The signficand must be shifted

598

* proprotionally to the exponent and 1 must be added

599

* in. See

600

* https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding

601

*/

602

nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);

603

nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);

604

uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

605

606

if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&

607

nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&

608

uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {

609

/* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */

610

Result.uSize = IEEE754_UNION_IS_SINGLE;

611

Result.uValue = IEEE754_AssembleSingle(uDoubleSign,

612

uSingleSignificand,

613

SINGLE_EXPONENT_ZERO);

614

} else {

615

/* --- CAN NOT BE CONVERTED --- */

616

Result.uSize = IEEE754_UNION_IS_DOUBLE;

617

Result.uValue = uDouble;

618

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

619

}

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

620

}

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

621

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

622

return Result;

Laurence Lundblade

2018-11-02 21:44:06 +0700

[diff] [blame]

623

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

624

625

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

626

/* Public function; see ieee754.h */

627

IEEE754_union

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

628

IEEE754_DoubleToSmaller(const double d,

629

const int bAllowHalfPrecision,

630

const int bNoNanPayload)

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

631

{

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

632

IEEE754_union result;

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

633

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

634

result = IEEE754_DoubleToSingle(d);

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

635

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

636

if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {

637

/* Cast to uint32_t is OK, because value was just successfully

638

* converted to single. */

639

float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

640

result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

641

}

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

642

Laurence Lundblade

2024-01-07 19:17:52 -0700

[diff] [blame]

643

return result;

Laurence Lundblade

2018-09-19 11:25:27 -0700

[diff] [blame]

644

}

645

Laurence Lundblade

2018-12-17 16:17:45 -0800

[diff] [blame]

646

Laurence Lundblade

2024-02-17 20:38:55 -0800

[diff] [blame]

647

/**
 * @brief Count bits from the top of a field down to its lowest set bit.
 *
 * @param[in] nMax     Width in bits of the field to examine; it
 *                     occupies bits 0 to nMax-1 of @c uTarget. Must
 *                     be no more than 64.
 * @param[in] uTarget  The value whose low @c nMax bits are examined.
 *
 * @returns nMax minus the bit index of the least-significant 1 bit
 *          found in bits 0 to nMax-1, or 0 if none of those bits are
 *          set (or if nMax is not positive).
 *
 * The field is scanned starting at bit 0 and moving up. The mask is
 * built from a uint64_t rather than an unsigned long because unsigned
 * long is only 32 bits on some platforms and shifting it by 32 or
 * more is undefined behavior.
 */
static int
IEEE754_Private_CountNonZeroBits(int nMax, uint64_t uTarget)
{
   int      nNonZeroBitsCount;
   uint64_t uMask;

   for(nNonZeroBitsCount = nMax; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {
      /* The shift amount runs from 0 up to nMax-1 as the count runs
       * from nMax down to 1. */
      uMask = (uint64_t)0x01 << (nMax - nNonZeroBitsCount);
      if(uMask & uTarget) {
         break;
      }
   }

   return nNonZeroBitsCount;
}

/* Public function; see ieee754.h */

664

struct IEEE754_ToInt

665

IEEE754_DoubleToInt(const double d)

666

{

667

int64_t nNonZeroBitsCount;

668

struct IEEE754_ToInt Result;

669

uint64_t uInteger;

670

671

/* Pull the three parts out of the double-precision float. Most

672

* work is done with uint64_t which helps avoid integer promotions

673

* and static analyzer complaints.

674

*/

675

const uint64_t uDouble = CopyDoubleToUint64(d);

676

const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;

677

/* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */

678

const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;

679

const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;

680

681

if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {

682

if(uDoubleSignificand == 0) {

683

/* --- POSITIVE AND NEGATIVE ZERO --- */

684

Result.integer.un_signed = 0;

685

Result.type = IEEE754_ToInt_IS_UINT;

686

} else {

687

/* --- SUBNORMAL --- */

688

Result.type = IEEE754_ToInt_NO_CONVERSION;

689

}

690

} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {

691

if(uDoubleSignificand != 0) {

692

/* --- NAN --- */

693

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

694

} else {

695

/* --- INIFINITY --- */

696

Result.type = IEEE754_ToInt_NO_CONVERSION;

697

}

698

} else if(nDoubleUnbiasedExponent < 0 ||

699

(nDoubleUnbiasedExponent >= ((uDouble & DOUBLE_SIGN_MASK) ? 63 : 64))) {

700

/* --- Exponent out of range --- */

701

Result.type = IEEE754_ToInt_NO_CONVERSION;

702

} else {

703

/* Count down from 52 to the number of bits that are not zero in

704

* the significand. This counts from the least significant bit

705

* until a non-zero bit is found to know if it is a whole

706

* number.

707

*

708

* Conversion only fails when the input is too large or is not a

709

* whole number, never because of lack of precision because

710

* 64-bit integers always have more precision than the 52-bits

711

* of a double.

712

*/

713

nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(DOUBLE_NUM_SIGNIFICAND_BITS, uDoubleSignificand);

714

715

if(nNonZeroBitsCount && nNonZeroBitsCount > nDoubleUnbiasedExponent) {

716

/* --- Not a whole number --- */

717

Result.type = IEEE754_ToInt_NO_CONVERSION;

718

} else {

719

/* --- CONVERTABLE WHOLE NUMBER --- */

720

/* Add in the one that is implied in normal floats */

721

uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);

722

/* Factor in the exponent */

723

if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {

724

/* Numbers less than 2^52 with up to 52 significant bits */

725

uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;

726

} else {

727

/* Numbers greater than 2^52 with at most 52 significant bits */

728

uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;

729

}

730

if(uDouble & DOUBLE_SIGN_MASK) {

731

/* Cast safe because exponent range check above */

732

Result.integer.is_signed = -((int64_t)uInteger);

733

Result.type = IEEE754_ToInt_IS_INT;

734

} else {

735

Result.integer.un_signed = uInteger;

736

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

/* Public function; see ieee754.h */

746

struct IEEE754_ToInt

747

IEEE754_SingleToInt(const float f)

748

{

749

int32_t nNonZeroBitsCount;

750

struct IEEE754_ToInt Result;

751

uint64_t uInteger;

752

753

/* Pull the three parts out of the single-precision float. Most

754

* work is done with uint32_t which helps avoid integer promotions

755

* and static analyzer complaints.

756

*/

757

const uint32_t uSingle = CopyFloatToUint32(f);

758

const uint32_t uSingleBiasedExponent = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;

759

/* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */

760

const int32_t nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;

761

const uint32_t uSingleleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;

762

763

if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {

764

if(uSingleleSignificand == 0 && !(uSingle & SINGLE_SIGN_MASK)) {

765

/* --- POSITIVE AND NEGATIVE ZERO --- */

766

Result.integer.un_signed = 0;

767

Result.type = IEEE754_ToInt_IS_UINT;

768

} else {

769

/* --- Subnormal --- */

770

Result.type = IEEE754_ToInt_NO_CONVERSION;

771

}

772

} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {

773

/* --- NAN or INFINITY --- */

774

if(uSingleleSignificand != 0) {

775

Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */

776

} else {

777

Result.type = IEEE754_ToInt_NO_CONVERSION;

778

}

779

} else if(nSingleUnbiasedExponent < 0 ||

780

(nSingleUnbiasedExponent >= ((uSingle & SINGLE_SIGN_MASK) ? 63 : 64))) {

781

/* --- Exponent out of range --- */

782

Result.type = IEEE754_ToInt_NO_CONVERSION;

783

} else {

784

/* Count down from 23 to the number of bits that are not zero in

785

* the significand. This counts from the least significant bit

786

* until a non-zero bit is found.

787

*

788

* Conversion only fails when the input is too large or is not a

789

* whole number, never because of lack of precision because

790

* 64-bit integers always have more precision than the 52-bits

791

* of a double.

792

*/

793

nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(SINGLE_NUM_SIGNIFICAND_BITS, uSingleleSignificand);

794

795

if(nNonZeroBitsCount && nNonZeroBitsCount > nSingleUnbiasedExponent) {

796

/* --- Not a whole number --- */

797

Result.type = IEEE754_ToInt_NO_CONVERSION;

798

} else {

799

/* --- CONVERTABLE WHOLE NUMBER --- */

800

/* Add in the one that is implied in normal floats */

801

uInteger = uSingleleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);

802

/* Factor in the exponent */

803

if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {

804

/* Numbers less than 2^23 with up to 23 significant bits */

805

uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;

806

} else {

807

/* Numbers greater than 2^23 with at most 23 significant bits*/

808

uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;

809

}

810

if(uSingle & SINGLE_SIGN_MASK) {

811

Result.integer.is_signed = -((int64_t)uInteger);

812

Result.type = IEEE754_ToInt_IS_INT;

813

} else {

814

Result.integer.un_signed = uInteger;

815

Result.type = IEEE754_ToInt_IS_UINT;

}

}

}

return Result;

}

Laurence Lundblade

fe09bbf

2020-07-16 12:14:51 -0700

[diff] [blame]

822

Laurence Lundblade

b275cdc

2020-07-12 12:34:38 -0700

[diff] [blame]

823

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */

Laurence Lundblade