/*==============================================================================
 float_tests.c -- tests for float and conversion to/from half-precision

 Copyright (c) 2018-2019, Laurence Lundblade. All rights reserved.

 SPDX-License-Identifier: BSD-3-Clause

 See BSD-3-Clause license in README.md

 Created on 9/19/18
 ==============================================================================*/
12
13#include "float_tests.h"
14#include "qcbor.h"
15#include "half_to_double_from_rfc7049.h"
16#include <math.h> // For INFINITY and NAN and isnan()
17
18
19
20static const uint8_t spExpectedHalf[] = {
21 0xB1,
22 0x64,
23 0x7A, 0x65, 0x72, 0x6F,
24 0xF9, 0x00, 0x00, // 0.000
25 0x6A,
26 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
27 0xF9, 0x7C, 0x00, // Infinity
28 0x73,
29 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
30 0xF9, 0xFC, 0x00, // -Inifinity
31 0x63,
32 0x4E, 0x61, 0x4E,
33 0xF9, 0x7E, 0x00, // NaN
34 0x63,
35 0x6F, 0x6E, 0x65,
36 0xF9, 0x3C, 0x00, // 1.0
37 0x69,
38 0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
39 0xF9, 0x35, 0x55, // 0.333251953125
40 0x76,
41 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
42 0xF9, 0x7B, 0xFF, // 65504.0
43 0x78, 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
44 0xF9, 0x7C, 0x00, // Infinity
45 0x72,
46 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
47 0xF9, 0x00, 0x01, // 0.000000059604
48 0x6F,
49 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
50 0xF9, 0x03, 0xFF, // 0.0000609755516
51 0x71,
52 0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
53 0xF9, 0x04, 0x00, // 0.000061988
54 0x70,
55 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65,
56 0xF9, 0x00, 0x00,
57 0x03,
58 0xF9, 0xC0, 0x00, // -2
59 0x04,
60 0xF9, 0x7E, 0x00, // qNaN
61 0x05,
62 0xF9, 0x7C, 0x01, // sNaN
63 0x06,
64 0xF9, 0x7E, 0x0F, // qNaN with payload 0x0f
65 0x07,
66 0xF9, 0x7C, 0x0F, // sNaN with payload 0x0f
67
68};
69
70
71int HalfPrecisionDecodeBasicTests()
72{
73 UsefulBufC HalfPrecision = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedHalf);
74
75 QCBORDecodeContext DC;
76 QCBORDecode_Init(&DC, HalfPrecision, 0);
77
78 QCBORItem Item;
79
80 QCBORDecode_GetNext(&DC, &Item);
81 if(Item.uDataType != QCBOR_TYPE_MAP) {
82 return -1;
83 }
84
85 QCBORDecode_GetNext(&DC, &Item);
86 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0F) {
87 return -2;
88 }
89
90 QCBORDecode_GetNext(&DC, &Item);
91 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
92 return -3;
93 }
94
95 QCBORDecode_GetNext(&DC, &Item);
96 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -INFINITY) {
97 return -4;
98 }
99
100 QCBORDecode_GetNext(&DC, &Item); // TODO, is this really converting right? It is carrying payload, but this confuses things.
101 if(Item.uDataType != QCBOR_TYPE_DOUBLE || !isnan(Item.val.dfnum)) {
102 return -5;
103 }
104
105 QCBORDecode_GetNext(&DC, &Item);
106 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 1.0F) {
107 return -6;
108 }
109
110 QCBORDecode_GetNext(&DC, &Item);
111 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.333251953125F) {
112 return -7;
113 }
114
115 QCBORDecode_GetNext(&DC, &Item);
116 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 65504.0F) {
117 return -8;
118 }
119
120 QCBORDecode_GetNext(&DC, &Item);
121 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
122 return -9;
123 }
124
125 QCBORDecode_GetNext(&DC, &Item); // TODO: check this
126 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0000000596046448F) {
127 return -10;
128 }
129
130 QCBORDecode_GetNext(&DC, &Item); // TODO: check this
131 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0000609755516F) {
132 return -11;
133 }
134
135 QCBORDecode_GetNext(&DC, &Item); // TODO check this
136 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0000610351563F) {
137 return -12;
138 }
139
140 QCBORDecode_GetNext(&DC, &Item);
141 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0) {
142 return -13;
143 }
144
145 QCBORDecode_GetNext(&DC, &Item);
146 if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -2.0F) {
147 return -14;
148 }
149
150 // TODO: double check these four tests
151 QCBORDecode_GetNext(&DC, &Item); // qNaN
152 if(Item.uDataType != QCBOR_TYPE_DOUBLE || UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum) != 0x7ff8000000000000ULL) {
153 return -15;
154 }
155 QCBORDecode_GetNext(&DC, &Item); // sNaN
156 if(Item.uDataType != QCBOR_TYPE_DOUBLE || UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum) != 0x7ff0000000000001ULL) {
157 return -16;
158 }
159 QCBORDecode_GetNext(&DC, &Item); // qNaN with payload 0x0f
160 if(Item.uDataType != QCBOR_TYPE_DOUBLE || UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum) != 0x7ff800000000000fULL) {
161 return -17;
162 }
163 QCBORDecode_GetNext(&DC, &Item); // sNaN with payload 0x0f
164 if(Item.uDataType != QCBOR_TYPE_DOUBLE || UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum) != 0x7ff000000000000fULL) {
165 return -18;
166 }
167
168 if(QCBORDecode_Finish(&DC)) {
169 return -19;
170 }
171
172 return 0;
173}
174
175
176
177
178int HalfPrecisionAgainstRFCCodeTest()
179{
180 for(uint32_t uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
181 unsigned char x[2];
182 x[1] = uHalfP & 0xff;
183 x[0] = uHalfP >> 8;
184 double d = decode_half(x);
185
186 // Contruct the CBOR for the half-precision float by hand
187 UsefulBuf_MAKE_STACK_UB(__xx, 3);
188 UsefulOutBuf UOB;
189 UsefulOutBuf_Init(&UOB, __xx);
190
191 const uint8_t uHalfPrecInitialByte = HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5); // 0xf9
192 UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); // The initial byte for a half-precision float
193 UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);
194
195 // Now parse the hand-constructed CBOR. This will invoke the conversion to a float
196 QCBORDecodeContext DC;
197 QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
198
199 QCBORItem Item;
200
201 QCBORDecode_GetNext(&DC, &Item);
202 if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
203 return -1;
204 }
205
206 //printf("%04x QCBOR:%15.15f RFC: %15.15f (%8x)\n", uHalfP,Item.val.fnum, d , UsefulBufUtil_CopyFloatToUint32(d));
207
208 if(isnan(d)) {
209 // The RFC code uses the native instructions which may or may not
210 // handle sNaN, qNaN and NaN payloads correctly. This test just
211 // makes sure it is a NaN and doesn't worry about the type of NaN
212 if(!isnan(Item.val.dfnum)) {
213 return -3;
214 }
215 } else {
216 if(Item.val.dfnum != d) {
217 return -2;
218 }
219 }
220 }
221 return 0;
222}
223
224
225/*
226 {"zero": 0.0,
227 "negative zero": -0.0,
228 "infinitity": Infinity,
229 "negative infinitity": -Infinity,
230 "NaN": NaN,
231 "one": 1.0,
232 "one third": 0.333251953125,
233 "largest half-precision": 65504.0,
234 "largest half-precision point one": 65504.1,
235 "too-large half-precision": 65536.0,
236 "smallest subnormal": 5.96046448e-8,
237 "smallest normal": 0.00006103515261202119,
238 "biggest subnormal": 0.00006103515625,
239 "subnormal single": 4.00000646641519e-40,
240 3: -2.0,
241 "large single exp": 2.5521177519070385e+38,
242 "too-large single exp": 5.104235503814077e+38,
243 "biggest single with prec": 16777216.0,
244 "first single with prec loss": 16777217.0,
245 1: "fin"}
246 */
247static const uint8_t spExpectedSmallest[] = {
248 0xB4, 0x64, 0x7A, 0x65, 0x72, 0x6F, 0xF9, 0x00, 0x00, 0x6D,
249 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x7A,
250 0x65, 0x72, 0x6F, 0xF9, 0x80, 0x00, 0x6A, 0x69, 0x6E, 0x66,
251 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79, 0xF9, 0x7C, 0x00,
252 0x73, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20,
253 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
254 0xF9, 0xFC, 0x00, 0x63, 0x4E, 0x61, 0x4E, 0xF9, 0x7E, 0x00,
255 0x63, 0x6F, 0x6E, 0x65, 0xF9, 0x3C, 0x00, 0x69, 0x6F, 0x6E,
256 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0xF9, 0x35, 0x55,
257 0x76, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68,
258 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73,
259 0x69, 0x6F, 0x6E, 0xF9, 0x7B, 0xFF, 0x78, 0x20, 0x6C, 0x61,
260 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C, 0x66,
261 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
262 0x20, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x20, 0x6F, 0x6E, 0x65,
263 0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33, 0x78,
264 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65,
265 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63,
266 0x69, 0x73, 0x69, 0x6F, 0x6E, 0xFA, 0x47, 0x80, 0x00, 0x00,
267 0x72, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20,
268 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0xFB,
269 0x3E, 0x70, 0x00, 0x00, 0x00, 0x1C, 0x5F, 0x68, 0x6F, 0x73,
270 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F,
271 0x72, 0x6D, 0x61, 0x6C, 0xFA, 0x38, 0x7F, 0xFF, 0xFF, 0x71,
272 0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75,
273 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0xF9, 0x04, 0x00,
274 0x70, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
275 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0xFB, 0x37, 0xC1,
276 0x6C, 0x28, 0x00, 0x00, 0x00, 0x00, 0x03, 0xF9, 0xC0, 0x00,
277 0x70, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x73, 0x69, 0x6E,
278 0x67, 0x6C, 0x65, 0x20, 0x65, 0x78, 0x70, 0xFA, 0x7F, 0x40,
279 0x00, 0x00, 0x74, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72,
280 0x67, 0x65, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20,
281 0x65, 0x78, 0x70, 0xFB, 0x47, 0xF8, 0x00, 0x00, 0x00, 0x00,
282 0x00, 0x00, 0x78, 0x18, 0x62, 0x69, 0x67, 0x67, 0x65, 0x73,
283 0x74, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77,
284 0x69, 0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0xFA, 0x4B,
285 0x80, 0x00, 0x00, 0x78, 0x1B, 0x66, 0x69, 0x72, 0x73, 0x74,
286 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77, 0x69,
287 0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0x20, 0x6C, 0x6F,
288 0x73, 0x73, 0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00,
289 0x00, 0x01, 0x63, 0x66, 0x69, 0x6E
290};
291
292
293int DoubleAsSmallestTest()
294{
295 UsefulBuf_MAKE_STACK_UB(EncodedHalfsMem, 420);
296
297#define QCBOREncode_AddDoubleAsSmallestToMap QCBOREncode_AddDoubleToMap
298#define QCBOREncode_AddDoubleAsSmallestToMapN QCBOREncode_AddDoubleToMapN
299
300
301 QCBOREncodeContext EC;
302 QCBOREncode_Init(&EC, EncodedHalfsMem);
303 // These are mostly from https://en.wikipedia.org/wiki/Half-precision_floating-point_format
304 QCBOREncode_OpenMap(&EC);
305 // 64 # text(4)
306 // 7A65726F # "zero"
307 // F9 0000 # primitive(0)
308 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "zero", 0.00);
309
310 // 64 # text(4)
311 // 7A65726F # "negative zero"
312 // F9 8000 # primitive(0)
313 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "negative zero", -0.00);
314
315 // 6A # text(10)
316 // 696E66696E6974697479 # "infinitity"
317 // F9 7C00 # primitive(31744)
318 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "infinitity", INFINITY);
319
320 // 73 # text(19)
321 // 6E6567617469766520696E66696E6974697479 # "negative infinitity"
322 // F9 FC00 # primitive(64512)
323 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "negative infinitity", -INFINITY);
324
325 // 63 # text(3)
326 // 4E614E # "NaN"
327 // F9 7E00 # primitive(32256)
328 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "NaN", NAN);
329
330 // TODO: test a few NaN variants
331
332 // 63 # text(3)
333 // 6F6E65 # "one"
334 // F9 3C00 # primitive(15360)
335 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "one", 1.0);
336
337 // 69 # text(9)
338 // 6F6E65207468697264 # "one third"
339 // F9 3555 # primitive(13653)
340 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "one third", 0.333251953125);
341
342 // 76 # text(22)
343 // 6C6172676573742068616C662D707265636973696F6E # "largest half-precision"
344 // F9 7BFF # primitive(31743)
345 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "largest half-precision",65504.0);
346
347 // 76 # text(22)
348 // 6C6172676573742068616C662D707265636973696F6E # "largest half-precision"
349 // F9 7BFF # primitive(31743)
350 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "largest half-precision point one",65504.1);
351
352 // Float 65536.0F is 0x47800000 in hex. It has an exponent of 16, which is larger than 15, the largest half-precision exponent
353 // 78 18 # text(24)
354 // 746F6F2D6C617267652068616C662D707265636973696F6E # "too-large half-precision"
355 // FA 47800000 # primitive(31743)
356 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "too-large half-precision", 65536.0);
357
358 // The smallest possible half-precision subnormal, but digitis are lost converting
359 // to half, so this turns into a double
360 // 72 # text(18)
361 // 736D616C6C657374207375626E6F726D616C # "smallest subnormal"
362 // FB 3E700000001C5F68 # primitive(4499096027744984936)
363 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "smallest subnormal", 0.0000000596046448);
364
365 // The smallest possible half-precision snormal, but digitis are lost converting
366 // to half, so this turns into a single TODO: confirm this is right
367 // 6F # text(15)
368 // 736D616C6C657374206E6F726D616C # "smallest normal"
369 // FA 387FFFFF # primitive(947912703)
370 // in hex single is 0x387fffff, exponent -15, significand 7fffff
371 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "smallest normal", 0.0000610351526F);
372
373 // 71 # text(17)
374 // 62696767657374207375626E6F726D616C # "biggest subnormal"
375 // F9 0400 # primitive(1024)
376 // in hex single is 0x38800000, exponent -14, significand 0
377 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "biggest subnormal", 0.0000610351563F);
378
379 // 70 # text(16)
380 // 7375626E6F726D616C2073696E676C65 # "subnormal single"
381 // FB 37C16C2800000000 # primitive(4017611261645684736)
382 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "subnormal single", 4e-40F);
383
384 // 03 # unsigned(3)
385 // F9 C000 # primitive(49152)
386 QCBOREncode_AddDoubleAsSmallestToMapN(&EC, 3, -2.0);
387
388 // 70 # text(16)
389 // 6C617267652073696E676C6520657870 # "large single exp"
390 // FA 7F400000 # primitive(2134900736)
391 // (0x01LL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) | ((127LL + DOUBLE_EXPONENT_BIAS) << DOUBLE_EXPONENT_SHIFT);
392 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "large single exp", 2.5521177519070385E+38); // Exponent fits single
393
394 // 74 # text(20)
395 // 746F6F2D6C617267652073696E676C6520657870 # "too-large single exp"
396 // FB 47F8000000000000 # primitive(5185894970917126144)
397 // (0x01LL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) | ((128LL + DOUBLE_EXPONENT_BIAS) << DOUBLE_EXPONENT_SHIFT);
398 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "too-large single exp", 5.104235503814077E+38); // Exponent too large for single
399
400 // 66 # text(6)
401 // 646664666465 # "dfdfde"
402 // FA 4B800000 # primitive(1266679808)
403 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "biggest single with prec",16777216); // Single with no precision loss
404
405 // 78 18 # text(24)
406 // 626967676573742073696E676C6520776974682070726563 # "biggest single with prec"
407 // FA 4B800000 # primitive(1266679808)
408 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "first single with prec loss",16777217); // Double becuase of precision loss
409
410 // Just a convenient marker when cutting and pasting encoded CBOR
411 QCBOREncode_AddSZStringToMapN(&EC, 1, "fin");
412
413 QCBOREncode_CloseMap(&EC);
414
415 UsefulBufC EncodedHalfs;
416 int nReturn = QCBOREncode_Finish(&EC, &EncodedHalfs);
417 if(nReturn) {
418 return -1;
419 }
420
421 if(UsefulBuf_Compare(EncodedHalfs, UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedSmallest))) {
422 return -3;
423 }
424
425 return 0;
426}
427
428
429
430#ifdef NAN_EXPERIMENT
431/*
432 Code for checking what the double to float cast does with
433 NaNs. Not run as part of tests. Keep it around to
434 be able to check various platforms and CPUs.
435 */
436
437#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
438#define DOUBLE_NUM_EXPONENT_BITS (11)
439#define DOUBLE_NUM_SIGN_BITS (1)
440
441#define DOUBLE_SIGNIFICAND_SHIFT (0)
442#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
443#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)
444
445#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
446#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
447#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
448#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))
449
450
451static int NaNExperiments() {
452 double dqNaN = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | DOUBLE_QUIET_NAN_BIT);
453 double dsNaN = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | 0x01);
454 double dqNaNPayload = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | DOUBLE_QUIET_NAN_BIT | 0xf00f);
455
456 float f1 = (float)dqNaN;
457 float f2 = (float)dsNaN;
458 float f3 = (float)dqNaNPayload;
459
460
461 uint32_t uqNaN = UsefulBufUtil_CopyFloatToUint32((float)dqNaN);
462 uint32_t usNaN = UsefulBufUtil_CopyFloatToUint32((float)dsNaN);
463 uint32_t uqNaNPayload = UsefulBufUtil_CopyFloatToUint32((float)dqNaNPayload);
464
465 // Result of this on x86 is that every NaN is a qNaN. The intel
466 // CVTSD2SS instruction ignores the NaN payload and even converts
467 // a sNaN to a qNaN.
468
469 return 0;
470}
471#endif
472
473
474