Blame - arch/mips/math-emu/dp_maddf.c - hafnium/third_party/linux.git

blob: 3da0ce44cdef4fd4636e25673be1be5ab7172508 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0-only
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2	/*
				3	* IEEE754 floating point arithmetic
				4	* double precision: MADDF.f (Fused Multiply Add)
				5	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
				6	*
				7	* MIPS floating point support
				8	* Copyright (C) 2015 Imagination Technologies, Ltd.
				9	* Author: Markos Chandras <markos.chandras@imgtec.com>
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	10	*/
				11
				12	#include "ieee754dp.h"
				13
				14
				15	/* 128 bits shift right logical with rounding. */
				16	static void srl128(u64 hptr, u64 lptr, int count)
				17	{
				18	u64 low;
				19
				20	if (count >= 128) {
				21	lptr = hptr != 0 \|\| *lptr != 0;
				22	*hptr = 0;
				23	} else if (count >= 64) {
				24	if (count == 64) {
				25	lptr = hptr \| (*lptr != 0);
				26	} else {
				27	low = *lptr;
				28	lptr = hptr >> (count - 64);
				29	lptr \|= (hptr << (128 - count)) != 0 \|\| low != 0;
				30	}
				31	*hptr = 0;
				32	} else {
				33	low = *lptr;
				34	lptr = low >> count \| hptr << (64 - count);
				35	*lptr \|= (low << (64 - count)) != 0;
				36	hptr = hptr >> count;
				37	}
				38	}
				39
				40	static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
				41	union ieee754dp y, enum maddf_flags flags)
				42	{
				43	int re;
				44	int rs;
				45	unsigned int lxm;
				46	unsigned int hxm;
				47	unsigned int lym;
				48	unsigned int hym;
				49	u64 lrm;
				50	u64 hrm;
				51	u64 lzm;
				52	u64 hzm;
				53	u64 t;
				54	u64 at;
				55	int s;
				56
				57	COMPXDP;
				58	COMPYDP;
				59	COMPZDP;
				60
				61	EXPLODEXDP;
				62	EXPLODEYDP;
				63	EXPLODEZDP;
				64
				65	FLUSHXDP;
				66	FLUSHYDP;
				67	FLUSHZDP;
				68
				69	ieee754_clearcx();
				70
				71	/*
				72	* Handle the cases when at least one of x, y or z is a NaN.
				73	* Order of precedence is sNaN, qNaN and z, x, y.
				74	*/
				75	if (zc == IEEE754_CLASS_SNAN)
				76	return ieee754dp_nanxcpt(z);
				77	if (xc == IEEE754_CLASS_SNAN)
				78	return ieee754dp_nanxcpt(x);
				79	if (yc == IEEE754_CLASS_SNAN)
				80	return ieee754dp_nanxcpt(y);
				81	if (zc == IEEE754_CLASS_QNAN)
				82	return z;
				83	if (xc == IEEE754_CLASS_QNAN)
				84	return x;
				85	if (yc == IEEE754_CLASS_QNAN)
				86	return y;
				87
				88	if (zc == IEEE754_CLASS_DNORM)
				89	DPDNORMZ;
				90	/* ZERO z cases are handled separately below */
				91
				92	switch (CLPAIR(xc, yc)) {
				93
				94	/*
				95	* Infinity handling
				96	*/
				97	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
				98	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
				99	ieee754_setcx(IEEE754_INVALID_OPERATION);
				100	return ieee754dp_indef();
				101
				102	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
				103	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
				104	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
				105	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
				106	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
				107	if ((zc == IEEE754_CLASS_INF) &&
				108	((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) \|\|
				109	((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
				110	/*
				111	* Cases of addition of infinities with opposite signs
				112	* or subtraction of infinities with same signs.
				113	*/
				114	ieee754_setcx(IEEE754_INVALID_OPERATION);
				115	return ieee754dp_indef();
				116	}
				117	/*
				118	* z is here either not an infinity, or an infinity having the
				119	* same sign as product (x*y) (in case of MADDF.D instruction)
				120	* or product -(x*y) (in MSUBF.D case). The result must be an
				121	* infinity, and its sign is determined only by the value of
				122	* (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
				123	*/
				124	if (flags & MADDF_NEGATE_PRODUCT)
				125	return ieee754dp_inf(1 ^ (xs ^ ys));
				126	else
				127	return ieee754dp_inf(xs ^ ys);
				128
				129	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
				130	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
				131	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
				132	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
				133	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
				134	if (zc == IEEE754_CLASS_INF)
				135	return ieee754dp_inf(zs);
				136	if (zc == IEEE754_CLASS_ZERO) {
				137	/* Handle cases +0 + (-0) and similar ones. */
				138	if ((!(flags & MADDF_NEGATE_PRODUCT)
				139	&& (zs == (xs ^ ys))) \|\|
				140	((flags & MADDF_NEGATE_PRODUCT)
				141	&& (zs != (xs ^ ys))))
				142	/*
				143	* Cases of addition of zeros of equal signs
				144	* or subtraction of zeroes of opposite signs.
				145	* The sign of the resulting zero is in any
				146	* such case determined only by the sign of z.
				147	*/
				148	return z;
				149
				150	return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
				151	}
				152	/* xy is here 0, and z is not 0, so just return z /
				153	return z;
				154
				155	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
				156	DPDNORMX;
				157	/* fall through */
				158
				159	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
				160	if (zc == IEEE754_CLASS_INF)
				161	return ieee754dp_inf(zs);
				162	DPDNORMY;
				163	break;
				164
				165	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
				166	if (zc == IEEE754_CLASS_INF)
				167	return ieee754dp_inf(zs);
				168	DPDNORMX;
				169	break;
				170
				171	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
				172	if (zc == IEEE754_CLASS_INF)
				173	return ieee754dp_inf(zs);
				174	/* continue to real computations */
				175	}
				176
				177	/* Finally get to do some computation */
				178
				179	/*
				180	* Do the multiplication bit first
				181	*
				182	* rm = xm * ym, re = xe + ye basically
				183	*
				184	* At this point xm and ym should have been normalized.
				185	*/
				186	assert(xm & DP_HIDDEN_BIT);
				187	assert(ym & DP_HIDDEN_BIT);
				188
				189	re = xe + ye;
				190	rs = xs ^ ys;
				191	if (flags & MADDF_NEGATE_PRODUCT)
				192	rs ^= 1;
				193
				194	/* shunt to top of word */
				195	xm <<= 64 - (DP_FBITS + 1);
				196	ym <<= 64 - (DP_FBITS + 1);
				197
				198	/*
				199	* Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
				200	*/
				201
				202	lxm = xm;
				203	hxm = xm >> 32;
				204	lym = ym;
				205	hym = ym >> 32;
				206
				207	lrm = DPXMULT(lxm, lym);
				208	hrm = DPXMULT(hxm, hym);
				209
				210	t = DPXMULT(lxm, hym);
				211
				212	at = lrm + (t << 32);
				213	hrm += at < lrm;
				214	lrm = at;
				215
				216	hrm = hrm + (t >> 32);
				217
				218	t = DPXMULT(hxm, lym);
				219
				220	at = lrm + (t << 32);
				221	hrm += at < lrm;
				222	lrm = at;
				223
				224	hrm = hrm + (t >> 32);
				225
				226	/* Put explicit bit at bit 126 if necessary */
				227	if ((int64_t)hrm < 0) {
				228	lrm = (hrm << 63) \| (lrm >> 1);
				229	hrm = hrm >> 1;
				230	re++;
				231	}
				232
				233	assert(hrm & (1 << 62));
				234
				235	if (zc == IEEE754_CLASS_ZERO) {
				236	/*
				237	* Move explicit bit from bit 126 to bit 55 since the
				238	* ieee754dp_format code expects the mantissa to be
				239	* 56 bits wide (53 + 3 rounding bits).
				240	*/
				241	srl128(&hrm, &lrm, (126 - 55));
				242	return ieee754dp_format(rs, re, lrm);
				243	}
				244
				245	/* Move explicit bit from bit 52 to bit 126 */
				246	lzm = 0;
				247	hzm = zm << 10;
				248	assert(hzm & (1 << 62));
				249
				250	/* Make the exponents the same */
				251	if (ze > re) {
				252	/*
				253	* Have to shift y fraction right to align.
				254	*/
				255	s = ze - re;
				256	srl128(&hrm, &lrm, s);
				257	re += s;
				258	} else if (re > ze) {
				259	/*
				260	* Have to shift x fraction right to align.
				261	*/
				262	s = re - ze;
				263	srl128(&hzm, &lzm, s);
				264	ze += s;
				265	}
				266	assert(ze == re);
				267	assert(ze <= DP_EMAX);
				268
				269	/* Do the addition */
				270	if (zs == rs) {
				271	/*
				272	* Generate 128 bit result by adding two 127 bit numbers
				273	* leaving result in hzm:lzm, zs and ze.
				274	*/
				275	hzm = hzm + hrm + (lzm > (lzm + lrm));
				276	lzm = lzm + lrm;
				277	if ((int64_t)hzm < 0) { /* carry out */
				278	srl128(&hzm, &lzm, 1);
				279	ze++;
				280	}
				281	} else {
				282	if (hzm > hrm \|\| (hzm == hrm && lzm >= lrm)) {
				283	hzm = hzm - hrm - (lzm < lrm);
				284	lzm = lzm - lrm;
				285	} else {
				286	hzm = hrm - hzm - (lrm < lzm);
				287	lzm = lrm - lzm;
				288	zs = rs;
				289	}
				290	if (lzm == 0 && hzm == 0)
				291	return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
				292
				293	/*
				294	* Put explicit bit at bit 126 if necessary.
				295	*/
				296	if (hzm == 0) {
				297	/* left shift by 63 or 64 bits */
				298	if ((int64_t)lzm < 0) {
				299	/* MSB of lzm is the explicit bit */
				300	hzm = lzm >> 1;
				301	lzm = lzm << 63;
				302	ze -= 63;
				303	} else {
				304	hzm = lzm;
				305	lzm = 0;
				306	ze -= 64;
				307	}
				308	}
				309
				310	t = 0;
				311	while ((hzm >> (62 - t)) == 0)
				312	t++;
				313
				314	assert(t <= 62);
				315	if (t) {
				316	hzm = hzm << t \| lzm >> (64 - t);
				317	lzm = lzm << t;
				318	ze -= t;
				319	}
				320	}
				321
				322	/*
				323	* Move explicit bit from bit 126 to bit 55 since the
				324	* ieee754dp_format code expects the mantissa to be
				325	* 56 bits wide (53 + 3 rounding bits).
				326	*/
				327	srl128(&hzm, &lzm, (126 - 55));
				328
				329	return ieee754dp_format(zs, ze, lzm);
				330	}
				331
				332	union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
				333	union ieee754dp y)
				334	{
				335	return _dp_maddf(z, x, y, 0);
				336	}
				337
				338	union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
				339	union ieee754dp y)
				340	{
				341	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
				342	}