/* xxxprec.h -- common extended precision functionality */ #include #include "xmath.h" _C_STD_BEGIN #if !defined(MRTDLL) _C_LIB_DECL #endif /* defined(MRTDLL) */ #if _HAS_DINKUM_CLIB #else /* _HAS_DINKUM_CLIB */ #ifndef ldexpf #define ldexpf(x, y) ldexp((double)(x), (y)) #endif /* ldexp */ #ifndef sqrtf #define sqrtf(x) sqrt((double)(x)) #endif /* sqrtf */ #endif /* _HAS_DINKUM_CLIB */ #define BIG_EXP (2 * FMAXEXP) /* very large, as exponents go */ #define BITS_WORD (FBITS / 2) /* all words same for now */ #define NBUF 4 /* size of delay line for mulh */ #define COPY_UP(j, n) {int m = j; \ for (; ++m < n && (p[m - 1] = p[m]) != FLIT(0.0); ) \ ; \ p[n - 1] = FLIT(0.0);} /* STET */ #if 0 #include static void printit(const char *s, FTYPE *p, int n) { /* print xp array */ int i; printf(s); for (i = 0; i < n && (p[i] != FLIT(0.0) || i == 0); ++i) printf(" %La", (long double)p[i]); printf("\n"); } #endif /* 0 */ _CRTIMP2_PURE FTYPE __CLRCALL_PURE_OR_CDECL FNAME(Xp_getw)(const FTYPE *p, int n) { /* get total value */ if (n == 0) return (FLIT(0.0)); else if (n == 1 || p[0] == FLIT(0.0) || p[1] == FLIT(0.0)) return (p[0]); else if (n == 2 || p[2] == FLIT(0.0)) return (p[0] + p[1]); else { /* extra bits, ensure proper rounding */ FTYPE p01 = p[0] + p[1]; FTYPE p2 = p[2]; if (p[3] != FLIT(0.0)) FSETLSB(p2); /* fold in sticky bit from p[3] */ if (p01 - p[0] == p[1]) return (p01 + p2); /* carry is within p[2], add it in */ else return (p[0] + (p[1] + p2)); /* fold in p[2] then add it in */ } } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_setw)(FTYPE *p, int n, FTYPE x) { /* load a full-precision value */ FTYPE x0 = x; short errx, xexp; if (n <= 0) ; /* no room, do nothing */ else if (n == 1 || (errx = FNAME(Dunscale)(&xexp, &x0)) == 0) p[0] = x0; /* zero or no extra room, store original value */ else if (0 < errx) { /* store Inf or NaN with backstop for safety */ p[0] = x0; p[1] = FLIT(0.0); } else { /* finite, unpack it */ FNAME(Dint)(&x0, BITS_WORD); FNAME(Dscale)(&x0, xexp); p[0] = x0; /* ms bits */ p[1] = x - x0; /* ls bits */ if ((FBITS & 1) != 0 && 2 < n && p[1] != FLIT(0.0)) { /* may need a third word */ x = p[1]; FNAME(Dunscale)(&xexp, &p[1]); FNAME(Dint)(&p[1], BITS_WORD); FNAME(Dscale)(&p[1], xexp); p[2] = x - p[1]; if (3 < n && p[2] != FLIT(0.0)) p[3] = FLIT(0.0); } else if (2 < n) p[2] = FLIT(0.0); } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_addh)(FTYPE *p, int n, FTYPE x0) { /* add a half-precision value */ FTYPE xscaled = x0; short errx, xexp; if (n == 0) ; else if (0 < (errx = FNAME(Dunscale)(&xexp, &xscaled))) if (errx == _NANCODE || (errx = FNAME(Dtest)(&p[0])) <= 0) p[0] = x0; /* x0 NaN, or x0 Inf and y finite, just store x0 */ else if (errx == _NANCODE || FISNEG(x0) == FISNEG(p[0])) ; /* leave NaN or Inf alone */ else { /* Inf - Inf is invalid */ _Feraise(_FE_INVALID); p[0] = FCONST(Nan); if (1 < n) p[1] = FLIT(0.0); } else if (errx < 0) { /* x0 is finite nonzero, add it */ long prevexp = BIG_EXP; int k; for (k = 0; k < n; ) { /* look for term comparable to xexp to add x0 */ FTYPE yscaled = p[k]; int mybits = BITS_WORD; short yexp; long diff; if (0 < (errx = FNAME(Dunscale)(&yexp, &yscaled))) break; /* y is Inf or NaN, just leave it alone */ else if (errx == 0) { /* 0 + x == x */ p[k] = x0; if (k + 1 < n) p[k + 1] = FLIT(0.0); /* add new trailing zero */ break; } else if ((diff = (long)yexp - xexp) <= -mybits && x0 != FLIT(0.0)) { /* insert nonzero x0 and loop to renormalize */ int j; for (j = k; ++j < n && p[j] != FLIT(0.0); ) ; if (j < n - 1) ++j; /* extra room, copy trailing zero down too */ else if (j == n) --j; /* no room, don't copy smallest word */ for (; k < j; --j) p[j] = p[j - 1]; /* copy down words */ p[k] = x0; x0 = FLIT(0.0); } else if (mybits <= diff && x0 != FLIT(0.0)) { /* loop to add finite x0 to smaller words */ prevexp = yexp; ++k; } else { /* partition sum and renormalize */ if ((p[k] += x0) == FLIT(0.0)) { /* term sum is zero, copy up words */ COPY_UP(k, n) if (p[k] == FLIT(0.0)) break; } x0 = p[k]; FNAME(Dunscale)(&xexp, &x0); if (prevexp - mybits < xexp) { /* propagate bits up */ FNAME(Dint)(&x0, (short)(xexp - (prevexp - mybits))); FNAME(Dscale)(&x0, xexp); if ((p[k] -= x0) == FLIT(0.0)) { /* all bits carry, copy up words */ COPY_UP(k, n) } if (--k == 0) prevexp = BIG_EXP; else { /* recompute prevexp */ xscaled = p[k - 1]; FNAME(Dunscale)(&yexp, &xscaled); prevexp = yexp; } } else if (k + 1 == n) break; /* don't truncate bits in last word */ else { /* propagate any excess bits down */ x0 = p[k]; FNAME(Dunscale)(&yexp, &p[k]); FNAME(Dint)(&p[k], BITS_WORD); FNAME(Dscale)(&p[k], yexp); x0 -= p[k]; prevexp = yexp; xscaled = x0 != FLIT(0.0) ? x0 : p[k]; FNAME(Dunscale)(&xexp, &xscaled); ++k; } } } } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_mulh)(FTYPE *p, int n, FTYPE x0) { /* multiply by a half-precision value */ short errx; int j, k; FTYPE buf[NBUF]; if (0 < n) { /* check for special values */ buf[0] = p[0] * x0; if (0 <= (errx = FNAME(Dtest)(&buf[0]))) { /* quit early on 0, Inf, or NaN */ if (errx == _NANCODE) _Feraise(_FE_INVALID); p[0] = buf[0]; if (0 < errx && 1 < n) p[1] = FLIT(0.0); return (p); } p[0] = FLIT(0.0); } for (j = 1, k = 0; k < n; ++k, --j) { /* sum partial products */ for (; j < NBUF; ++j) if (k + j < n && p[k + j] != FLIT(0.0)) { /* copy up a partial product */ buf[j] = p[k + j] * x0; p[k + j] = FLIT(0.0); } else { /* terminate sequence */ buf[j] = FLIT(0.0); j = 2 * NBUF; break; } if (buf[0] == FLIT(0.0)) break; /* input done */ else { /* add in partial product by halves */ int i = 0; FTYPE y0 = buf[0]; short xexp; FNAME(Dunscale)(&xexp, &y0); FNAME(Dint)(&y0, BITS_WORD); /* clear low half bits */ FNAME(Dscale)(&y0, xexp); FNAME(Xp_addh)(p, n, y0); /* add in ms part */ FNAME(Xp_addh)(p, n, buf[0] - y0); /* add in ls part */ for (; ++i < j; ) if ((buf[i - 1] = buf[i]) == FLIT(0.0)) break; /* copy down delay line */ } } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_setn)(FTYPE *p, int n, long x) { /* load a long integer */ #if 27 <= FBITS FNAME(Xp_setw)(p, n, (FTYPE)x); #else /* 27 <= FBITS */ FNAME(Xp_setw)(p, n, (FTYPE)(x / 10000)); FNAME(Xp_mulh)(p, n, (FTYPE)10000); FNAME(Xp_addh)(p, n, (FTYPE)(x % 10000)); #endif /* 27 <= FBITS */ return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_movx)(FTYPE *p, int n, const FTYPE *q) { /* copy an extended precision value */ memcpy(p, q, n * sizeof (FTYPE)); return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_addx)(FTYPE *p, int n, const FTYPE *q, int m) { /* add an extended precision value */ int k; for (k = 0; k < m && q[k] != FLIT(0.0); ++k) FNAME(Xp_addh)(p, n, q[k]); return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_subx)(FTYPE *p, int n, const FTYPE *q, int m) { /* subtract an extended precision value */ int k; for (k = 0; k < m && q[k] != FLIT(0.0); ++k) FNAME(Xp_addh)(p, n, -q[k]); return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_ldexpx)(FTYPE *p, int n, int m) { /* scale an extended precision value */ int k; for (k = 0; k < n; ++k) { p[k] = FFUN(ldexp)(p[k], m); if (p[k] == FLIT(0.0)) break; } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_mulx)(FTYPE *p, int n, const FTYPE *q, int m, FTYPE *ptemp2) { /* multiply by an extended precision value (needs 2*n temp) */ if (n == 0 || m == 0) ; else if (q[0] == FLIT(0.0) || q[1] == FLIT(0.0)) FNAME(Xp_mulh)(p, n, q[0]); else { /* sum partial products */ FTYPE *px = ptemp2; FTYPE *pac = ptemp2 + n; int j; FNAME(Xp_movx)(px, n, p); FNAME(Xp_mulh)(p, n, q[0]); /* form first partial product in place */ for (j = 1; j < m && q[j] != FLIT(0.0); ++j) { /* add in a partial product */ FNAME(Xp_movx)(pac, n, px); FNAME(Xp_mulh)(pac, n, q[j]); FNAME(Xp_addx)(p, n, pac, n); } } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_invx)(FTYPE *p, int n, FTYPE *ptemp4) { /* invert an extended precision value (needs 4*n temp) */ short errx; if (n == 0) ; else if (0 <= (errx = FNAME(Dtest)(&p[0]))) { /* not finite, return special value */ if (errx == _INFCODE) p[0] = FLIT(0.0); /* 1/Inf == 0 */ else if (errx == 0) p[0] = FCONST(Inf); /* 1/0 == Inf */ /* else 1/NaN == NaN */ } else { /* p[0] is finite nonzero, invert it */ FTYPE *pac = ptemp4; FTYPE *py = ptemp4 + n; FTYPE *ptemp2 = py + n; FTYPE x0 = p[0]; int k; FNAME(Xp_movx)(py, n, p); FNAME(Xp_mulh)(py, n, -FLIT(1.0)); /* py = -x */ if (1 < n) x0 += p[1]; FNAME(Xp_setw)(p, n, FINVERT(x0)); /* p = y */ for (k = 1; k < n; k <<= 1) { /* iterate to double previous precision of 1/x */ FNAME(Xp_movx)(pac, n, p); FNAME(Xp_mulx)(pac, n, py, n, ptemp2); FNAME(Xp_addh)(pac, n, FLIT(1.0)); /* 1-x*y */ FNAME(Xp_mulx)(pac, n, p, n, ptemp2); /* y*(1-x*y) */ FNAME(Xp_addx)(p, n, pac, n); /* y += y*(1-x*y) */ } } return (p); } _CRTIMP2_PURE FTYPE *__CLRCALL_PURE_OR_CDECL FNAME(Xp_sqrtx)(FTYPE *p, int n, FTYPE *ptemp4) { /* find square root of an extended precision value (needs 4*n temp) */ if (n == 0) ; else if (0 <= FNAME(Dtest)(&p[0]) || p[0] < FLIT(0.0)) { /* not finite nonnegative, return special value */ if (p[0] < FLIT(0.0)) { /* sqrt(negative), report domain error */ _Feraise(_FE_INVALID); p[0] = FCONST(Nan); } } else { /* worth iterating, compute x* sqrt(1/x) */ FTYPE *pac = ptemp4; FTYPE *py = ptemp4 + n; FTYPE *ptemp2 = py + n; FTYPE x0 = p[0]; int k; if (1 < n) x0 += p[1]; FNAME(Xp_setw)(py, n, FINVERT(FFUN(sqrt)(x0))); /* py = y */ for (k = 2; k < n; k <<= 1) { /* iterate to double previous precision of sqrt(1/x) */ FNAME(Xp_movx)(pac, n, py); FNAME(Xp_mulh)(pac, n, -FLIT(0.5)); FNAME(Xp_mulx)(pac, n, p, n, ptemp2); FNAME(Xp_mulx)(pac, n, py, n, ptemp2); FNAME(Xp_addh)(pac, n, FLIT(1.5)); /* 3/2-x*y*y/2 */ FNAME(Xp_mulx)(py, n, pac, n, ptemp2); /* y *= 3/2-x*y*y/2 */ } FNAME(Xp_mulx)(p, n, py, n, ptemp2); /* x*sqrt(1/x) */ } return (p); } #if !defined(MRTDLL) _END_C_LIB_DECL #endif /* !defined(MRTDLL) */ _C_STD_END /* * Copyright (c) 1992-2012 by P.J. Plauger. ALL RIGHTS RESERVED. * Consult your license regarding permissions and restrictions. V6.00:0009 */