8ae002
Partial backports of:
8ae002
=====================
8ae002
8ae002
commit c5d5d574cbfa96d0f6c1db24d1e072c472627e41
8ae002
Author: Ondřej Bílka <neleai@seznam.cz>
8ae002
Date:   Thu Oct 17 16:03:24 2013 +0200
8ae002
8ae002
    Format floating routines.
8ae002
8ae002
commit da08f647d58d674db08cdb3e61c8826c89470e2e
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Dec 21 09:15:10 2012 +0530
8ae002
8ae002
    Move more constants into static variables
8ae002
    
8ae002
    Code cleanup.
8ae002
8ae002
commit f93a8d15699ee699282465dc1e03e033f3fabb52
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Jan 16 16:06:48 2013 +0530
8ae002
8ae002
    Consolidate constant defines into mpa.h
8ae002
8ae002
commit caa99d06e7f1403887294442af520b0f8c6f3de0
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Jan 18 11:18:13 2013 +0530
8ae002
8ae002
    Simplify calculation of 2^-m in __mpexp
8ae002
8ae002
commit 107a5bf085f5c4ef8c28266a34d476724cfc3475
8ae002
Author: Joseph Myers <joseph@codesourcery.com>
8ae002
Date:   Tue Nov 18 15:40:56 2014 +0000
8ae002
8ae002
    Fix libm mpone, mptwo namespace (bug 17616).
8ae002
8ae002
To provided __mptwo for __inv.
8ae002
8ae002
Full backports of the following:
8ae002
================================
8ae002
8ae002
commit 44e0d4c20ce5bf3825897e5d4b7caae94016214d
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Jan 2 11:44:13 2013 +0530
8ae002
8ae002
    Split mantissa calculation loop and add branch prediction
8ae002
8ae002
commit f8af25d218202ff2f5d167b8e44e4b79f91d147f
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Jan 4 15:09:33 2013 +0530
8ae002
8ae002
    Remove commented declarations
8ae002
8ae002
commit a9e48ab40e230c7fe34e4892bec8af4f3f975a20
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Jan 4 15:42:09 2013 +0530
8ae002
8ae002
    Clean up comment for MP_NO
8ae002
8ae002
commit fffb407f4668b40b3df1eb8ee3ae3bc64ee79e20
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Jan 4 22:52:12 2013 +0530
8ae002
8ae002
    Remove unused __cr and __cpymn
8ae002
8ae002
commit 950c99ca9094e7dc6394e90395f51e12093393aa
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Jan 9 19:07:15 2013 +0530
8ae002
8ae002
    Update comments in mpa.c
8ae002
    
8ae002
    Fixed comment style and clearer wording in some cases.
8ae002
8ae002
commit 1066a53440d2744566e97c59bcd0d422186b3e90
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Mon Jan 14 21:31:25 2013 +0530
8ae002
8ae002
    Fix code formatting in mpa.c
8ae002
    
8ae002
    This includes the overridden mpa.c in power4.
8ae002
8ae002
commit 2a91b5735ac1bc65ce5c2a3646d75ba7208e26e9
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Mon Jan 14 21:36:58 2013 +0530
8ae002
8ae002
    Minor tweak to mp multiplication
8ae002
    
8ae002
    Add a local variable to remove extra copies to/from memory in the Z
8ae002
    array.
8ae002
8ae002
ommit 45f058844c33f670475bd02f266942746bcb332b
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Tue Feb 26 21:28:16 2013 +0530
8ae002
8ae002
    Another tweak to the multiplication algorithm
8ae002
    
8ae002
    Reduce the formula to calculate mantissa so that we reduce the net
8ae002
    number of multiplications performed.
8ae002
8ae002
commit bab8a695ee79a5a6e9b2b699938952b006fcbbec
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Thu Feb 21 14:29:18 2013 +0530
8ae002
8ae002
    Fix whitespace differences between generic and powerpc mpa.c
8ae002
8ae002
8ae002
commit 2d0e0f29f85036d1189231cb7c1f19f27ba14a89
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Fri Feb 15 23:56:20 2013 +0530
8ae002
8ae002
    Fix determination of lower precision in __mul
8ae002
8ae002
commit 909279a5cfa938c989c9b01c8f48a0276291ec45
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Feb 13 14:16:23 2013 +0530
8ae002
8ae002
    Optimized mp multiplication
8ae002
    
8ae002
    Don't bother multiplying zeroes since that only wastes cycles.
8ae002
8ae002
commit bdf028142eb77d6ae59500db875068fa5d7b059d
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Feb 13 13:55:29 2013 +0530
8ae002
8ae002
    Clean up add_magnitudes and sub_magnitudes
8ae002
8ae002
commit d6752ccd696c71d23cd3df8fb9cc60b61c32e65a
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Thu Feb 14 10:31:09 2013 +0530
8ae002
8ae002
    New __sqr function as a faster special case of __mul
8ae002
8ae002
commit 4709fe7602b56e9f6ee1ab6afb4067409a784f29
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Sat Feb 16 00:09:29 2013 +0530
8ae002
8ae002
    Use intermediate variable to compute exponent in __mul
8ae002
8ae002
commit 20cd7fb3ae63795ae7c9a464abf5ed19b364ade0
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Wed Feb 20 18:56:20 2013 +0530
8ae002
8ae002
    Copy comment about inner loop from powerpc mpa.c to the default one
8ae002
8ae002
commit e69804d14e43f14c3c65dc570afdbfb822c9838b
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Mon Feb 25 16:43:02 2013 +0530
8ae002
8ae002
    Use long wherever possible in mpa.c
8ae002
    
8ae002
    Using long throughout like powerpc does is beneficial since it reduces
8ae002
    the need to switch to 32-bit instructions.  It gives a very minor
8ae002
    performance improvement.
8ae002
8ae002
commit 82a9811d29c00161c7c8ea7f70e2cc30988e192e
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Thu Mar 7 12:23:29 2013 +0530
8ae002
8ae002
    Use generic mpa.c code for everything except __mul and __sqr
8ae002
8ae002
commit 6f2e90e78f151bab153c2b38492505ae2012db06
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Tue Mar 26 19:28:50 2013 +0530
8ae002
8ae002
    Make mantissa type of mp_no configurable
8ae002
8ae002
    The mantissa of mp_no is intended to take only integral values.  This
8ae002
    is a relatively good choice for powerpc due to its 4 fpus, but not for
8ae002
    other architectures, which suffer due to this choice.  This change
8ae002
    makes the default mantissa a long integer and allows powerpc to
8ae002
    override it.  Additionally, some operations have been optimized for
8ae002
    integer manipulation, resulting in a significant improvement in
8ae002
    performance.
8ae002
8ae002
commit 5739f705eed5cf58e7b439e5983542e06d7fc2da
8ae002
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
8ae002
Date:   Tue Mar 26 20:24:04 2013 +0530
8ae002
8ae002
    Use integral constants
8ae002
    
8ae002
    The compiler is smart enough to convert those into double for powerpc,
8ae002
    but if we put them as doubles, it adds overhead by performing those
8ae002
    operations in floating point mode.
8ae002
8ae002
commit 89f3b6e18c6e7833438789746fcfc2e7189f7cac
8ae002
Author: Joseph Myers <joseph@codesourcery.com>
8ae002
Date:   Thu May 21 23:05:45 2015 +0000
8ae002
8ae002
    Fix sysdeps/ieee754/dbl-64/mpa.c for -Wuninitialized.
8ae002
    
8ae002
    If you remove the "override CFLAGS += -Wno-uninitialized" in
8ae002
    math/Makefile, one of the errors you get is:
8ae002
    
8ae002
    ../sysdeps/ieee754/dbl-64/mpa.c: In function '__mp_dbl.part.0':
8ae002
    ../sysdeps/ieee754/dbl-64/mpa.c:183:5: error: 'c' may be used uninitialized in this function [-Werror=maybe-uninitialized]
8ae002
       c *= X[0];
8ae002
    
8ae002
    The problem is that the p < 5 case initializes c if p is 1, 2, 3 or 4
8ae002
    but not otherwise, and in fact p is positive for all calls to this
8ae002
    function so the uninitialized case can't actually occur.  This patch
8ae002
    replaces the "if (p == 4)" last case with a comment so the compiler
8ae002
    can see that all paths do initialize c.
8ae002
8ae002
    Tested for x86_64.
8ae002
8ae002
            * sysdeps/ieee754/dbl-64/mpa.c (norm): Remove if condition on
8ae002
            (p == 4) case.
8ae002
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.c
8ae002
@@ -1,7 +1,7 @@
8ae002
 /*
8ae002
  * IBM Accurate Mathematical Library
8ae002
  * written by International Business Machines Corp.
8ae002
- * Copyright (C) 2001, 2011 Free Software Foundation
8ae002
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
8ae002
  *
8ae002
  * This program is free software; you can redistribute it and/or modify
8ae002
  * it under the terms of the GNU Lesser General Public License as published by
8ae002
@@ -22,9 +22,7 @@
8ae002
 /*  FUNCTIONS:                                                          */
8ae002
 /*               mcr                                                    */
8ae002
 /*               acr                                                    */
8ae002
-/*               cr                                                     */
8ae002
 /*               cpy                                                    */
8ae002
-/*               cpymn                                                  */
8ae002
 /*               norm                                                   */
8ae002
 /*               denorm                                                 */
8ae002
 /*               mp_dbl                                                 */
8ae002
@@ -44,479 +42,868 @@
8ae002
 
8ae002
 #include "endian.h"
8ae002
 #include "mpa.h"
8ae002
-#include "mpa2.h"
8ae002
-#include <sys/param.h>	/* For MIN() */
8ae002
+#include <sys/param.h>
8ae002
+#include <alloca.h>
8ae002
 
8ae002
 #ifndef SECTION
8ae002
 # define SECTION
8ae002
 #endif
8ae002
 
8ae002
+#ifndef NO__CONST
8ae002
+/* TODO: With only a partial backport of the constant cleanup from
8ae002
+	 upstream we limit the definition of these two constants to
8ae002
+	 this file.  */
8ae002
+static const mp_no __mpone = { 1, { 1.0, 1.0 } };
8ae002
+static const mp_no __mptwo = { 1, { 1.0, 2.0 } };
8ae002
+#endif
8ae002
+
8ae002
 #ifndef NO___ACR
8ae002
-/* mcr() compares the sizes of the mantissas of two multiple precision  */
8ae002
-/* numbers. Mantissas are compared regardless of the signs of the       */
8ae002
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also     */
8ae002
-/* disregarded.                                                         */
8ae002
+/* Compare mantissa of two multiple precision numbers regardless of the sign
8ae002
+   and exponent of the numbers.  */
8ae002
 static int
8ae002
-mcr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  int i;
8ae002
-  for (i=1; i<=p; i++) {
8ae002
-    if      (X[i] == Y[i])  continue;
8ae002
-    else if (X[i] >  Y[i])  return  1;
8ae002
-    else                    return -1; }
8ae002
+mcr (const mp_no *x, const mp_no *y, int p)
8ae002
+{
8ae002
+  long i;
8ae002
+  long p2 = p;
8ae002
+  for (i = 1; i <= p2; i++)
8ae002
+    {
8ae002
+      if (X[i] == Y[i])
8ae002
+	continue;
8ae002
+      else if (X[i] > Y[i])
8ae002
+	return 1;
8ae002
+      else
8ae002
+	return -1;
8ae002
+    }
8ae002
   return 0;
8ae002
 }
8ae002
 
8ae002
-
8ae002
-/* acr() compares the absolute values of two multiple precision numbers */
8ae002
+/* Compare the absolute values of two multiple precision numbers.  */
8ae002
 int
8ae002
-__acr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  int i;
8ae002
-
8ae002
-  if      (X[0] == ZERO) {
8ae002
-    if    (Y[0] == ZERO) i= 0;
8ae002
-    else                 i=-1;
8ae002
-  }
8ae002
-  else if (Y[0] == ZERO) i= 1;
8ae002
-  else {
8ae002
-    if      (EX >  EY)   i= 1;
8ae002
-    else if (EX <  EY)   i=-1;
8ae002
-    else                 i= mcr(x,y,p);
8ae002
-  }
8ae002
-
8ae002
-  return i;
8ae002
-}
8ae002
-#endif
8ae002
-
8ae002
+__acr (const mp_no *x, const mp_no *y, int p)
8ae002
+{
8ae002
+  long i;
8ae002
 
8ae002
-#if 0
8ae002
-/* cr() compares the values of two multiple precision numbers           */
8ae002
-static int  __cr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  int i;
8ae002
-
8ae002
-  if      (X[0] > Y[0])  i= 1;
8ae002
-  else if (X[0] < Y[0])  i=-1;
8ae002
-  else if (X[0] < ZERO ) i= __acr(y,x,p);
8ae002
-  else                   i= __acr(x,y,p);
8ae002
+  if (X[0] == 0)
8ae002
+    {
8ae002
+      if (Y[0] == 0)
8ae002
+	i = 0;
8ae002
+      else
8ae002
+	i = -1;
8ae002
+    }
8ae002
+  else if (Y[0] == 0)
8ae002
+    i = 1;
8ae002
+  else
8ae002
+    {
8ae002
+      if (EX > EY)
8ae002
+	i = 1;
8ae002
+      else if (EX < EY)
8ae002
+	i = -1;
8ae002
+      else
8ae002
+	i = mcr (x, y, p);
8ae002
+    }
8ae002
 
8ae002
   return i;
8ae002
 }
8ae002
 #endif
8ae002
 
8ae002
-
8ae002
 #ifndef NO___CPY
8ae002
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible.      */
8ae002
-void __cpy(const mp_no *x, mp_no *y, int p) {
8ae002
+/* Copy multiple precision number X into Y.  They could be the same
8ae002
+   number.  */
8ae002
+void
8ae002
+__cpy (const mp_no *x, mp_no *y, int p)
8ae002
+{
8ae002
+  long i;
8ae002
+
8ae002
   EY = EX;
8ae002
-  for (int i=0; i <= p; i++)    Y[i] = X[i];
8ae002
+  for (i = 0; i <= p; i++)
8ae002
+    Y[i] = X[i];
8ae002
 }
8ae002
 #endif
8ae002
 
8ae002
+#ifndef NO___MP_DBL
8ae002
+/* Convert a multiple precision number *X into a double precision
8ae002
+   number *Y, normalized case (|x| >= 2**(-1022))).  X has precision
8ae002
+   P, which is positive.  */
8ae002
+static void
8ae002
+norm (const mp_no *x, double *y, int p)
8ae002
+{
8ae002
+# define R RADIXI
8ae002
+  long i;
8ae002
+  double c;
8ae002
+  mantissa_t a, u, v, z[5];
8ae002
+  if (p < 5)
8ae002
+    {
8ae002
+      if (p == 1)
8ae002
+	c = X[1];
8ae002
+      else if (p == 2)
8ae002
+	c = X[1] + R * X[2];
8ae002
+      else if (p == 3)
8ae002
+	c = X[1] + R * (X[2] + R * X[3]);
8ae002
+      else /* p == 4.  */
8ae002
+	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
8ae002
+    }
8ae002
+  else
8ae002
+    {
8ae002
+      for (a = 1, z[1] = X[1]; z[1] < TWO23; )
8ae002
+	{
8ae002
+	  a *= 2;
8ae002
+	  z[1] *= 2;
8ae002
+	}
8ae002
 
8ae002
-#if 0
8ae002
-/* Copy a multiple precision number x of precision m into a */
8ae002
-/* multiple precision number y of precision n. In case n>m, */
8ae002
-/* the digits of y beyond the m'th are set to zero. In case */
8ae002
-/* n
8ae002
-/* x=y is permissible.                                      */
8ae002
-
8ae002
-static void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
8ae002
-
8ae002
-  int i,k;
8ae002
-
8ae002
-  EY = EX;     k=MIN(m,n);
8ae002
-  for (i=0; i <= k; i++)    Y[i] = X[i];
8ae002
-  for (   ; i <= n; i++)    Y[i] = ZERO;
8ae002
-}
8ae002
-#endif
8ae002
+      for (i = 2; i < 5; i++)
8ae002
+	{
8ae002
+	  mantissa_store_t d, r;
8ae002
+	  d = X[i] * (mantissa_store_t) a;
8ae002
+	  DIV_RADIX (d, r);
8ae002
+	  z[i] = r;
8ae002
+	  z[i - 1] += d;
8ae002
+	}
8ae002
 
8ae002
+      u = ALIGN_DOWN_TO (z[3], TWO19);
8ae002
+      v = z[3] - u;
8ae002
 
8ae002
-#ifndef NO___MP_DBL
8ae002
-/* Convert a multiple precision number *x into a double precision */
8ae002
-/* number *y, normalized case  (|x| >= 2**(-1022))) */
8ae002
-static void norm(const mp_no *x, double *y, int p)
8ae002
-{
8ae002
-  #define R  radixi.d
8ae002
-  int i;
8ae002
-#if 0
8ae002
-  int k;
8ae002
-#endif
8ae002
-  double a,c,u,v,z[5];
8ae002
-  if (p<5) {
8ae002
-    if      (p==1) c = X[1];
8ae002
-    else if (p==2) c = X[1] + R* X[2];
8ae002
-    else if (p==3) c = X[1] + R*(X[2]  +   R* X[3]);
8ae002
-    else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
8ae002
-  }
8ae002
-  else {
8ae002
-    for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
8ae002
-	{a *= TWO;   z[1] *= TWO; }
8ae002
-
8ae002
-    for (i=2; i<5; i++) {
8ae002
-      z[i] = X[i]*a;
8ae002
-      u = (z[i] + CUTTER)-CUTTER;
8ae002
-      if  (u > z[i])  u -= RADIX;
8ae002
-      z[i] -= u;
8ae002
-      z[i-1] += u*RADIXI;
8ae002
-    }
8ae002
-
8ae002
-    u = (z[3] + TWO71) - TWO71;
8ae002
-    if (u > z[3])   u -= TWO19;
8ae002
-    v = z[3]-u;
8ae002
-
8ae002
-    if (v == TWO18) {
8ae002
-      if (z[4] == ZERO) {
8ae002
-	for (i=5; i <= p; i++) {
8ae002
-	  if (X[i] == ZERO)   continue;
8ae002
-	  else                {z[3] += ONE;   break; }
8ae002
+      if (v == TWO18)
8ae002
+	{
8ae002
+	  if (z[4] == 0)
8ae002
+	    {
8ae002
+	      for (i = 5; i <= p; i++)
8ae002
+		{
8ae002
+		  if (X[i] == 0)
8ae002
+		    continue;
8ae002
+		  else
8ae002
+		    {
8ae002
+		      z[3] += 1;
8ae002
+		      break;
8ae002
+		    }
8ae002
+		}
8ae002
+	    }
8ae002
+	  else
8ae002
+	    z[3] += 1;
8ae002
 	}
8ae002
-      }
8ae002
-      else              z[3] += ONE;
8ae002
-    }
8ae002
 
8ae002
-    c = (z[1] + R *(z[2] + R * z[3]))/a;
8ae002
-  }
8ae002
+      c = (z[1] + R * (z[2] + R * z[3])) / a;
8ae002
+    }
8ae002
 
8ae002
   c *= X[0];
8ae002
 
8ae002
-  for (i=1; i
8ae002
-  for (i=1; i>EX; i--)   c *= RADIXI;
8ae002
+  for (i = 1; i < EX; i++)
8ae002
+    c *= RADIX;
8ae002
+  for (i = 1; i > EX; i--)
8ae002
+    c *= RADIXI;
8ae002
 
8ae002
   *y = c;
8ae002
-#undef R
8ae002
+# undef R
8ae002
 }
8ae002
 
8ae002
-/* Convert a multiple precision number *x into a double precision */
8ae002
-/* number *y, denormalized case  (|x| < 2**(-1022))) */
8ae002
-static void denorm(const mp_no *x, double *y, int p)
8ae002
+/* Convert a multiple precision number *X into a double precision
8ae002
+   number *Y, Denormal case  (|x| < 2**(-1022))).  */
8ae002
+static void
8ae002
+denorm (const mp_no *x, double *y, int p)
8ae002
 {
8ae002
-  int i,k;
8ae002
-  double c,u,z[5];
8ae002
-#if 0
8ae002
-  double a,v;
8ae002
-#endif
8ae002
+  long i, k;
8ae002
+  long p2 = p;
8ae002
+  double c;
8ae002
+  mantissa_t u, z[5];
8ae002
+
8ae002
+# define R RADIXI
8ae002
+  if (EX < -44 || (EX == -44 && X[1] < TWO5))
8ae002
+    {
8ae002
+      *y = 0;
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-#define R  radixi.d
8ae002
-  if (EX<-44 || (EX==-44 && X[1]
8ae002
-     { *y=ZERO; return; }
8ae002
-
8ae002
-  if      (p==1) {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=ZERO;  z[3]=ZERO;  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=ZERO;  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
8ae002
-  }
8ae002
-  else if (p==2) {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  z[3]=ZERO;  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=X[2];  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
8ae002
-  }
8ae002
-  else {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  k=1;}
8ae002
-    z[3] = X[k];
8ae002
-  }
8ae002
-
8ae002
-  u = (z[3] + TWO57) - TWO57;
8ae002
-  if  (u > z[3])   u -= TWO5;
8ae002
-
8ae002
-  if (u==z[3]) {
8ae002
-    for (i=k+1; i <= p; i++) {
8ae002
-      if (X[i] == ZERO)   continue;
8ae002
-      else {z[3] += ONE;   break; }
8ae002
-    }
8ae002
-  }
8ae002
-
8ae002
-  c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
8ae002
-
8ae002
-  *y = c*TWOM1032;
8ae002
-#undef R
8ae002
-}
8ae002
-
8ae002
-/* Convert a multiple precision number *x into a double precision number *y. */
8ae002
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */
8ae002
-
8ae002
-void __mp_dbl(const mp_no *x, double *y, int p) {
8ae002
-#if 0
8ae002
-  int i,k;
8ae002
-  double a,c,u,v,z[5];
8ae002
-#endif
8ae002
+  if (p2 == 1)
8ae002
+    {
8ae002
+      if (EX == -42)
8ae002
+	{
8ae002
+	  z[1] = X[1] + TWO10;
8ae002
+	  z[2] = 0;
8ae002
+	  z[3] = 0;
8ae002
+	  k = 3;
8ae002
+	}
8ae002
+      else if (EX == -43)
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = X[1];
8ae002
+	  z[3] = 0;
8ae002
+	  k = 2;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = 0;
8ae002
+	  z[3] = X[1];
8ae002
+	  k = 1;
8ae002
+	}
8ae002
+    }
8ae002
+  else if (p2 == 2)
8ae002
+    {
8ae002
+      if (EX == -42)
8ae002
+	{
8ae002
+	  z[1] = X[1] + TWO10;
8ae002
+	  z[2] = X[2];
8ae002
+	  z[3] = 0;
8ae002
+	  k = 3;
8ae002
+	}
8ae002
+      else if (EX == -43)
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = X[1];
8ae002
+	  z[3] = X[2];
8ae002
+	  k = 2;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = 0;
8ae002
+	  z[3] = X[1];
8ae002
+	  k = 1;
8ae002
+	}
8ae002
+    }
8ae002
+  else
8ae002
+    {
8ae002
+      if (EX == -42)
8ae002
+	{
8ae002
+	  z[1] = X[1] + TWO10;
8ae002
+	  z[2] = X[2];
8ae002
+	  k = 3;
8ae002
+	}
8ae002
+      else if (EX == -43)
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = X[1];
8ae002
+	  k = 2;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  z[1] = TWO10;
8ae002
+	  z[2] = 0;
8ae002
+	  k = 1;
8ae002
+	}
8ae002
+      z[3] = X[k];
8ae002
+    }
8ae002
 
8ae002
-  if (X[0] == ZERO)  {*y = ZERO;  return; }
8ae002
+  u = ALIGN_DOWN_TO (z[3], TWO5);
8ae002
 
8ae002
-  if      (EX> -42)                 norm(x,y,p);
8ae002
-  else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
8ae002
-  else                              denorm(x,y,p);
8ae002
+  if (u == z[3])
8ae002
+    {
8ae002
+      for (i = k + 1; i <= p2; i++)
8ae002
+	{
8ae002
+	  if (X[i] == 0)
8ae002
+	    continue;
8ae002
+	  else
8ae002
+	    {
8ae002
+	      z[3] += 1;
8ae002
+	      break;
8ae002
+	    }
8ae002
+	}
8ae002
+    }
8ae002
+
8ae002
+  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
8ae002
+
8ae002
+  *y = c * TWOM1032;
8ae002
+# undef R
8ae002
 }
8ae002
-#endif
8ae002
 
8ae002
+/* Convert multiple precision number *X into double precision number *Y.  The
8ae002
+   result is correctly rounded to the nearest/even.  */
8ae002
+void
8ae002
+__mp_dbl (const mp_no *x, double *y, int p)
8ae002
+{
8ae002
+  if (X[0] == 0)
8ae002
+    {
8ae002
+      *y = 0;
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-/* dbl_mp() converts a double precision number x into a multiple precision  */
8ae002
-/* number *y. If the precision p is too small the result is truncated. x is */
8ae002
-/* left unchanged.                                                          */
8ae002
+  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
8ae002
+    norm (x, y, p);
8ae002
+  else
8ae002
+    denorm (x, y, p);
8ae002
+}
8ae002
+#endif
8ae002
 
8ae002
+/* Get the multiple precision equivalent of X into *Y.  If the precision is too
8ae002
+   small, the result is truncated.  */
8ae002
 void
8ae002
 SECTION
8ae002
-__dbl_mp(double x, mp_no *y, int p) {
8ae002
+__dbl_mp (double x, mp_no *y, int p)
8ae002
+{
8ae002
+  long i, n;
8ae002
+  long p2 = p;
8ae002
 
8ae002
-  int i,n;
8ae002
-  double u;
8ae002
+  /* Sign.  */
8ae002
+  if (x == 0)
8ae002
+    {
8ae002
+      Y[0] = 0;
8ae002
+      return;
8ae002
+    }
8ae002
+  else if (x > 0)
8ae002
+    Y[0] = 1;
8ae002
+  else
8ae002
+    {
8ae002
+      Y[0] = -1;
8ae002
+      x = -x;
8ae002
+    }
8ae002
 
8ae002
-  /* Sign */
8ae002
-  if      (x == ZERO)  {Y[0] = ZERO;  return; }
8ae002
-  else if (x >  ZERO)   Y[0] = ONE;
8ae002
-  else                 {Y[0] = MONE;  x=-x;   }
8ae002
-
8ae002
-  /* Exponent */
8ae002
-  for (EY=ONE; x >= RADIX; EY += ONE)   x *= RADIXI;
8ae002
-  for (      ; x <  ONE;   EY -= ONE)   x *= RADIX;
8ae002
-
8ae002
-  /* Digits */
8ae002
-  n=MIN(p,4);
8ae002
-  for (i=1; i<=n; i++) {
8ae002
-    u = (x + TWO52) - TWO52;
8ae002
-    if (u>x)   u -= ONE;
8ae002
-    Y[i] = u;     x -= u;    x *= RADIX; }
8ae002
-  for (   ; i<=p; i++)     Y[i] = ZERO;
8ae002
+  /* Exponent.  */
8ae002
+  for (EY = 1; x >= RADIX; EY += 1)
8ae002
+    x *= RADIXI;
8ae002
+  for (; x < 1; EY -= 1)
8ae002
+    x *= RADIX;
8ae002
+
8ae002
+  /* Digits.  */
8ae002
+  n = MIN (p2, 4);
8ae002
+  for (i = 1; i <= n; i++)
8ae002
+    {
8ae002
+      INTEGER_OF (x, Y[i]);
8ae002
+      x *= RADIX;
8ae002
+    }
8ae002
+  for (; i <= p2; i++)
8ae002
+    Y[i] = 0;
8ae002
 }
8ae002
 
8ae002
-
8ae002
-/*  add_magnitudes() adds the magnitudes of *x & *y assuming that           */
8ae002
-/*  abs(*x) >= abs(*y) > 0.                                                 */
8ae002
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
8ae002
-/* No guard digit is used. The result equals the exact sum, truncated.      */
8ae002
-/* *x & *y are left unchanged.                                              */
8ae002
-
8ae002
+/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
8ae002
+   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
8ae002
+   Y and Z.  No guard digit is used.  The result equals the exact sum,
8ae002
+   truncated.  */
8ae002
 static void
8ae002
 SECTION
8ae002
-add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  int i,j,k;
8ae002
+add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
+  long i, j, k;
8ae002
+  long p2 = p;
8ae002
+  mantissa_t zk;
8ae002
 
8ae002
   EZ = EX;
8ae002
 
8ae002
-  i=p;    j=p+ EY - EX;    k=p+1;
8ae002
+  i = p2;
8ae002
+  j = p2 + EY - EX;
8ae002
+  k = p2 + 1;
8ae002
+
8ae002
+  if (__glibc_unlikely (j < 1))
8ae002
+    {
8ae002
+      __cpy (x, z, p);
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-  if (j<1)
8ae002
-     {__cpy(x,z,p);  return; }
8ae002
-  else   Z[k] = ZERO;
8ae002
-
8ae002
-  for (; j>0; i--,j--) {
8ae002
-    Z[k] += X[i] + Y[j];
8ae002
-    if (Z[k] >= RADIX) {
8ae002
-      Z[k]  -= RADIX;
8ae002
-      Z[--k] = ONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  for (; i>0; i--) {
8ae002
-    Z[k] += X[i];
8ae002
-    if (Z[k] >= RADIX) {
8ae002
-      Z[k]  -= RADIX;
8ae002
-      Z[--k] = ONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  if (Z[1] == ZERO) {
8ae002
-    for (i=1; i<=p; i++)    Z[i] = Z[i+1]; }
8ae002
-  else   EZ += ONE;
8ae002
-}
8ae002
+  zk = 0;
8ae002
 
8ae002
+  for (; j > 0; i--, j--)
8ae002
+    {
8ae002
+      zk += X[i] + Y[j];
8ae002
+      if (zk >= RADIX)
8ae002
+	{
8ae002
+	  Z[k--] = zk - RADIX;
8ae002
+	  zk = 1;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  Z[k--] = zk;
8ae002
+	  zk = 0;
8ae002
+	}
8ae002
+    }
8ae002
 
8ae002
-/*  sub_magnitudes() subtracts the magnitudes of *x & *y assuming that      */
8ae002
-/*  abs(*x) > abs(*y) > 0.                                                  */
8ae002
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z  */
8ae002
-/* or y&z. One guard digit is used. The error is less than one ulp.         */
8ae002
-/* *x & *y are left unchanged.                                              */
8ae002
+  for (; i > 0; i--)
8ae002
+    {
8ae002
+      zk += X[i];
8ae002
+      if (zk >= RADIX)
8ae002
+	{
8ae002
+	  Z[k--] = zk - RADIX;
8ae002
+	  zk = 1;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  Z[k--] = zk;
8ae002
+	  zk = 0;
8ae002
+	}
8ae002
+    }
8ae002
 
8ae002
+  if (zk == 0)
8ae002
+    {
8ae002
+      for (i = 1; i <= p2; i++)
8ae002
+	Z[i] = Z[i + 1];
8ae002
+    }
8ae002
+  else
8ae002
+    {
8ae002
+      Z[1] = zk;
8ae002
+      EZ += 1;
8ae002
+    }
8ae002
+}
8ae002
+
8ae002
+/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
8ae002
+   The sign of the difference *Z is not changed.  X and Y may overlap but not X
8ae002
+   and Z or Y and Z.  One guard digit is used.  The error is less than one
8ae002
+   ULP.  */
8ae002
 static void
8ae002
 SECTION
8ae002
-sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  int i,j,k;
8ae002
+sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
+  long i, j, k;
8ae002
+  long p2 = p;
8ae002
+  mantissa_t zk;
8ae002
 
8ae002
   EZ = EX;
8ae002
+  i = p2;
8ae002
+  j = p2 + EY - EX;
8ae002
+  k = p2;
8ae002
+
8ae002
+  /* Y is too small compared to X, copy X over to the result.  */
8ae002
+  if (__glibc_unlikely (j < 1))
8ae002
+    {
8ae002
+      __cpy (x, z, p);
8ae002
+      return;
8ae002
+    }
8ae002
+
8ae002
+  /* The relevant least significant digit in Y is non-zero, so we factor it in
8ae002
+     to enhance accuracy.  */
8ae002
+  if (j < p2 && Y[j + 1] > 0)
8ae002
+    {
8ae002
+      Z[k + 1] = RADIX - Y[j + 1];
8ae002
+      zk = -1;
8ae002
+    }
8ae002
+  else
8ae002
+    zk = Z[k + 1] = 0;
8ae002
 
8ae002
-  if (EX == EY) {
8ae002
-    i=j=k=p;
8ae002
-    Z[k] = Z[k+1] = ZERO; }
8ae002
-  else {
8ae002
-    j= EX - EY;
8ae002
-    if (j > p)  {__cpy(x,z,p);  return; }
8ae002
-    else {
8ae002
-      i=p;   j=p+1-j;   k=p;
8ae002
-      if (Y[j] > ZERO) {
8ae002
-	Z[k+1] = RADIX - Y[j--];
8ae002
-	Z[k]   = MONE; }
8ae002
-      else {
8ae002
-	Z[k+1] = ZERO;
8ae002
-	Z[k]   = ZERO;   j--;}
8ae002
-    }
8ae002
-  }
8ae002
-
8ae002
-  for (; j>0; i--,j--) {
8ae002
-    Z[k] += (X[i] - Y[j]);
8ae002
-    if (Z[k] < ZERO) {
8ae002
-      Z[k]  += RADIX;
8ae002
-      Z[--k] = MONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  for (; i>0; i--) {
8ae002
-    Z[k] += X[i];
8ae002
-    if (Z[k] < ZERO) {
8ae002
-      Z[k]  += RADIX;
8ae002
-      Z[--k] = MONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
+  /* Subtract and borrow.  */
8ae002
+  for (; j > 0; i--, j--)
8ae002
+    {
8ae002
+      zk += (X[i] - Y[j]);
8ae002
+      if (zk < 0)
8ae002
+	{
8ae002
+	  Z[k--] = zk + RADIX;
8ae002
+	  zk = -1;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  Z[k--] = zk;
8ae002
+	  zk = 0;
8ae002
+	}
8ae002
+    }
8ae002
 
8ae002
-  for (i=1; Z[i] == ZERO; i++) ;
8ae002
+  /* We're done with digits from Y, so it's just digits in X.  */
8ae002
+  for (; i > 0; i--)
8ae002
+    {
8ae002
+      zk += X[i];
8ae002
+      if (zk < 0)
8ae002
+	{
8ae002
+	  Z[k--] = zk + RADIX;
8ae002
+	  zk = -1;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  Z[k--] = zk;
8ae002
+	  zk = 0;
8ae002
+	}
8ae002
+    }
8ae002
+
8ae002
+  /* Normalize.  */
8ae002
+  for (i = 1; Z[i] == 0; i++)
8ae002
+    ;
8ae002
   EZ = EZ - i + 1;
8ae002
-  for (k=1; i <= p+1; )
8ae002
+  for (k = 1; i <= p2 + 1; )
8ae002
     Z[k++] = Z[i++];
8ae002
-  for (; k <= p; )
8ae002
-    Z[k++] = ZERO;
8ae002
+  for (; k <= p2; )
8ae002
+    Z[k++] = 0;
8ae002
 }
8ae002
 
8ae002
-
8ae002
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap  */
8ae002
-/* but not x&z or y&z. One guard digit is used. The error is less than    */
8ae002
-/* one ulp. *x & *y are left unchanged.                                   */
8ae002
-
8ae002
+/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
8ae002
+   and Z or Y and Z.  One guard digit is used.  The error is less than one
8ae002
+   ULP.  */
8ae002
 void
8ae002
 SECTION
8ae002
-__add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
+__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
   int n;
8ae002
 
8ae002
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  return; }
8ae002
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);  return; }
8ae002
+  if (X[0] == 0)
8ae002
+    {
8ae002
+      __cpy (y, z, p);
8ae002
+      return;
8ae002
+    }
8ae002
+  else if (Y[0] == 0)
8ae002
+    {
8ae002
+      __cpy (x, z, p);
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-  if (X[0] == Y[0])   {
8ae002
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] = X[0]; }
8ae002
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
8ae002
-  }
8ae002
-  else                       {
8ae002
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] = X[0]; }
8ae002
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
8ae002
-    else                      Z[0] = ZERO;
8ae002
-  }
8ae002
+  if (X[0] == Y[0])
8ae002
+    {
8ae002
+      if (__acr (x, y, p) > 0)
8ae002
+	{
8ae002
+	  add_magnitudes (x, y, z, p);
8ae002
+	  Z[0] = X[0];
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  add_magnitudes (y, x, z, p);
8ae002
+	  Z[0] = Y[0];
8ae002
+	}
8ae002
+    }
8ae002
+  else
8ae002
+    {
8ae002
+      if ((n = __acr (x, y, p)) == 1)
8ae002
+	{
8ae002
+	  sub_magnitudes (x, y, z, p);
8ae002
+	  Z[0] = X[0];
8ae002
+	}
8ae002
+      else if (n == -1)
8ae002
+	{
8ae002
+	  sub_magnitudes (y, x, z, p);
8ae002
+	  Z[0] = Y[0];
8ae002
+	}
8ae002
+      else
8ae002
+	Z[0] = 0;
8ae002
+    }
8ae002
 }
8ae002
 
8ae002
+/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
8ae002
+   not X and Z or Y and Z.  One guard digit is used.  The error is less than
8ae002
+   one ULP.  */
8ae002
+void
8ae002
+SECTION
8ae002
+__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
+  int n;
8ae002
+
8ae002
+  if (X[0] == 0)
8ae002
+    {
8ae002
+      __cpy (y, z, p);
8ae002
+      Z[0] = -Z[0];
8ae002
+      return;
8ae002
+    }
8ae002
+  else if (Y[0] == 0)
8ae002
+    {
8ae002
+      __cpy (x, z, p);
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
8ae002
-/* overlap but not x&z or y&z. One guard digit is used. The error is      */
8ae002
-/* less than one ulp. *x & *y are left unchanged.                         */
8ae002
+  if (X[0] != Y[0])
8ae002
+    {
8ae002
+      if (__acr (x, y, p) > 0)
8ae002
+	{
8ae002
+	  add_magnitudes (x, y, z, p);
8ae002
+	  Z[0] = X[0];
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  add_magnitudes (y, x, z, p);
8ae002
+	  Z[0] = -Y[0];
8ae002
+	}
8ae002
+    }
8ae002
+  else
8ae002
+    {
8ae002
+      if ((n = __acr (x, y, p)) == 1)
8ae002
+	{
8ae002
+	  sub_magnitudes (x, y, z, p);
8ae002
+	  Z[0] = X[0];
8ae002
+	}
8ae002
+      else if (n == -1)
8ae002
+	{
8ae002
+	  sub_magnitudes (y, x, z, p);
8ae002
+	  Z[0] = -Y[0];
8ae002
+	}
8ae002
+      else
8ae002
+	Z[0] = 0;
8ae002
+    }
8ae002
+}
8ae002
 
8ae002
+#ifndef NO__MUL
8ae002
+/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
8ae002
+   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
8ae002
+   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
8ae002
 void
8ae002
 SECTION
8ae002
-__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
+  long i, j, k, ip, ip2;
8ae002
+  long p2 = p;
8ae002
+  mantissa_store_t zk;
8ae002
+  const mp_no *a;
8ae002
+  mantissa_store_t *diag;
8ae002
+
8ae002
+  /* Is z=0?  */
8ae002
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
8ae002
+    {
8ae002
+      Z[0] = 0;
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-  int n;
8ae002
+  /* We need not iterate through all X's and Y's since it's pointless to
8ae002
+     multiply zeroes.  Here, both are zero...  */
8ae002
+  for (ip2 = p2; ip2 > 0; ip2--)
8ae002
+    if (X[ip2] != 0 || Y[ip2] != 0)
8ae002
+      break;
8ae002
+
8ae002
+  a = X[ip2] != 0 ? y : x;
8ae002
+
8ae002
+  /* ... and here, at least one of them is still zero.  */
8ae002
+  for (ip = ip2; ip > 0; ip--)
8ae002
+    if (a->d[ip] != 0)
8ae002
+      break;
8ae002
+
8ae002
+  /* The product looks like this for p = 3 (as an example):
8ae002
+
8ae002
+
8ae002
+				a1    a2    a3
8ae002
+		 x		b1    b2    b3
8ae002
+		 -----------------------------
8ae002
+			     a1*b3 a2*b3 a3*b3
8ae002
+		       a1*b2 a2*b2 a3*b2
8ae002
+		 a1*b1 a2*b1 a3*b1
8ae002
+
8ae002
+     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
8ae002
+     for P >= 3.  We compute the above digits in two parts; the last P-1
8ae002
+     digits and then the first P digits.  The last P-1 digits are a sum of
8ae002
+     products of the input digits from P to P-k where K is 0 for the least
8ae002
+     significant digit and increases as we go towards the left.  The product
8ae002
+     term is of the form X[k]*X[P-k] as can be seen in the above example.
8ae002
+
8ae002
+     The first P digits are also a sum of products with the same product term,
8ae002
+     except that the sum is from 1 to k.  This is also evident from the above
8ae002
+     example.
8ae002
+
8ae002
+     Another thing that becomes evident is that only the most significant
8ae002
+     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
8ae002
+     'internal precision' of the input numbers, i.e. digits after ip and ip2
8ae002
+     are all 0.  */
8ae002
+
8ae002
+  k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
8ae002
+
8ae002
+  while (k > ip + ip2 + 1)
8ae002
+    Z[k--] = 0;
8ae002
+
8ae002
+  zk = 0;
8ae002
+
8ae002
+  /* Precompute sums of diagonal elements so that we can directly use them
8ae002
+     later.  See the next comment to know we why need them.  */
8ae002
+  diag = alloca (k * sizeof (mantissa_store_t));
8ae002
+  mantissa_store_t d = 0;
8ae002
+  for (i = 1; i <= ip; i++)
8ae002
+    {
8ae002
+      d += X[i] * (mantissa_store_t) Y[i];
8ae002
+      diag[i] = d;
8ae002
+    }
8ae002
+  while (i < k)
8ae002
+    diag[i++] = d;
8ae002
 
8ae002
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  Z[0] = -Z[0];  return; }
8ae002
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);                 return; }
8ae002
+  while (k > p2)
8ae002
+    {
8ae002
+      long lim = k / 2;
8ae002
 
8ae002
-  if (X[0] != Y[0])    {
8ae002
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
8ae002
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
8ae002
-  }
8ae002
-  else                       {
8ae002
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
8ae002
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
8ae002
-    else                      Z[0] = ZERO;
8ae002
-  }
8ae002
-}
8ae002
+      if (k % 2 == 0)
8ae002
+	/* We want to add this only once, but since we subtract it in the sum
8ae002
+	   of products above, we add twice.  */
8ae002
+	zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
8ae002
 
8ae002
+      for (i = k - p2, j = p2; i < j; i++, j--)
8ae002
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
8ae002
 
8ae002
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y      */
8ae002
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is     */
8ae002
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp.   */
8ae002
-/* *x & *y are left unchanged.                                             */
8ae002
+      zk -= diag[k - 1];
8ae002
 
8ae002
-void
8ae002
-SECTION
8ae002
-__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
+      DIV_RADIX (zk, Z[k]);
8ae002
+      k--;
8ae002
+    }
8ae002
 
8ae002
-  int i, i1, i2, j, k, k2;
8ae002
-  double u;
8ae002
+  /* The real deal.  Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i
8ae002
+     goes from 1 -> k - 1 and j goes the same range in reverse.  To reduce the
8ae002
+     number of multiplications, we halve the range and if k is an even number,
8ae002
+     add the diagonal element X[k/2]Y[k/2].  Through the half range, we compute
8ae002
+     X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j].
8ae002
+
8ae002
+     This reduction tells us that we're summing two things, the first term
8ae002
+     through the half range and the negative of the sum of the product of all
8ae002
+     terms of X and Y in the full range.  i.e.
8ae002
+
8ae002
+     SUM(X[i] * Y[i]) for k terms.  This is precalculated above for each k in
8ae002
+     a single loop so that it completes in O(n) time and can hence be directly
8ae002
+     used in the loop below.  */
8ae002
+  while (k > 1)
8ae002
+    {
8ae002
+      long lim = k / 2;
8ae002
+
8ae002
+      if (k % 2 == 0)
8ae002
+	/* We want to add this only once, but since we subtract it in the sum
8ae002
+	   of products above, we add twice.  */
8ae002
+        zk += 2 * X[lim] * (mantissa_store_t) Y[lim];
8ae002
 
8ae002
-		      /* Is z=0? */
8ae002
-  if (X[0]*Y[0]==ZERO)
8ae002
-     { Z[0]=ZERO;  return; }
8ae002
-
8ae002
-		       /* Multiply, add and carry */
8ae002
-  k2 = (p<3) ? p+p : p+3;
8ae002
-  Z[k2]=ZERO;
8ae002
-  for (k=k2; k>1; ) {
8ae002
-    if (k > p)  {i1=k-p; i2=p+1; }
8ae002
-    else        {i1=1;   i2=k;   }
8ae002
-    for (i=i1,j=i2-1; i
8ae002
-
8ae002
-    u = (Z[k] + CUTTER)-CUTTER;
8ae002
-    if  (u > Z[k])  u -= RADIX;
8ae002
-    Z[k]  -= u;
8ae002
-    Z[--k] = u*RADIXI;
8ae002
-  }
8ae002
-
8ae002
-		 /* Is there a carry beyond the most significant digit? */
8ae002
-  if (Z[1] == ZERO) {
8ae002
-    for (i=1; i<=p; i++)  Z[i]=Z[i+1];
8ae002
-    EZ = EX + EY - 1; }
8ae002
-  else
8ae002
-    EZ = EX + EY;
8ae002
+      for (i = 1, j = k - 1; i < j; i++, j--)
8ae002
+	zk += (X[i] + X[j]) * (mantissa_store_t) (Y[i] + Y[j]);
8ae002
+
8ae002
+      zk -= diag[k - 1];
8ae002
+
8ae002
+      DIV_RADIX (zk, Z[k]);
8ae002
+      k--;
8ae002
+    }
8ae002
+  Z[k] = zk;
8ae002
+
8ae002
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
8ae002
+     optimization, where given enough registers, all operations on the exponent
8ae002
+     happen in registers and the result is written out only once into EZ.  */
8ae002
+  int e = EX + EY;
8ae002
+
8ae002
+  /* Is there a carry beyond the most significant digit?  */
8ae002
+  if (__glibc_unlikely (Z[1] == 0))
8ae002
+    {
8ae002
+      for (i = 1; i <= p2; i++)
8ae002
+	Z[i] = Z[i + 1];
8ae002
+      e--;
8ae002
+    }
8ae002
 
8ae002
+  EZ = e;
8ae002
   Z[0] = X[0] * Y[0];
8ae002
 }
8ae002
+#endif
8ae002
+
8ae002
+#ifndef NO__SQR
8ae002
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
8ae002
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
8ae002
+   error is bounded by 1.001 ULP.  This is a faster special case of
8ae002
+   multiplication.  */
8ae002
+void
8ae002
+SECTION
8ae002
+__sqr (const mp_no *x, mp_no *y, int p)
8ae002
+{
8ae002
+  long i, j, k, ip;
8ae002
+  mantissa_store_t yk;
8ae002
 
8ae002
+  /* Is z=0?  */
8ae002
+  if (__glibc_unlikely (X[0] == 0))
8ae002
+    {
8ae002
+      Y[0] = 0;
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-/* Invert a multiple precision number. Set *y = 1 / *x.                     */
8ae002
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3,   */
8ae002
-/* 2.001*r**(1-p) for p>3.                                                  */
8ae002
-/* *x=0 is not permissible. *x is left unchanged.                           */
8ae002
+  /* We need not iterate through all X's since it's pointless to
8ae002
+     multiply zeroes.  */
8ae002
+  for (ip = p; ip > 0; ip--)
8ae002
+    if (X[ip] != 0)
8ae002
+      break;
8ae002
 
8ae002
-static
8ae002
-SECTION
8ae002
-void __inv(const mp_no *x, mp_no *y, int p) {
8ae002
-  int i;
8ae002
-#if 0
8ae002
-  int l;
8ae002
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
8ae002
+
8ae002
+  while (k > 2 * ip + 1)
8ae002
+    Y[k--] = 0;
8ae002
+
8ae002
+  yk = 0;
8ae002
+
8ae002
+  while (k > p)
8ae002
+    {
8ae002
+      mantissa_store_t yk2 = 0;
8ae002
+      long lim = k / 2;
8ae002
+
8ae002
+      if (k % 2 == 0)
8ae002
+	yk += X[lim] * (mantissa_store_t) X[lim];
8ae002
+
8ae002
+      /* In __mul, this loop (and the one within the next while loop) run
8ae002
+         between a range to calculate the mantissa as follows:
8ae002
+
8ae002
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
8ae002
+		+ X[n] * Y[k]
8ae002
+
8ae002
+         For X == Y, we can get away with summing halfway and doubling the
8ae002
+	 result.  For cases where the range size is even, the mid-point needs
8ae002
+	 to be added separately (above).  */
8ae002
+      for (i = k - p, j = p; i < j; i++, j--)
8ae002
+	yk2 += X[i] * (mantissa_store_t) X[j];
8ae002
+
8ae002
+      yk += 2 * yk2;
8ae002
+
8ae002
+      DIV_RADIX (yk, Y[k]);
8ae002
+      k--;
8ae002
+    }
8ae002
+
8ae002
+  while (k > 1)
8ae002
+    {
8ae002
+      mantissa_store_t yk2 = 0;
8ae002
+      long lim = k / 2;
8ae002
+
8ae002
+      if (k % 2 == 0)
8ae002
+	yk += X[lim] * (mantissa_store_t) X[lim];
8ae002
+
8ae002
+      /* Likewise for this loop.  */
8ae002
+      for (i = 1, j = k - 1; i < j; i++, j--)
8ae002
+	yk2 += X[i] * (mantissa_store_t) X[j];
8ae002
+
8ae002
+      yk += 2 * yk2;
8ae002
+
8ae002
+      DIV_RADIX (yk, Y[k]);
8ae002
+      k--;
8ae002
+    }
8ae002
+  Y[k] = yk;
8ae002
+
8ae002
+  /* Squares are always positive.  */
8ae002
+  Y[0] = 1;
8ae002
+
8ae002
+  /* Get the exponent sum into an intermediate variable.  This is a subtle
8ae002
+     optimization, where given enough registers, all operations on the exponent
8ae002
+     happen in registers and the result is written out only once into EZ.  */
8ae002
+  int e = EX * 2;
8ae002
+
8ae002
+  /* Is there a carry beyond the most significant digit?  */
8ae002
+  if (__glibc_unlikely (Y[1] == 0))
8ae002
+    {
8ae002
+      for (i = 1; i <= p; i++)
8ae002
+	Y[i] = Y[i + 1];
8ae002
+      e--;
8ae002
+    }
8ae002
+
8ae002
+  EY = e;
8ae002
+}
8ae002
 #endif
8ae002
+
8ae002
+/* Invert *X and store in *Y.  Relative error bound:
8ae002
+   - For P = 2: 1.001 * R ^ (1 - P)
8ae002
+   - For P = 3: 1.063 * R ^ (1 - P)
8ae002
+   - For P > 3: 2.001 * R ^ (1 - P)
8ae002
+
8ae002
+   *X = 0 is not permissible.  */
8ae002
+static void
8ae002
+SECTION
8ae002
+__inv (const mp_no *x, mp_no *y, int p)
8ae002
+{
8ae002
+  long i;
8ae002
   double t;
8ae002
-  mp_no z,w;
8ae002
-  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
8ae002
-			    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
8ae002
-  const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-			 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
8ae002
-
8ae002
-  __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
8ae002
-  t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
8ae002
-
8ae002
-  for (i=0; i
8ae002
-    __cpy(y,&w,p);
8ae002
-    __mul(x,&w,y,p);
8ae002
-    __sub(&mptwo,y,&z,p);
8ae002
-    __mul(&w,&z,y,p);
8ae002
-  }
8ae002
+  mp_no z, w;
8ae002
+  static const int np1[] =
8ae002
+    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
8ae002
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
8ae002
+  };
8ae002
+
8ae002
+  __cpy (x, &z, p);
8ae002
+  z.e = 0;
8ae002
+  __mp_dbl (&z, &t, p);
8ae002
+  t = 1 / t;
8ae002
+  __dbl_mp (t, y, p);
8ae002
+  EY -= EX;
8ae002
+
8ae002
+  for (i = 0; i < np1[p]; i++)
8ae002
+    {
8ae002
+      __cpy (y, &w, p);
8ae002
+      __mul (x, &w, y, p);
8ae002
+      __sub (&__mptwo, y, &z, p);
8ae002
+      __mul (&w, &z, y, p);
8ae002
+    }
8ae002
 }
8ae002
 
8ae002
+/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
8ae002
+   or Y and Z.  Relative error bound:
8ae002
+   - For P = 2: 2.001 * R ^ (1 - P)
8ae002
+   - For P = 3: 2.063 * R ^ (1 - P)
8ae002
+   - For P > 3: 3.001 * R ^ (1 - P)
8ae002
 
8ae002
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
8ae002
-/* are left unchanged. x&y may overlap but not x&z or y&z.                   */
8ae002
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3     */
8ae002
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible.                      */
8ae002
-
8ae002
+   *X = 0 is not permissible.  */
8ae002
 void
8ae002
 SECTION
8ae002
-__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
+__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
+{
8ae002
   mp_no w;
8ae002
 
8ae002
-  if (X[0] == ZERO)    Z[0] = ZERO;
8ae002
-  else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
8ae002
+  if (X[0] == 0)
8ae002
+    Z[0] = 0;
8ae002
+  else
8ae002
+    {
8ae002
+      __inv (y, &w, p);
8ae002
+      __mul (x, &w, z, p);
8ae002
+    }
8ae002
 }
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa.h
8ae002
@@ -1,7 +1,7 @@
8ae002
 /*
8ae002
  * IBM Accurate Mathematical Library
8ae002
  * Written by International Business Machines Corp.
8ae002
- * Copyright (C) 2001, 2011 Free Software Foundation, Inc.
8ae002
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
8ae002
  *
8ae002
  * This program is free software; you can redistribute it and/or modify
8ae002
  * it under the terms of the GNU Lesser General Public License as published by
8ae002
@@ -23,36 +23,58 @@
8ae002
 /*  FUNCTIONS:                                                          */
8ae002
 /*               mcr                                                    */
8ae002
 /*               acr                                                    */
8ae002
-/*               cr                                                     */
8ae002
 /*               cpy                                                    */
8ae002
-/*               cpymn                                                  */
8ae002
 /*               mp_dbl                                                 */
8ae002
 /*               dbl_mp                                                 */
8ae002
 /*               add                                                    */
8ae002
 /*               sub                                                    */
8ae002
 /*               mul                                                    */
8ae002
-/*               inv                                                    */
8ae002
 /*               dvd                                                    */
8ae002
 /*                                                                      */
8ae002
 /* Arithmetic functions for multiple precision numbers.                 */
8ae002
 /* Common types and definition                                          */
8ae002
 /************************************************************************/
8ae002
 
8ae002
+#include <mpa-arch.h>
8ae002
 
8ae002
-typedef struct {/* This structure holds the details of a multi-precision     */
8ae002
-  int e;        /* floating point number, x: d[0] holds its sign (-1,0 or 1) */
8ae002
-  double d[40]; /* e holds its exponent (...,-2,-1,0,1,2,...) and            */
8ae002
-} mp_no;        /* d[1]...d[p] hold its mantissa digits. The value of x is,  */
8ae002
-		/* x = d[1]*r**(e-1) + d[2]*r**(e-2) + ... + d[p]*r**(e-p).  */
8ae002
-		/* Here   r = 2**24,   0 <= d[i] < r  and  1 <= p <= 32.     */
8ae002
-		/* p is a global variable. A multi-precision number is       */
8ae002
-		/* always normalized. Namely, d[1] > 0. An exception is      */
8ae002
-		/* a zero which is characterized by d[0] = 0. The terms      */
8ae002
-		/* d[p+1], d[p+2], ... of a none zero number have no         */
8ae002
-		/* significance and so are the terms e, d[1],d[2],...        */
8ae002
-		/* of a zero.                                                */
8ae002
+/* The mp_no structure holds the details of a multi-precision floating point
8ae002
+   number.
8ae002
 
8ae002
-typedef union { int i[2]; double d; } number;
8ae002
+   - The radix of the number (R) is 2 ^ 24.
8ae002
+
8ae002
+   - E: The exponent of the number.
8ae002
+
8ae002
+   - D[0]: The sign (-1, 1) or 0 if the value is 0.  In the latter case, the
8ae002
+     values of the remaining members of the structure are ignored.
8ae002
+
8ae002
+   - D[1] - D[p]: The mantissa of the number where:
8ae002
+
8ae002
+	0 <= D[i] < R and
8ae002
+	P is the precision of the number and 1 <= p <= 32
8ae002
+
8ae002
+     D[p+1] ... D[39] have no significance.
8ae002
+
8ae002
+   - The value of the number is:
8ae002
+
8ae002
+	D[1] * R ^ (E - 1) + D[2] * R ^ (E - 2) ... D[p] * R ^ (E - p)
8ae002
+
8ae002
+   */
8ae002
+typedef struct
8ae002
+{
8ae002
+  int e;
8ae002
+  mantissa_t d[40];
8ae002
+} mp_no;
8ae002
+
8ae002
+typedef union
8ae002
+{
8ae002
+  int i[2];
8ae002
+  double d;
8ae002
+} number;
8ae002
+
8ae002
+/* TODO: With only a partial backport of the constant cleanup we don't
8ae002
+         define __mpone or __mptwo here for other code to use.  */
8ae002
+/* extern const mp_no __mpone;
8ae002
+extern const mp_no __mptwo;  */
8ae002
 
8ae002
 #define  X   x->d
8ae002
 #define  Y   y->d
8ae002
@@ -63,21 +85,73 @@ typedef union { int i[2]; double d; } nu
8ae002
 
8ae002
 #define ABS(x)   ((x) <  0  ? -(x) : (x))
8ae002
 
8ae002
-int __acr(const mp_no *, const mp_no *, int);
8ae002
-// int  __cr(const mp_no *, const mp_no *, int);
8ae002
-void __cpy(const mp_no *, mp_no *, int);
8ae002
-// void __cpymn(const mp_no *, int, mp_no *, int);
8ae002
-void __mp_dbl(const mp_no *, double *, int);
8ae002
-void __dbl_mp(double, mp_no *, int);
8ae002
-void __add(const mp_no *, const mp_no *, mp_no *, int);
8ae002
-void __sub(const mp_no *, const mp_no *, mp_no *, int);
8ae002
-void __mul(const mp_no *, const mp_no *, mp_no *, int);
8ae002
-// void __inv(const mp_no *, mp_no *, int);
8ae002
-void __dvd(const mp_no *, const mp_no *, mp_no *, int);
8ae002
+#ifndef RADIXI
8ae002
+# define  RADIXI    0x1.0p-24		/* 2^-24   */
8ae002
+#endif
8ae002
+
8ae002
+#ifndef TWO52
8ae002
+# define  TWO52     0x1.0p52		/* 2^52    */
8ae002
+#endif
8ae002
+
8ae002
+#define  TWO5      TWOPOW (5)		/* 2^5     */
8ae002
+#define  TWO8      TWOPOW (8)		/* 2^52    */
8ae002
+#define  TWO10     TWOPOW (10)		/* 2^10    */
8ae002
+#define  TWO18     TWOPOW (18)		/* 2^18    */
8ae002
+#define  TWO19     TWOPOW (19)		/* 2^19    */
8ae002
+#define  TWO23     TWOPOW (23)		/* 2^23    */
8ae002
+
8ae002
+#define  TWO57     0x1.0p57            /* 2^57    */
8ae002
+#define  TWO71     0x1.0p71            /* 2^71    */
8ae002
+#define  TWOM1032  0x1.0p-1032         /* 2^-1032 */
8ae002
+#define  TWOM1022  0x1.0p-1022         /* 2^-1022 */
8ae002
+
8ae002
+#define  HALF      0x1.0p-1            /* 1/2 */
8ae002
+#define  MHALF     -0x1.0p-1           /* -1/2 */
8ae002
+#define  HALFRAD   0x1.0p23            /* 2^23 */
8ae002
+
8ae002
+int __acr (const mp_no *, const mp_no *, int);
8ae002
+void __cpy (const mp_no *, mp_no *, int);
8ae002
+void __mp_dbl (const mp_no *, double *, int);
8ae002
+void __dbl_mp (double, mp_no *, int);
8ae002
+void __add (const mp_no *, const mp_no *, mp_no *, int);
8ae002
+void __sub (const mp_no *, const mp_no *, mp_no *, int);
8ae002
+void __mul (const mp_no *, const mp_no *, mp_no *, int);
8ae002
+void __sqr (const mp_no *, mp_no *, int);
8ae002
+void __dvd (const mp_no *, const mp_no *, mp_no *, int);
8ae002
 
8ae002
 extern void __mpatan (mp_no *, mp_no *, int);
8ae002
 extern void __mpatan2 (mp_no *, mp_no *, mp_no *, int);
8ae002
 extern void __mpsqrt (mp_no *, mp_no *, int);
8ae002
-extern void __mpexp (mp_no *, mp_no *__y, int);
8ae002
+extern void __mpexp (mp_no *, mp_no *, int);
8ae002
 extern void __c32 (mp_no *, mp_no *, mp_no *, int);
8ae002
 extern int __mpranred (double, mp_no *, int);
8ae002
+
8ae002
+/* Given a power POW, build a multiprecision number 2^POW.  */
8ae002
+static inline void
8ae002
+__pow_mp (int pow, mp_no *y, int p)
8ae002
+{
8ae002
+  int i, rem;
8ae002
+
8ae002
+  /* The exponent is E such that E is a factor of 2^24.  The remainder (of the
8ae002
+     form 2^x) goes entirely into the first digit of the mantissa as it is
8ae002
+     always less than 2^24.  */
8ae002
+  EY = pow / 24;
8ae002
+  rem = pow - EY * 24;
8ae002
+  EY++;
8ae002
+
8ae002
+  /* If the remainder is negative, it means that POW was negative since
8ae002
+     |EY * 24| <= |pow|.  Adjust so that REM is positive and still less than
8ae002
+     24 because of which, the mantissa digit is less than 2^24.  */
8ae002
+  if (rem < 0)
8ae002
+    {
8ae002
+      EY--;
8ae002
+      rem += 24;
8ae002
+    }
8ae002
+  /* The sign of any 2^x is always positive.  */
8ae002
+  Y[0] = 1;
8ae002
+  Y[1] = 1 << rem;
8ae002
+
8ae002
+  /* Everything else is 0.  */
8ae002
+  for (i = 2; i <= p; i++)
8ae002
+    Y[i] = 0;
8ae002
+}
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.c
8ae002
@@ -69,13 +69,13 @@ __mpexp(mp_no *x, mp_no *y, int p) {
8ae002
   for (i=0; i
8ae002
   for (   ; i>EX; i--)  a *= RADIX;
8ae002
   b = X[1]*RADIXI;   m2 = 24*EX;
8ae002
-  for (; b
8ae002
+  for (; b
8ae002
   if (b == HALF) {
8ae002
-    for (i=2; i<=p; i++) { if (X[i]!=ZERO)  break; }
8ae002
-    if (i==p+1)  { m2--;  a *= TWO; }
8ae002
+    for (i=2; i<=p; i++) { if (X[i]!=0)  break; }
8ae002
+    if (i==p+1)  { m2--;  a *= 2; }
8ae002
   }
8ae002
   if ((m=m1+m2) <= 0) {
8ae002
-    m=0;  a=ONE;
8ae002
+    m=0;  a=1;
8ae002
     for (i=n-1; i>0; i--,n--) { if (m1np[i][p]+m2>0)  break; }
8ae002
   }
8ae002
 
8ae002
@@ -84,8 +84,8 @@ __mpexp(mp_no *x, mp_no *y, int p) {
8ae002
   __mul(x,&mpt1,&mps,p);
8ae002
 
8ae002
   /* Evaluate the polynomial. Put result in mpt2 */
8ae002
-  mpone.e=1;   mpone.d[0]=ONE;   mpone.d[1]=ONE;
8ae002
-  mpk.e = 1;   mpk.d[0] = ONE;   mpk.d[1]=__mpexp_nn[n].d;
8ae002
+  mpone.e=1;   mpone.d[0]=1;   mpone.d[1]=1;
8ae002
+  mpk.e = 1;   mpk.d[0] = 1;   mpk.d[1]=__mpexp_nn[n].d;
8ae002
   __dvd(&mps,&mpk,&mpt1,p);
8ae002
   __add(&mpone,&mpt1,&mpak,p);
8ae002
   for (k=n-1; k>1; k--) {
8ae002
@@ -99,9 +99,9 @@ __mpexp(mp_no *x, mp_no *y, int p) {
8ae002
 
8ae002
   /* Raise polynomial value to the power of 2**m. Put result in y */
8ae002
   for (k=0,j=0; k
8ae002
-    __mul(&mpt2,&mpt2,&mpt1,p);  k++;
8ae002
+    __sqr (&mpt2, &mpt1, p);  k++;
8ae002
     if (k==m)  { j=1;  break; }
8ae002
-    __mul(&mpt1,&mpt1,&mpt2,p);  k++;
8ae002
+    __sqr (&mpt1, &mpt2, p);  k++;
8ae002
   }
8ae002
   if (j)  __cpy(&mpt1,y,p);
8ae002
   else    __cpy(&mpt2,y,p);
8ae002
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
8ae002
@@ -1,5 +1,6 @@
8ae002
 #define __add __add_avx
8ae002
 #define __mul __mul_avx
8ae002
+#define __sqr __sqr_avx
8ae002
 #define __sub __sub_avx
8ae002
 #define __dbl_mp __dbl_mp_avx
8ae002
 #define __dvd __dvd_avx
8ae002
Index: glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
8ae002
@@ -1,5 +1,6 @@
8ae002
 #define __add __add_fma4
8ae002
 #define __mul __mul_fma4
8ae002
+#define __sqr __sqr_fma4
8ae002
 #define __sub __sub_fma4
8ae002
 #define __dbl_mp __dbl_mp_fma4
8ae002
 #define __dvd __dvd_fma4
8ae002
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/powerpc/power4/fpu/mpa.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa.c
8ae002
@@ -2,7 +2,7 @@
8ae002
 /*
8ae002
  * IBM Accurate Mathematical Library
8ae002
  * written by International Business Machines Corp.
8ae002
- * Copyright (C) 2001, 2006 Free Software Foundation
8ae002
+ * Copyright (C) 2001-2017 Free Software Foundation, Inc.
8ae002
  *
8ae002
  * This program is free software; you can redistribute it and/or modify
8ae002
  * it under the terms of the GNU Lesser General Public License as published by
8ae002
@@ -17,532 +17,198 @@
8ae002
  * You should have received a copy of the GNU Lesser General Public License
8ae002
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
8ae002
  */
8ae002
-/************************************************************************/
8ae002
-/*  MODULE_NAME: mpa.c                                                  */
8ae002
-/*                                                                      */
8ae002
-/*  FUNCTIONS:                                                          */
8ae002
-/*               mcr                                                    */
8ae002
-/*               acr                                                    */
8ae002
-/*               cr                                                     */
8ae002
-/*               cpy                                                    */
8ae002
-/*               cpymn                                                  */
8ae002
-/*               norm                                                   */
8ae002
-/*               denorm                                                 */
8ae002
-/*               mp_dbl                                                 */
8ae002
-/*               dbl_mp                                                 */
8ae002
-/*               add_magnitudes                                         */
8ae002
-/*               sub_magnitudes                                         */
8ae002
-/*               add                                                    */
8ae002
-/*               sub                                                    */
8ae002
-/*               mul                                                    */
8ae002
-/*               inv                                                    */
8ae002
-/*               dvd                                                    */
8ae002
-/*                                                                      */
8ae002
-/* Arithmetic functions for multiple precision numbers.                 */
8ae002
-/* Relative errors are bounded                                          */
8ae002
-/************************************************************************/
8ae002
-
8ae002
-
8ae002
-#include "endian.h"
8ae002
-#include "mpa.h"
8ae002
-#include "mpa2.h"
8ae002
-#include <sys/param.h>	/* For MIN() */
8ae002
-/* mcr() compares the sizes of the mantissas of two multiple precision  */
8ae002
-/* numbers. Mantissas are compared regardless of the signs of the       */
8ae002
-/* numbers, even if x->d[0] or y->d[0] are zero. Exponents are also     */
8ae002
-/* disregarded.                                                         */
8ae002
-static int mcr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  long i;
8ae002
-  long p2 = p;
8ae002
-  for (i=1; i<=p2; i++) {
8ae002
-    if      (X[i] == Y[i])  continue;
8ae002
-    else if (X[i] >  Y[i])  return  1;
8ae002
-    else                    return -1; }
8ae002
-  return 0;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-
8ae002
-/* acr() compares the absolute values of two multiple precision numbers */
8ae002
-int __acr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  long i;
8ae002
-
8ae002
-  if      (X[0] == ZERO) {
8ae002
-    if    (Y[0] == ZERO) i= 0;
8ae002
-    else                 i=-1;
8ae002
-  }
8ae002
-  else if (Y[0] == ZERO) i= 1;
8ae002
-  else {
8ae002
-    if      (EX >  EY)   i= 1;
8ae002
-    else if (EX <  EY)   i=-1;
8ae002
-    else                 i= mcr(x,y,p);
8ae002
-  }
8ae002
-
8ae002
-  return i;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* cr90 compares the values of two multiple precision numbers           */
8ae002
-int  __cr(const mp_no *x, const mp_no *y, int p) {
8ae002
-  int i;
8ae002
-
8ae002
-  if      (X[0] > Y[0])  i= 1;
8ae002
-  else if (X[0] < Y[0])  i=-1;
8ae002
-  else if (X[0] < ZERO ) i= __acr(y,x,p);
8ae002
-  else                   i= __acr(x,y,p);
8ae002
-
8ae002
-  return i;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Copy a multiple precision number. Set *y=*x. x=y is permissible.      */
8ae002
-void __cpy(const mp_no *x, mp_no *y, int p) {
8ae002
-  long i;
8ae002
-
8ae002
-  EY = EX;
8ae002
-  for (i=0; i <= p; i++)    Y[i] = X[i];
8ae002
-
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Copy a multiple precision number x of precision m into a */
8ae002
-/* multiple precision number y of precision n. In case n>m, */
8ae002
-/* the digits of y beyond the m'th are set to zero. In case */
8ae002
-/* n
8ae002
-/* x=y is permissible.                                      */
8ae002
-
8ae002
-void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
8ae002
-
8ae002
-  long i,k;
8ae002
-  long n2 = n;
8ae002
-  long m2 = m;
8ae002
-
8ae002
-  EY = EX;     k=MIN(m2,n2);
8ae002
-  for (i=0; i <= k; i++)    Y[i] = X[i];
8ae002
-  for (   ; i <= n2; i++)    Y[i] = ZERO;
8ae002
-
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-/* Convert a multiple precision number *x into a double precision */
8ae002
-/* number *y, normalized case  (|x| >= 2**(-1022))) */
8ae002
-static void norm(const mp_no *x, double *y, int p)
8ae002
-{
8ae002
-  #define R  radixi.d
8ae002
-  long i;
8ae002
-#if 0
8ae002
-  int k;
8ae002
-#endif
8ae002
-  double a,c,u,v,z[5];
8ae002
-  if (p<5) {
8ae002
-    if      (p==1) c = X[1];
8ae002
-    else if (p==2) c = X[1] + R* X[2];
8ae002
-    else if (p==3) c = X[1] + R*(X[2]  +   R* X[3]);
8ae002
-    else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
8ae002
-  }
8ae002
-  else {
8ae002
-    for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
8ae002
-        {a *= TWO;   z[1] *= TWO; }
8ae002
-
8ae002
-    for (i=2; i<5; i++) {
8ae002
-      z[i] = X[i]*a;
8ae002
-      u = (z[i] + CUTTER)-CUTTER;
8ae002
-      if  (u > z[i])  u -= RADIX;
8ae002
-      z[i] -= u;
8ae002
-      z[i-1] += u*RADIXI;
8ae002
-    }
8ae002
-
8ae002
-    u = (z[3] + TWO71) - TWO71;
8ae002
-    if (u > z[3])   u -= TWO19;
8ae002
-    v = z[3]-u;
8ae002
-
8ae002
-    if (v == TWO18) {
8ae002
-      if (z[4] == ZERO) {
8ae002
-        for (i=5; i <= p; i++) {
8ae002
-          if (X[i] == ZERO)   continue;
8ae002
-          else                {z[3] += ONE;   break; }
8ae002
-        }
8ae002
-      }
8ae002
-      else              z[3] += ONE;
8ae002
-    }
8ae002
-
8ae002
-    c = (z[1] + R *(z[2] + R * z[3]))/a;
8ae002
-  }
8ae002
 
8ae002
-  c *= X[0];
8ae002
-
8ae002
-  for (i=1; i
8ae002
-  for (i=1; i>EX; i--)   c *= RADIXI;
8ae002
-
8ae002
-  *y = c;
8ae002
-  return;
8ae002
-#undef R
8ae002
-}
8ae002
-
8ae002
-/* Convert a multiple precision number *x into a double precision */
8ae002
-/* number *y, denormalized case  (|x| < 2**(-1022))) */
8ae002
-static void denorm(const mp_no *x, double *y, int p)
8ae002
+/* Define __mul and __sqr and use the rest from generic code.  */
8ae002
+#define NO__MUL
8ae002
+#define NO__SQR
8ae002
+
8ae002
+#include <sysdeps/ieee754/dbl-64/mpa.c>
8ae002
+
8ae002
+/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
8ae002
+   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
8ae002
+   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
8ae002
+void
8ae002
+__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
8ae002
 {
8ae002
-  long i,k;
8ae002
+  long i, i1, i2, j, k, k2;
8ae002
   long p2 = p;
8ae002
-  double c,u,z[5];
8ae002
-#if 0
8ae002
-  double a,v;
8ae002
-#endif
8ae002
+  double u, zk, zk2;
8ae002
 
8ae002
-#define R  radixi.d
8ae002
-  if (EX<-44 || (EX==-44 && X[1]
8ae002
-     { *y=ZERO; return; }
8ae002
-
8ae002
-  if      (p2==1) {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=ZERO;  z[3]=ZERO;  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=ZERO;  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
8ae002
-  }
8ae002
-  else if (p2==2) {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  z[3]=ZERO;  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  z[3]=X[2];  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  z[3]=X[1];  k=1;}
8ae002
-  }
8ae002
-  else {
8ae002
-    if      (EX==-42) {z[1]=X[1]+TWO10;  z[2]=X[2];  k=3;}
8ae002
-    else if (EX==-43) {z[1]=     TWO10;  z[2]=X[1];  k=2;}
8ae002
-    else              {z[1]=     TWO10;  z[2]=ZERO;  k=1;}
8ae002
-    z[3] = X[k];
8ae002
-  }
8ae002
-
8ae002
-  u = (z[3] + TWO57) - TWO57;
8ae002
-  if  (u > z[3])   u -= TWO5;
8ae002
-
8ae002
-  if (u==z[3]) {
8ae002
-    for (i=k+1; i <= p2; i++) {
8ae002
-      if (X[i] == ZERO)   continue;
8ae002
-      else {z[3] += ONE;   break; }
8ae002
+  /* Is z=0?  */
8ae002
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
8ae002
+    {
8ae002
+      Z[0] = 0;
8ae002
+      return;
8ae002
     }
8ae002
-  }
8ae002
-
8ae002
-  c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
8ae002
 
8ae002
-  *y = c*TWOM1032;
8ae002
-  return;
8ae002
-
8ae002
-#undef R
8ae002
-}
8ae002
-
8ae002
-/* Convert a multiple precision number *x into a double precision number *y. */
8ae002
-/* The result is correctly rounded to the nearest/even. *x is left unchanged */
8ae002
-
8ae002
-void __mp_dbl(const mp_no *x, double *y, int p) {
8ae002
-#if 0
8ae002
-  int i,k;
8ae002
-  double a,c,u,v,z[5];
8ae002
+  /* Multiply, add and carry */
8ae002
+  k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
8ae002
+  zk = Z[k2] = 0;
8ae002
+  for (k = k2; k > 1;)
8ae002
+    {
8ae002
+      if (k > p2)
8ae002
+	{
8ae002
+	  i1 = k - p2;
8ae002
+	  i2 = p2 + 1;
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  i1 = 1;
8ae002
+	  i2 = k;
8ae002
+	}
8ae002
+#if 1
8ae002
+      /* Rearrange this inner loop to allow the fmadd instructions to be
8ae002
+         independent and execute in parallel on processors that have
8ae002
+         dual symmetrical FP pipelines.  */
8ae002
+      if (i1 < (i2 - 1))
8ae002
+	{
8ae002
+	  /* Make sure we have at least 2 iterations.  */
8ae002
+	  if (((i2 - i1) & 1L) == 1L)
8ae002
+	    {
8ae002
+	      /* Handle the odd iterations case.  */
8ae002
+	      zk2 = x->d[i2 - 1] * y->d[i1];
8ae002
+	    }
8ae002
+	  else
8ae002
+	    zk2 = 0.0;
8ae002
+	  /* Do two multiply/adds per loop iteration, using independent
8ae002
+	     accumulators; zk and zk2.  */
8ae002
+	  for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
8ae002
+	    {
8ae002
+	      zk += x->d[i] * y->d[j];
8ae002
+	      zk2 += x->d[i + 1] * y->d[j - 1];
8ae002
+	    }
8ae002
+	  zk += zk2;		/* Final sum.  */
8ae002
+	}
8ae002
+      else
8ae002
+	{
8ae002
+	  /* Special case when iterations is 1.  */
8ae002
+	  zk += x->d[i1] * y->d[i1];
8ae002
+	}
8ae002
+#else
8ae002
+      /* The original code.  */
8ae002
+      for (i = i1, j = i2 - 1; i < i2; i++, j--)
8ae002
+	zk += X[i] * Y[j];
8ae002
 #endif
8ae002
 
8ae002
-  if (X[0] == ZERO)  {*y = ZERO;  return; }
8ae002
-
8ae002
-  if      (EX> -42)                 norm(x,y,p);
8ae002
-  else if (EX==-42 && X[1]>=TWO10)  norm(x,y,p);
8ae002
-  else                              denorm(x,y,p);
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* dbl_mp() converts a double precision number x into a multiple precision  */
8ae002
-/* number *y. If the precision p is too small the result is truncated. x is */
8ae002
-/* left unchanged.                                                          */
8ae002
-
8ae002
-void __dbl_mp(double x, mp_no *y, int p) {
8ae002
+      u = (zk + CUTTER) - CUTTER;
8ae002
+      if (u > zk)
8ae002
+	u -= RADIX;
8ae002
+      Z[k] = zk - u;
8ae002
+      zk = u * RADIXI;
8ae002
+      --k;
8ae002
+    }
8ae002
+  Z[k] = zk;
8ae002
 
8ae002
-  long i,n;
8ae002
-  long p2 = p;
8ae002
-  double u;
8ae002
+  int e = EX + EY;
8ae002
+  /* Is there a carry beyond the most significant digit?  */
8ae002
+  if (Z[1] == 0)
8ae002
+    {
8ae002
+      for (i = 1; i <= p2; i++)
8ae002
+	Z[i] = Z[i + 1];
8ae002
+      e--;
8ae002
+    }
8ae002
 
8ae002
-  /* Sign */
8ae002
-  if      (x == ZERO)  {Y[0] = ZERO;  return; }
8ae002
-  else if (x >  ZERO)   Y[0] = ONE;
8ae002
-  else                 {Y[0] = MONE;  x=-x;   }
8ae002
-
8ae002
-  /* Exponent */
8ae002
-  for (EY=ONE; x >= RADIX; EY += ONE)   x *= RADIXI;
8ae002
-  for (      ; x <  ONE;   EY -= ONE)   x *= RADIX;
8ae002
-
8ae002
-  /* Digits */
8ae002
-  n=MIN(p2,4);
8ae002
-  for (i=1; i<=n; i++) {
8ae002
-    u = (x + TWO52) - TWO52;
8ae002
-    if (u>x)   u -= ONE;
8ae002
-    Y[i] = u;     x -= u;    x *= RADIX; }
8ae002
-  for (   ; i<=p2; i++)     Y[i] = ZERO;
8ae002
-  return;
8ae002
+  EZ = e;
8ae002
+  Z[0] = X[0] * Y[0];
8ae002
 }
8ae002
 
8ae002
+/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
8ae002
+   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
8ae002
+   error is bounded by 1.001 ULP.  This is a faster special case of
8ae002
+   multiplication.  */
8ae002
+void
8ae002
+__sqr (const mp_no *x, mp_no *y, int p)
8ae002
+{
8ae002
+  long i, j, k, ip;
8ae002
+  double u, yk;
8ae002
 
8ae002
-/*  add_magnitudes() adds the magnitudes of *x & *y assuming that           */
8ae002
-/*  abs(*x) >= abs(*y) > 0.                                                 */
8ae002
-/* The sign of the sum *z is undefined. x&y may overlap but not x&z or y&z. */
8ae002
-/* No guard digit is used. The result equals the exact sum, truncated.      */
8ae002
-/* *x & *y are left unchanged.                                              */
8ae002
+  /* Is z=0?  */
8ae002
+  if (__glibc_unlikely (X[0] == 0))
8ae002
+    {
8ae002
+      Y[0] = 0;
8ae002
+      return;
8ae002
+    }
8ae002
 
8ae002
-static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
+  /* We need not iterate through all X's since it's pointless to
8ae002
+     multiply zeroes.  */
8ae002
+  for (ip = p; ip > 0; ip--)
8ae002
+    if (X[ip] != 0)
8ae002
+      break;
8ae002
 
8ae002
-  long i,j,k;
8ae002
-  long p2 = p;
8ae002
+  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
8ae002
 
8ae002
-  EZ = EX;
8ae002
+  while (k > 2 * ip + 1)
8ae002
+    Y[k--] = 0;
8ae002
 
8ae002
-  i=p2;    j=p2+ EY - EX;    k=p2+1;
8ae002
+  yk = 0;
8ae002
 
8ae002
-  if (j<1)
8ae002
-     {__cpy(x,z,p);  return; }
8ae002
-  else   Z[k] = ZERO;
8ae002
-
8ae002
-  for (; j>0; i--,j--) {
8ae002
-    Z[k] += X[i] + Y[j];
8ae002
-    if (Z[k] >= RADIX) {
8ae002
-      Z[k]  -= RADIX;
8ae002
-      Z[--k] = ONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  for (; i>0; i--) {
8ae002
-    Z[k] += X[i];
8ae002
-    if (Z[k] >= RADIX) {
8ae002
-      Z[k]  -= RADIX;
8ae002
-      Z[--k] = ONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  if (Z[1] == ZERO) {
8ae002
-    for (i=1; i<=p2; i++)    Z[i] = Z[i+1]; }
8ae002
-  else   EZ += ONE;
8ae002
-}
8ae002
+  while (k > p)
8ae002
+    {
8ae002
+      double yk2 = 0.0;
8ae002
+      long lim = k / 2;
8ae002
 
8ae002
+      if (k % 2 == 0)
8ae002
+        {
8ae002
+	  yk += X[lim] * X[lim];
8ae002
+	  lim--;
8ae002
+	}
8ae002
 
8ae002
-/*  sub_magnitudes() subtracts the magnitudes of *x & *y assuming that      */
8ae002
-/*  abs(*x) > abs(*y) > 0.                                                  */
8ae002
-/* The sign of the difference *z is undefined. x&y may overlap but not x&z  */
8ae002
-/* or y&z. One guard digit is used. The error is less than one ulp.         */
8ae002
-/* *x & *y are left unchanged.                                              */
8ae002
+      /* In __mul, this loop (and the one within the next while loop) run
8ae002
+         between a range to calculate the mantissa as follows:
8ae002
 
8ae002
-static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
+         Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
8ae002
+		+ X[n] * Y[k]
8ae002
 
8ae002
-  long i,j,k;
8ae002
-  long p2 = p;
8ae002
+         For X == Y, we can get away with summing halfway and doubling the
8ae002
+	 result.  For cases where the range size is even, the mid-point needs
8ae002
+	 to be added separately (above).  */
8ae002
+      for (i = k - p, j = p; i <= lim; i++, j--)
8ae002
+	yk2 += X[i] * X[j];
8ae002
 
8ae002
-  EZ = EX;
8ae002
+      yk += 2.0 * yk2;
8ae002
 
8ae002
-  if (EX == EY) {
8ae002
-    i=j=k=p2;
8ae002
-    Z[k] = Z[k+1] = ZERO; }
8ae002
-  else {
8ae002
-    j= EX - EY;
8ae002
-    if (j > p2)  {__cpy(x,z,p);  return; }
8ae002
-    else {
8ae002
-      i=p2;   j=p2+1-j;   k=p2;
8ae002
-      if (Y[j] > ZERO) {
8ae002
-        Z[k+1] = RADIX - Y[j--];
8ae002
-        Z[k]   = MONE; }
8ae002
-      else {
8ae002
-        Z[k+1] = ZERO;
8ae002
-        Z[k]   = ZERO;   j--;}
8ae002
+      u = (yk + CUTTER) - CUTTER;
8ae002
+      if (u > yk)
8ae002
+	u -= RADIX;
8ae002
+      Y[k--] = yk - u;
8ae002
+      yk = u * RADIXI;
8ae002
     }
8ae002
-  }
8ae002
-
8ae002
-  for (; j>0; i--,j--) {
8ae002
-    Z[k] += (X[i] - Y[j]);
8ae002
-    if (Z[k] < ZERO) {
8ae002
-      Z[k]  += RADIX;
8ae002
-      Z[--k] = MONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  for (; i>0; i--) {
8ae002
-    Z[k] += X[i];
8ae002
-    if (Z[k] < ZERO) {
8ae002
-      Z[k]  += RADIX;
8ae002
-      Z[--k] = MONE; }
8ae002
-    else
8ae002
-      Z[--k] = ZERO;
8ae002
-  }
8ae002
-
8ae002
-  for (i=1; Z[i] == ZERO; i++) ;
8ae002
-  EZ = EZ - i + 1;
8ae002
-  for (k=1; i <= p2+1; )
8ae002
-    Z[k++] = Z[i++];
8ae002
-  for (; k <= p2; )
8ae002
-    Z[k++] = ZERO;
8ae002
-
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Add two multiple precision numbers. Set *z = *x + *y. x&y may overlap  */
8ae002
-/* but not x&z or y&z. One guard digit is used. The error is less than    */
8ae002
-/* one ulp. *x & *y are left unchanged.                                   */
8ae002
-
8ae002
-void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  int n;
8ae002
-
8ae002
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  return; }
8ae002
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);  return; }
8ae002
-
8ae002
-  if (X[0] == Y[0])   {
8ae002
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] = X[0]; }
8ae002
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
8ae002
-  }
8ae002
-  else                       {
8ae002
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] = X[0]; }
8ae002
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = Y[0]; }
8ae002
-    else                      Z[0] = ZERO;
8ae002
-  }
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Subtract two multiple precision numbers. *z is set to *x - *y. x&y may */
8ae002
-/* overlap but not x&z or y&z. One guard digit is used. The error is      */
8ae002
-/* less than one ulp. *x & *y are left unchanged.                         */
8ae002
-
8ae002
-void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  int n;
8ae002
-
8ae002
-  if      (X[0] == ZERO)     {__cpy(y,z,p);  Z[0] = -Z[0];  return; }
8ae002
-  else if (Y[0] == ZERO)     {__cpy(x,z,p);                 return; }
8ae002
-
8ae002
-  if (X[0] != Y[0])    {
8ae002
-    if (__acr(x,y,p) > 0)      {add_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
8ae002
-    else                     {add_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
8ae002
-  }
8ae002
-  else                       {
8ae002
-    if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p);  Z[0] =  X[0]; }
8ae002
-    else if (n == -1)        {sub_magnitudes(y,x,z,p);  Z[0] = -Y[0]; }
8ae002
-    else                      Z[0] = ZERO;
8ae002
-  }
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Multiply two multiple precision numbers. *z is set to *x * *y. x&y      */
8ae002
-/* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is     */
8ae002
-/* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp.   */
8ae002
-/* *x & *y are left unchanged.                                             */
8ae002
 
8ae002
-void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  long i, i1, i2, j, k, k2;
8ae002
-  long p2 = p;
8ae002
-  double u, zk, zk2;
8ae002
-
8ae002
-                      /* Is z=0? */
8ae002
-  if (X[0]*Y[0]==ZERO)
8ae002
-     { Z[0]=ZERO;  return; }
8ae002
-
8ae002
-                       /* Multiply, add and carry */
8ae002
-  k2 = (p2<3) ? p2+p2 : p2+3;
8ae002
-  zk = Z[k2]=ZERO;
8ae002
-  for (k=k2; k>1; ) {
8ae002
-    if (k > p2)  {i1=k-p2; i2=p2+1; }
8ae002
-    else        {i1=1;   i2=k;   }
8ae002
-#if 1
8ae002
-    /* rearange this inner loop to allow the fmadd instructions to be
8ae002
-       independent and execute in parallel on processors that have
8ae002
-       dual symetrical FP pipelines.  */
8ae002
-    if (i1 < (i2-1))
8ae002
+  while (k > 1)
8ae002
     {
8ae002
-	/* make sure we have at least 2 iterations */
8ae002
-	if (((i2 - i1) & 1L) == 1L)
8ae002
-	{
8ae002
-                /* Handle the odd iterations case.  */
8ae002
-		zk2 = x->d[i2-1]*y->d[i1];
8ae002
-	}
8ae002
-	else
8ae002
-		zk2 = zero.d;
8ae002
-	/* Do two multiply/adds per loop iteration, using independent
8ae002
-           accumulators; zk and zk2.  */
8ae002
-	for (i=i1,j=i2-1; i
8ae002
-	{
8ae002
-		zk += x->d[i]*y->d[j];
8ae002
-		zk2 += x->d[i+1]*y->d[j-1];
8ae002
+      double yk2 = 0.0;
8ae002
+      long lim = k / 2;
8ae002
+
8ae002
+      if (k % 2 == 0)
8ae002
+        {
8ae002
+	  yk += X[lim] * X[lim];
8ae002
+	  lim--;
8ae002
 	}
8ae002
-	zk += zk2; /* final sum.  */
8ae002
-    }
8ae002
-    else
8ae002
+
8ae002
+      /* Likewise for this loop.  */
8ae002
+      for (i = 1, j = k - 1; i <= lim; i++, j--)
8ae002
+	yk2 += X[i] * X[j];
8ae002
+
8ae002
+      yk += 2.0 * yk2;
8ae002
+
8ae002
+      u = (yk + CUTTER) - CUTTER;
8ae002
+      if (u > yk)
8ae002
+	u -= RADIX;
8ae002
+      Y[k--] = yk - u;
8ae002
+      yk = u * RADIXI;
8ae002
+    }
8ae002
+  Y[k] = yk;
8ae002
+
8ae002
+  /* Squares are always positive.  */
8ae002
+  Y[0] = 1.0;
8ae002
+
8ae002
+  int e = EX * 2;
8ae002
+  /* Is there a carry beyond the most significant digit?  */
8ae002
+  if (__glibc_unlikely (Y[1] == 0))
8ae002
     {
8ae002
-        /* Special case when iterations is 1.  */
8ae002
-	zk += x->d[i1]*y->d[i1];
8ae002
+      for (i = 1; i <= p; i++)
8ae002
+	Y[i] = Y[i + 1];
8ae002
+      e--;
8ae002
     }
8ae002
-#else
8ae002
-    /* The orginal code.  */
8ae002
-    for (i=i1,j=i2-1; i
8ae002
-#endif
8ae002
-
8ae002
-    u = (zk + CUTTER)-CUTTER;
8ae002
-    if  (u > zk)  u -= RADIX;
8ae002
-    Z[k]  = zk - u;
8ae002
-    zk = u*RADIXI;
8ae002
-    --k;
8ae002
-  }
8ae002
-  Z[k] = zk;
8ae002
-
8ae002
-                 /* Is there a carry beyond the most significant digit? */
8ae002
-  if (Z[1] == ZERO) {
8ae002
-    for (i=1; i<=p2; i++)  Z[i]=Z[i+1];
8ae002
-    EZ = EX + EY - 1; }
8ae002
-  else
8ae002
-    EZ = EX + EY;
8ae002
-
8ae002
-  Z[0] = X[0] * Y[0];
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Invert a multiple precision number. Set *y = 1 / *x.                     */
8ae002
-/* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3,   */
8ae002
-/* 2.001*r**(1-p) for p>3.                                                  */
8ae002
-/* *x=0 is not permissible. *x is left unchanged.                           */
8ae002
-
8ae002
-void __inv(const mp_no *x, mp_no *y, int p) {
8ae002
-  long i;
8ae002
-#if 0
8ae002
-  int l;
8ae002
-#endif
8ae002
-  double t;
8ae002
-  mp_no z,w;
8ae002
-  static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
8ae002
-                            4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
8ae002
-  const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
-                         0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
8ae002
-
8ae002
-  __cpy(x,&z,p);  z.e=0;  __mp_dbl(&z,&t,p);
8ae002
-  t=ONE/t;   __dbl_mp(t,y,p);    EY -= EX;
8ae002
-
8ae002
-  for (i=0; i
8ae002
-    __cpy(y,&w,p);
8ae002
-    __mul(x,&w,y,p);
8ae002
-    __sub(&mptwo,y,&z,p);
8ae002
-    __mul(&w,&z,y,p);
8ae002
-  }
8ae002
-  return;
8ae002
-}
8ae002
-
8ae002
-
8ae002
-/* Divide one multiple precision number by another.Set *z = *x / *y. *x & *y */
8ae002
-/* are left unchanged. x&y may overlap but not x&z or y&z.                   */
8ae002
-/* Relative error bound = 2.001*r**(1-p) for p=2, 2.063*r**(1-p) for p=3     */
8ae002
-/* and 3.001*r**(1-p) for p>3. *y=0 is not permissible.                      */
8ae002
-
8ae002
-void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
8ae002
-
8ae002
-  mp_no w;
8ae002
-
8ae002
-  if (X[0] == ZERO)    Z[0] = ZERO;
8ae002
-  else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
8ae002
-  return;
8ae002
+  EY = e;
8ae002
 }
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat.h
8ae002
@@ -138,8 +138,6 @@
8ae002
 #endif
8ae002
 #endif
8ae002
 
8ae002
-#define  ZERO      zero.d
8ae002
-#define  ONE       one.d
8ae002
 #define  A         a.d
8ae002
 #define  B         b.d
8ae002
 #define  C         c.d
8ae002
@@ -160,7 +158,5 @@
8ae002
 #define  U6        u6.d
8ae002
 #define  U7        u7.d
8ae002
 #define  U8        u8.d
8ae002
-#define  TWO8      two8.d
8ae002
-#define  TWO52     two52.d
8ae002
 
8ae002
 #endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/atnat2.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/atnat2.h
8ae002
@@ -174,11 +174,4 @@
8ae002
 #endif
8ae002
 #endif
8ae002
 
8ae002
-#define  ZERO      zero.d
8ae002
-#define  MZERO     mzero.d
8ae002
-#define  ONE       one.d
8ae002
-#define  TWO8      two8.d
8ae002
-#define  TWO52     two52.d
8ae002
-#define  TWOM1022  twom1022.d
8ae002
-
8ae002
 #endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa2.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpa2.h
8ae002
+++ /dev/null
8ae002
@@ -1,94 +0,0 @@
8ae002
-
8ae002
-/*
8ae002
- * IBM Accurate Mathematical Library
8ae002
- * Written by International Business Machines Corp.
8ae002
- * Copyright (C) 2001 Free Software Foundation, Inc.
8ae002
- *
8ae002
- * This program is free software; you can redistribute it and/or modify
8ae002
- * it under the terms of the GNU Lesser General Public License as published by
8ae002
- * the Free Software Foundation; either version 2.1 of the License, or
8ae002
- * (at your option) any later version.
8ae002
- *
8ae002
- * This program is distributed in the hope that it will be useful,
8ae002
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
8ae002
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
8ae002
- * GNU Lesser General Public License for more details.
8ae002
- *
8ae002
- * You should have received a copy of the GNU Lesser General Public License
8ae002
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
8ae002
- */
8ae002
-
8ae002
-/**************************************************************************/
8ae002
-/*                                                                        */
8ae002
-/* MODULE_NAME:mpa2.h                                                     */
8ae002
-/*                                                                        */
8ae002
-/*                                                                        */
8ae002
-/*   variables prototype and definition   according to type of processor  */
8ae002
-/*   types definition                                                     */
8ae002
-/**************************************************************************/
8ae002
-
8ae002
-#ifndef MPA2_H
8ae002
-#define MPA2_H
8ae002
-
8ae002
-
8ae002
-#ifdef BIG_ENDI
8ae002
-static const number
8ae002
-/**/ radix          = {{0x41700000, 0x00000000} }, /* 2**24  */
8ae002
-/**/ radixi         = {{0x3e700000, 0x00000000} }, /* 2**-24 */
8ae002
-/**/ cutter         = {{0x44b00000, 0x00000000} }, /* 2**76  */
8ae002
-/**/ zero           = {{0x00000000, 0x00000000} }, /*  0     */
8ae002
-/**/ one            = {{0x3ff00000, 0x00000000} }, /*  1     */
8ae002
-/**/ mone           = {{0xbff00000, 0x00000000} }, /* -1     */
8ae002
-/**/ two            = {{0x40000000, 0x00000000} }, /*  2     */
8ae002
-/**/ two5           = {{0x40400000, 0x00000000} }, /* 2**5   */
8ae002
-/**/ two10          = {{0x40900000, 0x00000000} }, /* 2**10  */
8ae002
-/**/ two18          = {{0x41100000, 0x00000000} }, /* 2**18  */
8ae002
-/**/ two19          = {{0x41200000, 0x00000000} }, /* 2**19  */
8ae002
-/**/ two23          = {{0x41600000, 0x00000000} }, /* 2**23  */
8ae002
-/**/ two52          = {{0x43300000, 0x00000000} }, /* 2**52  */
8ae002
-/**/ two57          = {{0x43800000, 0x00000000} }, /* 2**57  */
8ae002
-/**/ two71          = {{0x44600000, 0x00000000} }, /* 2**71  */
8ae002
-/**/ twom1032       = {{0x00000400, 0x00000000} }; /* 2**-1032 */
8ae002
-
8ae002
-#else
8ae002
-#ifdef LITTLE_ENDI
8ae002
-static const number
8ae002
-/**/ radix          = {{0x00000000, 0x41700000} }, /* 2**24  */
8ae002
-/**/ radixi         = {{0x00000000, 0x3e700000} }, /* 2**-24 */
8ae002
-/**/ cutter         = {{0x00000000, 0x44b00000} }, /* 2**76  */
8ae002
-/**/ zero           = {{0x00000000, 0x00000000} }, /*  0     */
8ae002
-/**/ one            = {{0x00000000, 0x3ff00000} }, /*  1     */
8ae002
-/**/ mone           = {{0x00000000, 0xbff00000} }, /* -1     */
8ae002
-/**/ two            = {{0x00000000, 0x40000000} }, /*  2     */
8ae002
-/**/ two5           = {{0x00000000, 0x40400000} }, /* 2**5   */
8ae002
-/**/ two10          = {{0x00000000, 0x40900000} }, /* 2**10  */
8ae002
-/**/ two18          = {{0x00000000, 0x41100000} }, /* 2**18  */
8ae002
-/**/ two19          = {{0x00000000, 0x41200000} }, /* 2**19  */
8ae002
-/**/ two23          = {{0x00000000, 0x41600000} }, /* 2**23  */
8ae002
-/**/ two52          = {{0x00000000, 0x43300000} }, /* 2**52  */
8ae002
-/**/ two57          = {{0x00000000, 0x43800000} }, /* 2**57  */
8ae002
-/**/ two71          = {{0x00000000, 0x44600000} }, /* 2**71  */
8ae002
-/**/ twom1032       = {{0x00000000, 0x00000400} }; /* 2**-1032 */
8ae002
-
8ae002
-#endif
8ae002
-#endif
8ae002
-
8ae002
-#define  RADIX     radix.d
8ae002
-#define  RADIXI    radixi.d
8ae002
-#define  CUTTER    cutter.d
8ae002
-#define  ZERO      zero.d
8ae002
-#define  ONE       one.d
8ae002
-#define  MONE      mone.d
8ae002
-#define  TWO       two.d
8ae002
-#define  TWO5      two5.d
8ae002
-#define  TWO10     two10.d
8ae002
-#define  TWO18     two18.d
8ae002
-#define  TWO19     two19.d
8ae002
-#define  TWO23     two23.d
8ae002
-#define  TWO52     two52.d
8ae002
-#define  TWO57     two57.d
8ae002
-#define  TWO71     two71.d
8ae002
-#define  TWOM1032  twom1032.d
8ae002
-
8ae002
-
8ae002
-#endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.h
8ae002
@@ -177,6 +177,3 @@ __atan_twonm1[33] = {
8ae002
 
8ae002
 #endif
8ae002
 #endif
8ae002
-
8ae002
-#define  ONE       __atan_one.d
8ae002
-#define  TWO       __atan_two.d
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan2.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan2.c
8ae002
@@ -49,18 +49,16 @@ void
8ae002
 SECTION
8ae002
 __mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) {
8ae002
 
8ae002
-  static const double ZERO = 0.0, ONE = 1.0;
8ae002
-
8ae002
   mp_no mpone = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
 		    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8ae002
 		    0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}};
8ae002
   mp_no mpt1,mpt2,mpt3;
8ae002
 
8ae002
 
8ae002
-  if (X[0] <= ZERO) {
8ae002
-    mpone.e = 1;                 mpone.d[0] = mpone.d[1] = ONE;
8ae002
+  if (X[0] <= 0) {
8ae002
+    mpone.e = 1;                 mpone.d[0] = mpone.d[1] = 1;
8ae002
     __dvd(x,y,&mpt1,p);          __mul(&mpt1,&mpt1,&mpt2,p);
8ae002
-    if (mpt1.d[0] != ZERO)       mpt1.d[0] = ONE;
8ae002
+    if (mpt1.d[0] != 0)       mpt1.d[0] = 1;
8ae002
     __add(&mpt2,&mpone,&mpt3,p); __mpsqrt(&mpt3,&mpt2,p);
8ae002
     __add(&mpt1,&mpt2,&mpt3,p);  mpt3.d[0]=Y[0];
8ae002
     __mpatan(&mpt3,&mpt1,p);     __add(&mpt1,&mpt1,z,p);
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpexp.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpexp.h
8ae002
@@ -159,11 +159,4 @@ extern const number __mpexp_half attribu
8ae002
 #endif
8ae002
 #endif
8ae002
 
8ae002
-#define  RADIX     __mpexp_radix.d
8ae002
-#define  RADIXI    __mpexp_radixi.d
8ae002
-#define  ZERO      __mpexp_zero.d
8ae002
-#define  ONE       __mpexp_one.d
8ae002
-#define  TWO       __mpexp_two.d
8ae002
-#define  HALF      __mpexp_half.d
8ae002
-
8ae002
 #endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.h
8ae002
@@ -51,7 +51,4 @@ extern const int __mpsqrt_mp[33] attribu
8ae002
 			     4,4,4,4,4,4,4,4,4};
8ae002
 #endif
8ae002
 
8ae002
-#define  ONE       __mpsqrt_one.d
8ae002
-#define  HALFRAD   __mpsqrt_halfrad.d
8ae002
-
8ae002
 #endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mptan.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mptan.c
8ae002
@@ -47,8 +47,6 @@ void
8ae002
 SECTION
8ae002
 __mptan(double x, mp_no *mpy, int p) {
8ae002
 
8ae002
-  static const double MONE = -1.0;
8ae002
-
8ae002
   int n;
8ae002
   mp_no mpw, mpc, mps;
8ae002
 
8ae002
@@ -56,7 +54,7 @@ __mptan(double x, mp_no *mpy, int p) {
8ae002
   __c32(&mpw, &mpc, &mps, p);              /* computing sin(x) and cos(x) */
8ae002
   if (n)                     /* second or fourth quarter of unit circle */
8ae002
   { __dvd(&mpc,&mps,mpy,p);
8ae002
-    mpy->d[0] *= MONE;
8ae002
+    mpy->d[0] *= -1;
8ae002
   }                          /* tan is negative in this area */
8ae002
   else  __dvd(&mps,&mpc,mpy,p);
8ae002
 
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/ulog.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/ulog.h
8ae002
@@ -181,10 +181,6 @@
8ae002
 #endif
8ae002
 #endif
8ae002
 
8ae002
-#define  ZERO      zero.d
8ae002
-#define  ONE       one.d
8ae002
-#define  HALF      half.d
8ae002
-#define  MHALF     mhalf.d
8ae002
 #define  SQRT_2    sqrt_2.d
8ae002
 #define  DEL_U     delu.d
8ae002
 #define  DEL_V     delv.d
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/utan.h
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/utan.h
8ae002
@@ -270,10 +270,4 @@
8ae002
 #endif
8ae002
 #endif
8ae002
 
8ae002
-
8ae002
-#define  ZERO      zero.d
8ae002
-#define  ONE       one.d
8ae002
-#define  MONE      mone.d
8ae002
-#define  TWO8      two8.d
8ae002
-
8ae002
 #endif
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h
8ae002
===================================================================
8ae002
--- /dev/null
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpa-arch.h
8ae002
@@ -0,0 +1,47 @@
8ae002
+/* Overridable constants and operations.
8ae002
+   Copyright (C) 2013 Free Software Foundation, Inc.
8ae002
+
8ae002
+   This program is free software; you can redistribute it and/or modify
8ae002
+   it under the terms of the GNU Lesser General Public License as published by
8ae002
+   the Free Software Foundation; either version 2.1 of the License, or
8ae002
+   (at your option) any later version.
8ae002
+
8ae002
+   This program is distributed in the hope that it will be useful,
8ae002
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
8ae002
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
8ae002
+   GNU Lesser General Public License for more details.
8ae002
+
8ae002
+   You should have received a copy of the GNU Lesser General Public License
8ae002
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
8ae002
+
8ae002
+#include <stdint.h>
8ae002
+
8ae002
+typedef long mantissa_t;
8ae002
+typedef int64_t mantissa_store_t;
8ae002
+
8ae002
+#define TWOPOW(i) (1L << i)
8ae002
+
8ae002
+#define RADIX_EXP 24
8ae002
+#define  RADIX TWOPOW (RADIX_EXP)		/* 2^24    */
8ae002
+
8ae002
+/* Divide D by RADIX and put the remainder in R.  D must be a non-negative
8ae002
+   integral value.  */
8ae002
+#define DIV_RADIX(d, r) \
8ae002
+  ({									      \
8ae002
+    r = d & (RADIX - 1);						      \
8ae002
+    d >>= RADIX_EXP;							      \
8ae002
+  })
8ae002
+
8ae002
+/* Put the integer component of a double X in R and retain the fraction in
8ae002
+   X.  This is used in extracting mantissa digits for MP_NO by using the
8ae002
+   integer portion of the current value of the number as the current mantissa
8ae002
+   digit and then scaling by RADIX to get the next mantissa digit in the same
8ae002
+   manner.  */
8ae002
+#define INTEGER_OF(x, i) \
8ae002
+  ({									      \
8ae002
+    i = (mantissa_t) x;							      \
8ae002
+    x -= i;								      \
8ae002
+  })
8ae002
+
8ae002
+/* Align IN down to F.  The code assumes that F is a power of two.  */
8ae002
+#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
8ae002
Index: glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h
8ae002
===================================================================
8ae002
--- /dev/null
8ae002
+++ glibc-2.17-c758a686/sysdeps/powerpc/power4/fpu/mpa-arch.h
8ae002
@@ -0,0 +1,56 @@
8ae002
+/* Overridable constants and operations.
8ae002
+   Copyright (C) 2013-2017 Free Software Foundation, Inc.
8ae002
+
8ae002
+   This program is free software; you can redistribute it and/or modify
8ae002
+   it under the terms of the GNU Lesser General Public License as published by
8ae002
+   the Free Software Foundation; either version 2.1 of the License, or
8ae002
+   (at your option) any later version.
8ae002
+
8ae002
+   This program is distributed in the hope that it will be useful,
8ae002
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
8ae002
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
8ae002
+   GNU Lesser General Public License for more details.
8ae002
+
8ae002
+   You should have received a copy of the GNU Lesser General Public License
8ae002
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
8ae002
+
8ae002
+typedef double mantissa_t;
8ae002
+typedef double mantissa_store_t;
8ae002
+
8ae002
+#define TWOPOW(i) (0x1.0p##i)
8ae002
+
8ae002
+#define RADIX TWOPOW (24)		/* 2^24    */
8ae002
+#define CUTTER TWOPOW (76)		/* 2^76    */
8ae002
+#define RADIXI 0x1.0p-24		/* 2^-24 */
8ae002
+#define TWO52 TWOPOW (52)		/* 2^52 */
8ae002
+
8ae002
+/* Divide D by RADIX and put the remainder in R.  */
8ae002
+#define DIV_RADIX(d,r) \
8ae002
+  ({									      \
8ae002
+    double u = ((d) + CUTTER) - CUTTER;					      \
8ae002
+    if (u > (d))							      \
8ae002
+      u -= RADIX;							      \
8ae002
+    r = (d) - u;							      \
8ae002
+    (d) = u * RADIXI;							      \
8ae002
+  })
8ae002
+
8ae002
+/* Put the integer component of a double X in R and retain the fraction in
8ae002
+   X.  */
8ae002
+#define INTEGER_OF(x, r) \
8ae002
+  ({									      \
8ae002
+    double u = ((x) + TWO52) - TWO52;					      \
8ae002
+    if (u > (x))							      \
8ae002
+      u -= 1;								      \
8ae002
+    (r) = u;								      \
8ae002
+    (x) -= u;								      \
8ae002
+  })
8ae002
+
8ae002
+/* Align IN down to a multiple of F, where F is a power of two.  */
8ae002
+#define ALIGN_DOWN_TO(in, f) \
8ae002
+  ({									      \
8ae002
+    double factor = f * TWO52;						      \
8ae002
+    double u = (in + factor) - factor;					      \
8ae002
+    if (u > in)								      \
8ae002
+      u -= f;								      \
8ae002
+    u;									      \
8ae002
+  })
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_atan2.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_atan2.c
8ae002
@@ -98,15 +98,15 @@ __ieee754_atan2(double y,double x) {
8ae002
   /* y=+-0 */
8ae002
   if      (uy==0x00000000) {
8ae002
     if    (dy==0x00000000) {
8ae002
-      if  ((ux&0x80000000)==0x00000000)  return ZERO;
8ae002
+      if  ((ux&0x80000000)==0x00000000)  return 0;
8ae002
       else                               return opi.d; } }
8ae002
   else if (uy==0x80000000) {
8ae002
     if    (dy==0x00000000) {
8ae002
-      if  ((ux&0x80000000)==0x00000000)  return MZERO;
8ae002
+      if  ((ux&0x80000000)==0x00000000)  return -0.0;
8ae002
       else                               return mopi.d;} }
8ae002
 
8ae002
   /* x=+-0 */
8ae002
-  if (x==ZERO) {
8ae002
+  if (x==0) {
8ae002
     if ((uy&0x80000000)==0x00000000)     return hpi.d;
8ae002
     else                                 return mhpi.d; }
8ae002
 
8ae002
@@ -118,8 +118,8 @@ __ieee754_atan2(double y,double x) {
8ae002
       else if (uy==0xfff00000) {
8ae002
 	if    (dy==0x00000000)  return mqpi.d; }
8ae002
       else {
8ae002
-	if    ((uy&0x80000000)==0x00000000)  return ZERO;
8ae002
-	else                                 return MZERO; }
8ae002
+	if    ((uy&0x80000000)==0x00000000)  return 0;
8ae002
+	else                                 return -0.0; }
8ae002
     }
8ae002
   }
8ae002
   else if     (ux==0xfff00000) {
8ae002
@@ -141,14 +141,14 @@ __ieee754_atan2(double y,double x) {
8ae002
     if    (dy==0x00000000)  return mhpi.d; }
8ae002
 
8ae002
   /* either x/y or y/x is very close to zero */
8ae002
-  ax = (x
8ae002
+  ax = (x<0) ? -x : x;    ay = (y<0) ? -y : y;
8ae002
   de = (uy & 0x7ff00000) - (ux & 0x7ff00000);
8ae002
-  if      (de>=ep)  { return ((y>ZERO) ? hpi.d : mhpi.d); }
8ae002
+  if      (de>=ep)  { return ((y>0) ? hpi.d : mhpi.d); }
8ae002
   else if (de<=em)  {
8ae002
-    if    (x>ZERO)  {
8ae002
+    if    (x>0)  {
8ae002
       if  ((z=ay/ax)
8ae002
       else                      return signArctan2(y,z); }
8ae002
-    else            { return ((y>ZERO) ? opi.d : mopi.d); } }
8ae002
+    else            { return ((y>0) ? opi.d : mopi.d); } }
8ae002
 
8ae002
   /* if either x or y is extremely close to zero, scale abs(x), abs(y). */
8ae002
   if (ax
8ae002
@@ -170,7 +170,7 @@ __ieee754_atan2(double y,double x) {
8ae002
     EMULV(ay,u,v,vv,t1,t2,t3,t4,t5)
8ae002
     du=((ax-v)-vv)/ay; }
8ae002
 
8ae002
-  if (x>ZERO) {
8ae002
+  if (x>0) {
8ae002
 
8ae002
     /* (i)   x>0, abs(y)< abs(x):  atan(ay/ax) */
8ae002
     if (ay
8ae002
@@ -180,7 +180,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 
8ae002
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -212,7 +212,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 	EADD(t1,du,v,vv)
8ae002
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
8ae002
 	   v*(hij[i][14].d+v* hij[i][15].d))));
8ae002
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -237,7 +237,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 
8ae002
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -265,7 +265,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 	EADD(t1,du,v,vv)
8ae002
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
8ae002
 	   v*(hij[i][14].d+v* hij[i][15].d))));
8ae002
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -293,7 +293,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 
8ae002
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -321,7 +321,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 	EADD(t1,du,v,vv)
8ae002
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
8ae002
 	   v*(hij[i][14].d+v* hij[i][15].d))));
8ae002
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -347,7 +347,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 
8ae002
 	MUL2(u,du,u,du,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -375,7 +375,7 @@ __ieee754_atan2(double y,double x) {
8ae002
 	EADD(t1,du,v,vv)
8ae002
 	s1=v*(hij[i][11].d+v*(hij[i][12].d+v*(hij[i][13].d+
8ae002
 	   v*(hij[i][14].d+v* hij[i][15].d))));
8ae002
-	ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/e_log.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/e_log.c
8ae002
@@ -78,9 +78,9 @@ __ieee754_log(double x) {
8ae002
   n=0;
8ae002
   if (__builtin_expect(ux < 0x00100000, 0)) {
8ae002
     if (__builtin_expect(((ux & 0x7fffffff) | dx) == 0, 0))
8ae002
-      return MHALF/ZERO; /* return -INF */
8ae002
+      return MHALF/0; /* return -INF */
8ae002
     if (__builtin_expect(ux < 0, 0))
8ae002
-      return (x-x)/ZERO;                         /* return NaN  */
8ae002
+      return (x-x)/0;                         /* return NaN  */
8ae002
     n -= 54;    x *= two54.d;                              /* scale x     */
8ae002
     num.d = x;
8ae002
   }
8ae002
@@ -89,7 +89,7 @@ __ieee754_log(double x) {
8ae002
 
8ae002
   /* Regular values of x */
8ae002
 
8ae002
-  w = x-ONE;
8ae002
+  w = x-1;
8ae002
   if (__builtin_expect(ABS(w) > U03, 1)) { goto case_03; }
8ae002
 
8ae002
 
8ae002
@@ -113,25 +113,25 @@ __ieee754_log(double x) {
8ae002
 	    w*(d17.d+w*(d18.d+w*(d19.d+w*d20.d))))))));
8ae002
   EMULV(w,a,s2,ss2,t1,t2,t3,t4,t5)
8ae002
   ADD2(d10.d,dd10.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d9.d,dd9.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d8.d,dd8.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d7.d,dd7.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d6.d,dd6.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d5.d,dd5.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d4.d,dd4.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d3.d,dd3.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
   ADD2(d2.d,dd2.d,s2,ss2,s3,ss3,t1,t2)
8ae002
-  MUL2(w,ZERO,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
-  MUL2(w,ZERO,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
-  ADD2(w,ZERO,    s3,ss3, b, bb,t1,t2)
8ae002
+  MUL2(w,0,s3,ss3,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(w,0,s2,ss2,s3,ss3,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  ADD2(w,0,    s3,ss3, b, bb,t1,t2)
8ae002
 
8ae002
   /* End stage II, case abs(x-1) < 0.03 */
8ae002
   if ((y=b+(bb+b*E4)) == b+(bb-b*E4))  return y;
8ae002
@@ -155,7 +155,7 @@ __ieee754_log(double x) {
8ae002
   j = (num.i[HIGH_HALF] & 0x000fffff) >> 4;
8ae002
 
8ae002
   /* Compute w=(u-ui*vj)/(ui*vj) */
8ae002
-  p0=(ONE+(i-75)*DEL_U)*(ONE+(j-180)*DEL_V);
8ae002
+  p0=(1+(i-75)*DEL_U)*(1+(j-180)*DEL_V);
8ae002
   q=u-p0;   r0=Iu[i].d*Iv[j].d;   w=q*r0;
8ae002
 
8ae002
   /* Evaluate polynomial I */
8ae002
@@ -178,11 +178,11 @@ __ieee754_log(double x) {
8ae002
 
8ae002
   /* Improve the accuracy of r0 */
8ae002
   EMULV(p0,r0,sa,sb,t1,t2,t3,t4,t5)
8ae002
-  t=r0*((ONE-sa)-sb);
8ae002
+  t=r0*((1-sa)-sb);
8ae002
   EADD(r0,t,ra,rb)
8ae002
 
8ae002
   /* Compute w */
8ae002
-  MUL2(q,ZERO,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+  MUL2(q,0,ra,rb,w,ww,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 
8ae002
   EADD(A,B0,a0,aa0)
8ae002
 
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_atan.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_atan.c
8ae002
@@ -82,7 +82,7 @@ double atan(double x) {
8ae002
     return x+x;
8ae002
 
8ae002
   /* Regular values of x, including denormals +-0 and +-INF */
8ae002
-  u = (x
8ae002
+  u = (x<0) ? -x : x;
8ae002
   if (u
8ae002
     if (u
8ae002
       if (u
8ae002
@@ -93,7 +93,7 @@ double atan(double x) {
8ae002
 
8ae002
 	EMULV(x,x,v,vv,t1,t2,t3,t4,t5)                       /* v+vv=x^2 */
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -101,8 +101,8 @@ double atan(double x) {
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
-	MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
-	ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2)
8ae002
+	MUL2(x,0,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+	ADD2(x,0,s2,ss2,s1,ss1,t1,t2)
8ae002
 	if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1))  return y;
8ae002
 
8ae002
 	return atanMp(x,pr);
8ae002
@@ -124,14 +124,14 @@ double atan(double x) {
8ae002
       z=u-hij[i][0].d;
8ae002
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
8ae002
 	 z*(hij[i][14].d+z* hij[i][15].d))));
8ae002
-      ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+      ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
       ADD2(hij[i][5].d,hij[i][6].d,s1,ss1,s2,ss2,t1,t2)
8ae002
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
       ADD2(hij[i][3].d,hij[i][4].d,s1,ss1,s2,ss2,t1,t2)
8ae002
-      MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
+      MUL2(z,0,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
       ADD2(hij[i][1].d,hij[i][2].d,s1,ss1,s2,ss2,t1,t2)
8ae002
       if ((y=s2+(ss2-U6*s2)) == s2+(ss2+U6*s2))  return __signArctan(x,y);
8ae002
 
8ae002
@@ -140,9 +140,9 @@ double atan(double x) {
8ae002
   }
8ae002
   else {
8ae002
     if (u
8ae002
-      w=ONE/u;
8ae002
+      w=1/u;
8ae002
       EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
8ae002
-      ww=w*((ONE-t1)-t2);
8ae002
+      ww=w*((1-t1)-t2);
8ae002
       i=(TWO52+TWO8*w)-TWO52;  i-=16;
8ae002
       z=(w-cij[i][0].d)+ww;
8ae002
       yy=HPI1-z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+
8ae002
@@ -152,12 +152,12 @@ double atan(double x) {
8ae002
       else        u3=U32;  /* w >= 1/2 */
8ae002
       if ((y=t1+(yy-u3)) == t1+(yy+u3))  return __signArctan(x,y);
8ae002
 
8ae002
-      DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
8ae002
+      DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
8ae002
       t1=w-hij[i][0].d;
8ae002
       EADD(t1,ww,z,zz)
8ae002
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
8ae002
 	 z*(hij[i][14].d+z* hij[i][15].d))));
8ae002
-      ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+      ADD2(hij[i][9].d,hij[i][10].d,s1,0,s2,ss2,t1,t2)
8ae002
       MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
8ae002
       MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
@@ -173,18 +173,18 @@ double atan(double x) {
8ae002
     }
8ae002
     else {
8ae002
       if (u
8ae002
-	w=ONE/u;   v=w*w;
8ae002
+	w=1/u;   v=w*w;
8ae002
 	EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
8ae002
 	yy=w*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
8ae002
-	ww=w*((ONE-t1)-t2);
8ae002
+	ww=w*((1-t1)-t2);
8ae002
 	ESUB(HPI,w,t3,cor)
8ae002
 	yy=((HPI1+cor)-ww)-yy;
8ae002
 	if ((y=t3+(yy-U4)) == t3+(yy+U4))  return __signArctan(x,y);
8ae002
 
8ae002
-	DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
8ae002
+	DIV2(1,0,u,0,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
8ae002
 	MUL2(w,ww,w,ww,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
8ae002
-	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
8ae002
+	ADD2(f9.d,ff9.d,s1,0,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
 	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
8ae002
 	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/s_tan.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/s_tan.c
8ae002
@@ -84,7 +84,7 @@ tan(double x) {
8ae002
     goto ret;
8ae002
   }
8ae002
 
8ae002
-  w=(x
8ae002
+  w=(x<0) ? -x : x;
8ae002
 
8ae002
   /* (I) The case abs(x) <= 1.259e-8 */
8ae002
   if (w<=g1.d) { retval = x; goto ret; }
8ae002
@@ -125,11 +125,11 @@ tan(double x) {
8ae002
 
8ae002
     /* First stage */
8ae002
     i = ((int) (mfftnhf.d+TWO8*w));
8ae002
-    z = w-xfg[i][0].d;  z2 = z*z;   s = (x
8ae002
+    z = w-xfg[i][0].d;  z2 = z*z;   s = (x<0) ? -1 : 1;
8ae002
     pz = z+z*z2*(e0.d+z2*e1.d);
8ae002
     fi = xfg[i][1].d;   gi = xfg[i][2].d;   t2 = pz*(gi+fi)/(gi-pz);
8ae002
     if ((y=fi+(t2-fi*u3.d))==fi+(t2+fi*u3.d)) { retval = (s*y); goto ret; }
8ae002
-    t3 = (t2
8ae002
+    t3 = (t2<0) ? -t2 : t2;
8ae002
     t4 = fi*ua3.d+t3*ub3.d;
8ae002
     if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (s*y); goto ret; }
8ae002
 
8ae002
@@ -165,8 +165,8 @@ tan(double x) {
8ae002
     da = xn*mp3.d;
8ae002
     a=t1-da;
8ae002
     da = (t1-a)-da;
8ae002
-    if (a
8ae002
-    else         {ya= a;  yya= da;  sy= ONE;}
8ae002
+    if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
8ae002
+    else         {ya= a;  yya= da;  sy= 1;}
8ae002
 
8ae002
     /* (IV),(V) The case 0.787 < abs(x) <= 25,    abs(y) <= 1e-7 */
8ae002
     if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
8ae002
@@ -240,14 +240,14 @@ tan(double x) {
8ae002
       /* -cot */
8ae002
       t2 = pz*(fi+gi)/(fi+pz);
8ae002
       if ((y=gi-(t2-gi*u10.d))==gi-(t2+gi*u10.d)) { retval = (-sy*y); goto ret; }
8ae002
-      t3 = (t2
8ae002
+      t3 = (t2<0) ? -t2 : t2;
8ae002
       t4 = gi*ua10.d+t3*ub10.d;
8ae002
       if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
8ae002
     else   {
8ae002
       /* tan */
8ae002
       t2 = pz*(gi+fi)/(gi-pz);
8ae002
       if ((y=fi+(t2-fi*u9.d))==fi+(t2+fi*u9.d)) { retval = (sy*y); goto ret; }
8ae002
-      t3 = (t2
8ae002
+      t3 = (t2<0) ? -t2 : t2;
8ae002
       t4 = fi*ua9.d+t3*ub9.d;
8ae002
       if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
8ae002
 
8ae002
@@ -295,8 +295,8 @@ tan(double x) {
8ae002
     a = t - t1;
8ae002
     da = ((t-a)-t1)+da;
8ae002
     EADD(a,da,t1,t2)   a=t1;  da=t2;
8ae002
-    if (a
8ae002
-    else         {ya= a;  yya= da;  sy= ONE;}
8ae002
+    if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
8ae002
+    else         {ya= a;  yya= da;  sy= 1;}
8ae002
 
8ae002
     /* (+++) The case 25 < abs(x) <= 1e8,    abs(y) <= 1e-7 */
8ae002
     if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
8ae002
@@ -355,14 +355,14 @@ tan(double x) {
8ae002
       /* -cot */
8ae002
       t2 = pz*(fi+gi)/(fi+pz);
8ae002
       if ((y=gi-(t2-gi*u18.d))==gi-(t2+gi*u18.d)) { retval = (-sy*y); goto ret; }
8ae002
-      t3 = (t2
8ae002
+      t3 = (t2<0) ? -t2 : t2;
8ae002
       t4 = gi*ua18.d+t3*ub18.d;
8ae002
       if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
8ae002
     else   {
8ae002
       /* tan */
8ae002
       t2 = pz*(gi+fi)/(gi-pz);
8ae002
       if ((y=fi+(t2-fi*u17.d))==fi+(t2+fi*u17.d)) { retval = (sy*y); goto ret; }
8ae002
-      t3 = (t2
8ae002
+      t3 = (t2<0) ? -t2 : t2;
8ae002
       t4 = fi*ua17.d+t3*ub17.d;
8ae002
       if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
8ae002
 
8ae002
@@ -398,8 +398,8 @@ tan(double x) {
8ae002
   /* Range reduction by algorithm iii */
8ae002
   n = (__branred(x,&a,&da)) & 0x00000001;
8ae002
   EADD(a,da,t1,t2)   a=t1;  da=t2;
8ae002
-  if (a
8ae002
-  else         {ya= a;  yya= da;  sy= ONE;}
8ae002
+  if (a<0)  {ya=-a;  yya=-da;  sy=-1;}
8ae002
+  else         {ya= a;  yya= da;  sy= 1;}
8ae002
 
8ae002
   /* (+++) The case 1e8 < abs(x) < 2**1024,    abs(y) <= 1e-7 */
8ae002
   if (ya<=gy1.d) { retval = tanMp(x); goto ret; }
8ae002
@@ -463,14 +463,14 @@ tan(double x) {
8ae002
     /* -cot */
8ae002
     t2 = pz*(fi+gi)/(fi+pz);
8ae002
     if ((y=gi-(t2-gi*u26.d))==gi-(t2+gi*u26.d)) { retval = (-sy*y); goto ret; }
8ae002
-    t3 = (t2
8ae002
+    t3 = (t2<0) ? -t2 : t2;
8ae002
     t4 = gi*ua26.d+t3*ub26.d;
8ae002
     if ((y=gi-(t2-t4))==gi-(t2+t4)) { retval = (-sy*y); goto ret; } }
8ae002
   else   {
8ae002
     /* tan */
8ae002
     t2 = pz*(gi+fi)/(gi-pz);
8ae002
     if ((y=fi+(t2-fi*u25.d))==fi+(t2+fi*u25.d)) { retval = (sy*y); goto ret; }
8ae002
-    t3 = (t2
8ae002
+    t3 = (t2<0) ? -t2 : t2;
8ae002
     t4 = fi*ua25.d+t3*ub25.d;
8ae002
     if ((y=fi+(t2-t4))==fi+(t2+t4)) { retval = (sy*y); goto ret; } }
8ae002
 
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpatan.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpatan.c
8ae002
@@ -69,8 +69,8 @@ __mpatan(mp_no *x, mp_no *y, int p) {
8ae002
 	{if (dx>__atan_xm[m].d) break;}
8ae002
     }
8ae002
     mpone.e    = mptwo.e    = mptwoim1.e = 1;
8ae002
-    mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = ONE;
8ae002
-    mptwo.d[1] = TWO;
8ae002
+    mpone.d[0] = mpone.d[1] = mptwo.d[0] = mptwoim1.d[0] = 1;
8ae002
+    mptwo.d[1] = 2;
8ae002
 
8ae002
 				 /* Reduce x m times */
8ae002
     __mul(x,x,&mpsm,p);
8ae002
@@ -92,7 +92,7 @@ __mpatan(mp_no *x, mp_no *y, int p) {
8ae002
     n=__atan_np[p];    mptwoim1.d[1] = __atan_twonm1[p].d;
8ae002
     __dvd(&mpsm,&mptwoim1,&mpt,p);
8ae002
     for (i=n-1; i>1; i--) {
8ae002
-      mptwoim1.d[1] -= TWO;
8ae002
+      mptwoim1.d[1] -= 2;
8ae002
       __dvd(&mpsm,&mptwoim1,&mpt1,p);
8ae002
       __mul(&mpsm,&mpt,&mpt2,p);
8ae002
       __sub(&mpt1,&mpt2,&mpt,p);
8ae002
Index: glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c
8ae002
===================================================================
8ae002
--- glibc-2.17-c758a686.orig/sysdeps/ieee754/dbl-64/mpsqrt.c
8ae002
+++ glibc-2.17-c758a686/sysdeps/ieee754/dbl-64/mpsqrt.c
8ae002
@@ -62,8 +62,8 @@ __mpsqrt(mp_no *x, mp_no *y, int p) {
8ae002
   mp_no mpxn,mpz,mpu,mpt1,mpt2;
8ae002
 
8ae002
   /* Prepare multi-precision 1/2 and 3/2 */
8ae002
-  mphalf.e  =0;  mphalf.d[0]  =ONE;  mphalf.d[1]  =HALFRAD;
8ae002
-  mp3halfs.e=1;  mp3halfs.d[0]=ONE;  mp3halfs.d[1]=ONE;  mp3halfs.d[2]=HALFRAD;
8ae002
+  mphalf.e  =0;  mphalf.d[0]  =1;  mphalf.d[1]  =HALFRAD;
8ae002
+  mp3halfs.e=1;  mp3halfs.d[0]=1;  mp3halfs.d[1]=1;  mp3halfs.d[2]=HALFRAD;
8ae002
 
8ae002
   ey=EX/2;     __cpy(x,&mpxn,p);    mpxn.e -= (ey+ey);
8ae002
   __mp_dbl(&mpxn,&dx,p);   dy=fastiroot(dx);    __dbl_mp(dy,&mpu,p);