From 1eb4b9fc586e487899fe7495ec4e973d338c436f Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 05 2019 20:55:23 +0000 Subject: import numpy-1.14.3-9.el8 --- diff --git a/SOURCES/numpy-1.14.3-CVE-2019-6446.patch b/SOURCES/numpy-1.14.3-CVE-2019-6446.patch new file mode 100644 index 0000000..0c82447 --- /dev/null +++ b/SOURCES/numpy-1.14.3-CVE-2019-6446.patch @@ -0,0 +1,166 @@ +From b272ff74ae319a82e4d3e3de1348b2a6115f6501 Mon Sep 17 00:00:00 2001 +From: Paul Ivanov +Date: Wed, 30 Jan 2019 14:22:44 -0800 +Subject: [PATCH] BUG: load fails when using pickle without allow_pickle=True + +a partial mitigation of #12759. + +see also https://nvd.nist.gov/vuln/detail/CVE-2019-6446 +--- + numpy/core/tests/test_regression.py | 2 +- + numpy/lib/format.py | 8 ++++++-- + numpy/lib/npyio.py | 17 ++++++++++++----- + numpy/lib/tests/test_format.py | 15 +++++++++------ + numpy/lib/tests/test_io.py | 2 +- + 5 files changed, 29 insertions(+), 15 deletions(-) + +diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py +index 52793d453..b144aec17 100644 +--- a/numpy/core/tests/test_regression.py ++++ b/numpy/core/tests/test_regression.py +@@ -96,7 +96,7 @@ class TestRegression(object): + ca = np.char.array(np.arange(1000, 1010), itemsize=4) + ca.dump(f) + f.seek(0) +- ca = np.load(f) ++ ca = np.load(f, allow_pickle=True) + f.close() + + def test_noncontiguous_fill(self): +diff --git a/numpy/lib/format.py b/numpy/lib/format.py +index 363bb2101..e6144b3a6 100644 +--- a/numpy/lib/format.py ++++ b/numpy/lib/format.py +@@ -602,7 +602,7 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): + fp.write(chunk.tobytes('C')) + + +-def read_array(fp, allow_pickle=True, pickle_kwargs=None): ++def read_array(fp, allow_pickle=False, pickle_kwargs=None): + """ + Read an array from an NPY file. + +@@ -612,7 +612,11 @@ def read_array(fp, allow_pickle=True, pickle_kwargs=None): + If this is not a real file object, then this may take extra memory + and time. + allow_pickle : bool, optional +- Whether to allow reading pickled data. Default: True ++ Whether to allow reading pickled data. Default: False ++ ++ .. versionchanged:: 1.14.3 ++ Made default False in response to CVE-2019-6446. ++ + pickle_kwargs : dict + Additional keyword arguments to pass to pickle.load. These are only + useful when loading object arrays saved on Python 2 when using +diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py +index 76b135cc2..ef84d68fd 100644 +--- a/numpy/lib/npyio.py ++++ b/numpy/lib/npyio.py +@@ -130,7 +130,11 @@ class NpzFile(object): + An object on which attribute can be performed as an alternative + to getitem access on the `NpzFile` instance itself. + allow_pickle : bool, optional +- Allow loading pickled data. Default: True ++ Allow loading pickled data. Default: False ++ ++ .. versionchanged:: 1.14.3 ++ Made default False in response to CVE-2019-6446. ++ + pickle_kwargs : dict, optional + Additional keyword arguments to pass on to pickle.load. + These are only useful when loading object arrays saved on +@@ -166,7 +170,7 @@ class NpzFile(object): + + """ + +- def __init__(self, fid, own_fid=False, allow_pickle=True, ++ def __init__(self, fid, own_fid=False, allow_pickle=False, + pickle_kwargs=None): + # Import is postponed to here since zipfile depends on gzip, an + # optional component of the so-called standard library. 
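The user-visible effect of the hardened default can be sketched as follows (illustration only, not part of the patch; it assumes the patched behavior above, where loading a pickled object array without the explicit opt-in raises ValueError):

import numpy as np
from io import BytesIO

# Object arrays can only be serialized via pickle, so np.save still works,
# but np.load must now be told explicitly to trust the input.
buf = BytesIO()
np.save(buf, np.array([{'a': 1}], dtype=object))
buf.seek(0)
try:
    np.load(buf)  # patched default: allow_pickle=False
except ValueError:
    buf.seek(0)
    data = np.load(buf, allow_pickle=True)  # explicit opt-in, old behavior

This opt-in is the same allow_pickle=True that the test-suite hunks in this patch add wherever object arrays are round-tripped.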
+@@ -265,7 +269,7 @@ class NpzFile(object): + return self.files.__contains__(key) + + +-def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, ++def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, + encoding='ASCII'): + """ + Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. +@@ -287,8 +291,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, + Allow loading pickled object arrays stored in npy files. Reasons for + disallowing pickles include security, as loading pickled data can + execute arbitrary code. If pickles are disallowed, loading object +- arrays will fail. +- Default: True ++ arrays will fail. Default: False ++ ++ .. versionchanged:: 1.14.3 ++ Made default False in response to CVE-2019-6446. ++ + fix_imports : bool, optional + Only useful when loading Python 2 generated pickled files on Python 3, + which includes npy/npz files containing object arrays. If `fix_imports` +diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py +index 2d2b4cea2..04e090ca7 100644 +--- a/numpy/lib/tests/test_format.py ++++ b/numpy/lib/tests/test_format.py +@@ -426,7 +426,7 @@ def roundtrip(arr): + f = BytesIO() + format.write_array(f, arr) + f2 = BytesIO(f.getvalue()) +- arr2 = format.read_array(f2) ++ arr2 = format.read_array(f2, allow_pickle=True) + return arr2 + + +@@ -553,7 +553,7 @@ def test_pickle_python2_python3(): + path = os.path.join(data_dir, fname) + + for encoding in ['bytes', 'latin1']: +- data_f = np.load(path, encoding=encoding) ++ data_f = np.load(path, allow_pickle=True, encoding=encoding) + if fname.endswith('.npz'): + data = data_f['x'] + data_f.close() +@@ -575,16 +575,19 @@ def test_pickle_python2_python3(): + if sys.version_info[0] >= 3: + if fname.startswith('py2'): + if fname.endswith('.npz'): +- data = np.load(path) ++ data = np.load(path, allow_pickle=True) + assert_raises(UnicodeError, data.__getitem__, 'x') + data.close() +- data = np.load(path, fix_imports=False, encoding='latin1') ++ data = np.load(path, allow_pickle=True, fix_imports=False, ++ encoding='latin1') + assert_raises(ImportError, data.__getitem__, 'x') + data.close() + else: +- assert_raises(UnicodeError, np.load, path) ++ assert_raises(UnicodeError, np.load, path, ++ allow_pickle=True) + assert_raises(ImportError, np.load, path, +- encoding='latin1', fix_imports=False) ++ allow_pickle=True, fix_imports=False, ++ encoding='latin1') + + + def test_pickle_disallow(): +diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py +index 2daa0153b..bde256741 100644 +--- a/numpy/lib/tests/test_io.py ++++ b/numpy/lib/tests/test_io.py +@@ -87,7 +87,7 @@ class RoundtripTest(object): + + """ + save_kwds = kwargs.get('save_kwds', {}) +- load_kwds = kwargs.get('load_kwds', {}) ++ load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) + file_on_disk = kwargs.get('file_on_disk', False) + + if file_on_disk: +-- +2.21.0 + diff --git a/SOURCES/numpy-1.14.3-float128.patch b/SOURCES/numpy-1.14.3-float128.patch new file mode 100644 index 0000000..998074c --- /dev/null +++ b/SOURCES/numpy-1.14.3-float128.patch @@ -0,0 +1,2868 @@ +From 5c6a1415c8b227f10f388f2b2632afa96287fb49 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Nikola=20Forr=C3=B3?= +Date: Wed, 20 Mar 2019 15:52:48 +0100 +Subject: [PATCH] Fix broken float128 on all arches except x86_64 + +Backport of the following upstream commits: + +3d21112d6 MAINT: Add bitmask helper functions +4d7c529d7 MAINT: Reorganize Dragon4 code to clarify float formats +727756e63 MAINT: 
replace static BigInts in dragon4 by dummy manager +888d37086 MAINT: Add comments to long_double detection code +cdc6b6847 BUG: Implement float128 dragon4 for IBM double-double (ppc64) +718d9e866 ENH: Add support for the 64-bit RISC-V architecture +b35dfc2d5 ENH: Add AARCH32 support. +3973b2508 Adjust endianness header file to accommodate AARCHXX changes. +f07359b22 BLD: Modify cpu detection and function to get build working on aarch64 (#11568) +dd949d5c6 BUG: Fix printing of longdouble on ppc64le. +641058fb7 MAINT: remove darwin hardcoded LDOUBLE detection +ec843c700 BUG: Fix undefined functions on big-endian systems. +00bc5bc88 BUG: Fix test sensitive to platform byte order. +--- + numpy/core/include/numpy/npy_cpu.h | 30 +- + numpy/core/include/numpy/npy_endian.h | 44 +- + numpy/core/setup.py | 13 +- + numpy/core/setup_common.py | 18 +- + numpy/core/src/multiarray/dragon4.c | 1613 ++++++++++++------- + numpy/core/src/multiarray/dragon4.h | 66 +- + numpy/core/src/multiarray/scalartypes.c.src | 5 +- + numpy/core/src/npymath/ieee754.c.src | 3 +- + numpy/core/src/npymath/npy_math_private.h | 9 +- + numpy/core/src/private/npy_config.h | 3 +- + numpy/core/src/private/npy_fpmath.h | 41 +- + numpy/core/tests/test_scalarprint.py | 63 +- + numpy/core/tests/test_umath.py | 12 +- + numpy/ma/tests/test_core.py | 11 +- + 14 files changed, 1277 insertions(+), 654 deletions(-) + +diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h +index 84653ea18..1109426da 100644 +--- a/numpy/core/include/numpy/npy_cpu.h ++++ b/numpy/core/include/numpy/npy_cpu.h +@@ -17,6 +17,7 @@ + * NPY_CPU_SH_BE + * NPY_CPU_ARCEL + * NPY_CPU_ARCEB ++ * NPY_CPU_RISCV64 + */ + #ifndef _NPY_CPUARCH_H_ + #define _NPY_CPUARCH_H_ +@@ -60,10 +61,27 @@ + #define NPY_CPU_HPPA + #elif defined(__alpha__) + #define NPY_CPU_ALPHA +-#elif defined(__arm__) && defined(__ARMEL__) +- #define NPY_CPU_ARMEL +-#elif defined(__arm__) && defined(__ARMEB__) +- #define NPY_CPU_ARMEB ++#elif defined(__arm__) || defined(__aarch64__) ++ #if defined(__ARMEB__) || defined(__AARCH64EB__) ++ #if defined(__ARM_32BIT_STATE) ++ #define NPY_CPU_ARMEB_AARCH32 ++ #elif defined(__ARM_64BIT_STATE) ++ #define NPY_CPU_ARMEB_AARCH64 ++ #else ++ #define NPY_CPU_ARMEB ++ #endif ++ #elif defined(__ARMEL__) || defined(__AARCH64EL__) ++ #if defined(__ARM_32BIT_STATE) ++ #define NPY_CPU_ARMEL_AARCH32 ++ #elif defined(__ARM_64BIT_STATE) ++ #define NPY_CPU_ARMEL_AARCH64 ++ #else ++ #define NPY_CPU_ARMEL ++ #endif ++ #else ++ # error Unknown ARM CPU, please report this to numpy maintainers with \ ++ information about your platform (OS, CPU and compiler) ++ #endif + #elif defined(__sh__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_SH_LE + #elif defined(__sh__) && defined(__BIG_ENDIAN__) +@@ -74,14 +92,14 @@ + #define NPY_CPU_MIPSEB + #elif defined(__or1k__) + #define NPY_CPU_OR1K +-#elif defined(__aarch64__) +- #define NPY_CPU_AARCH64 + #elif defined(__mc68000__) + #define NPY_CPU_M68K + #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) + #define NPY_CPU_ARCEL + #elif defined(__arc__) && defined(__BIG_ENDIAN__) + #define NPY_CPU_ARCEB ++#elif defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64 ++ #define NPY_CPU_RISCV64 + #else + #error Unknown CPU, please report this to numpy maintainers with \ + information about your platform (OS, CPU and compiler) +diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h +index 1a42121db..44cdffd14 100644 +--- a/numpy/core/include/numpy/npy_endian.h ++++ 
b/numpy/core/include/numpy/npy_endian.h +@@ -37,27 +37,31 @@ + #define NPY_LITTLE_ENDIAN 1234 + #define NPY_BIG_ENDIAN 4321 + +- #if defined(NPY_CPU_X86) \ +- || defined(NPY_CPU_AMD64) \ +- || defined(NPY_CPU_IA64) \ +- || defined(NPY_CPU_ALPHA) \ +- || defined(NPY_CPU_ARMEL) \ +- || defined(NPY_CPU_AARCH64) \ +- || defined(NPY_CPU_SH_LE) \ +- || defined(NPY_CPU_MIPSEL) \ +- || defined(NPY_CPU_PPC64LE) \ +- || defined(NPY_CPU_ARCEL) ++ #if defined(NPY_CPU_X86) \ ++ || defined(NPY_CPU_AMD64) \ ++ || defined(NPY_CPU_IA64) \ ++ || defined(NPY_CPU_ALPHA) \ ++ || defined(NPY_CPU_ARMEL) \ ++ || defined(NPY_CPU_ARMEL_AARCH32) \ ++ || defined(NPY_CPU_ARMEL_AARCH64) \ ++ || defined(NPY_CPU_SH_LE) \ ++ || defined(NPY_CPU_MIPSEL) \ ++ || defined(NPY_CPU_PPC64LE) \ ++ || defined(NPY_CPU_ARCEL) \ ++ || defined(NPY_CPU_RISCV64) + #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN +- #elif defined(NPY_CPU_PPC) \ +- || defined(NPY_CPU_SPARC) \ +- || defined(NPY_CPU_S390) \ +- || defined(NPY_CPU_HPPA) \ +- || defined(NPY_CPU_PPC64) \ +- || defined(NPY_CPU_ARMEB) \ +- || defined(NPY_CPU_SH_BE) \ +- || defined(NPY_CPU_MIPSEB) \ +- || defined(NPY_CPU_OR1K) \ +- || defined(NPY_CPU_M68K) \ ++ #elif defined(NPY_CPU_PPC) \ ++ || defined(NPY_CPU_SPARC) \ ++ || defined(NPY_CPU_S390) \ ++ || defined(NPY_CPU_HPPA) \ ++ || defined(NPY_CPU_PPC64) \ ++ || defined(NPY_CPU_ARMEB) \ ++ || defined(NPY_CPU_ARMEB_AARCH32) \ ++ || defined(NPY_CPU_ARMEB_AARCH64) \ ++ || defined(NPY_CPU_SH_BE) \ ++ || defined(NPY_CPU_MIPSEB) \ ++ || defined(NPY_CPU_OR1K) \ ++ || defined(NPY_CPU_M68K) \ + || defined(NPY_CPU_ARCEB) + #define NPY_BYTE_ORDER NPY_BIG_ENDIAN + #else +diff --git a/numpy/core/setup.py b/numpy/core/setup.py +index 371df5bec..cc1bb5ba7 100644 +--- a/numpy/core/setup.py ++++ b/numpy/core/setup.py +@@ -452,17 +452,8 @@ def configuration(parent_package='',top_path=None): + moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + + # Get long double representation +- if sys.platform != 'darwin': +- rep = check_long_double_representation(config_cmd) +- if rep in ['INTEL_EXTENDED_12_BYTES_LE', +- 'INTEL_EXTENDED_16_BYTES_LE', +- 'MOTOROLA_EXTENDED_12_BYTES_BE', +- 'IEEE_QUAD_LE', 'IEEE_QUAD_BE', +- 'IEEE_DOUBLE_LE', 'IEEE_DOUBLE_BE', +- 'DOUBLE_DOUBLE_BE', 'DOUBLE_DOUBLE_LE']: +- moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) +- else: +- raise ValueError("Unrecognized long double format: %s" % rep) ++ rep = check_long_double_representation(config_cmd) ++ moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + + # Py3K check + if sys.version_info[0] == 3: +diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py +index bd093c5c8..343318cd3 100644 +--- a/numpy/core/setup_common.py ++++ b/numpy/core/setup_common.py +@@ -333,9 +333,9 @@ _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', + _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', + '000', '000', '000', '000', '000', '000', '000', '000'] + _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] +-_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + ++_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + + ['000'] * 8) +-_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + ++_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + + ['000'] * 8) + + def long_double_representation(lines): +@@ -362,11 +362,16 @@ def long_double_representation(lines): + # the long double + if read[-8:] == _AFTER_SEQ: + saw = copy.copy(read) ++ # if the content was 
12 bytes, we only have 32 - 8 - 12 = 12 ++ # "before" bytes. In other words the first 4 "before" bytes went ++ # past the sliding window. + if read[:12] == _BEFORE_SEQ[4:]: + if read[12:-8] == _INTEL_EXTENDED_12B: + return 'INTEL_EXTENDED_12_BYTES_LE' + if read[12:-8] == _MOTOROLA_EXTENDED_12B: + return 'MOTOROLA_EXTENDED_12_BYTES_BE' ++ # if the content was 16 bytes, we are left with 32-8-16 = 8 ++ # "before" bytes, so 8 went past the sliding window. + elif read[:8] == _BEFORE_SEQ[8:]: + if read[8:-8] == _INTEL_EXTENDED_16B: + return 'INTEL_EXTENDED_16_BYTES_LE' +@@ -374,10 +379,11 @@ def long_double_representation(lines): + return 'IEEE_QUAD_BE' + elif read[8:-8] == _IEEE_QUAD_PREC_LE: + return 'IEEE_QUAD_LE' +- elif read[8:-8] == _DOUBLE_DOUBLE_BE: +- return 'DOUBLE_DOUBLE_BE' +- elif read[8:-8] == _DOUBLE_DOUBLE_LE: +- return 'DOUBLE_DOUBLE_LE' ++ elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: ++ return 'IBM_DOUBLE_DOUBLE_LE' ++ elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: ++ return 'IBM_DOUBLE_DOUBLE_BE' ++ # if the content was 8 bytes, left with 32-8-8 = 16 bytes + elif read[:16] == _BEFORE_SEQ: + if read[16:-8] == _IEEE_DOUBLE_LE: + return 'IEEE_DOUBLE_LE' +diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c +index e005234a0..e3da79f40 100644 +--- a/numpy/core/src/multiarray/dragon4.c ++++ b/numpy/core/src/multiarray/dragon4.c +@@ -42,6 +42,18 @@ + #define DEBUG_ASSERT(stmnt) do {} while(0) + #endif + ++static inline npy_uint64 ++bitmask_u64(npy_uint32 n) ++{ ++ return ~(~((npy_uint64)0) << n); ++} ++ ++static inline npy_uint32 ++bitmask_u32(npy_uint32 n) ++{ ++ return ~(~((npy_uint32)0) << n); ++} ++ + /* + * Get the log base 2 of a 32-bit unsigned integer. + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup +@@ -102,6 +114,17 @@ LogBase2_64(npy_uint64 val) + return LogBase2_32((npy_uint32)val); + } + ++#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) ++static npy_uint32 ++LogBase2_128(npy_uint64 hi, npy_uint64 lo) ++{ ++ if (hi) { ++ return 64 + LogBase2_64(hi); ++ } ++ ++ return LogBase2_64(lo); ++} ++#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE || HAVE_LDOUBLE_IEEE_QUAD_BE */ + /* + * Maximum number of 32 bit blocks needed in high precision arithmetic to print +@@ -122,6 +145,45 @@ typedef struct BigInt { + npy_uint32 blocks[c_BigInt_MaxBlocks]; + } BigInt; + ++/* ++ * Dummy implementation of a memory manager for BigInts. Currently, only ++ * supports a single call to Dragon4, but that is OK because Dragon4 ++ * does not release the GIL. ++ * ++ * We try to raise an error anyway if dragon4 re-enters, and this code serves ++ * as a placeholder if we want to make it re-entrant in the future. ++ * ++ * Each call to dragon4 uses 7 BigInts. ++ */ ++#define BIGINT_DRAGON4_GROUPSIZE 7 ++typedef struct { ++ BigInt bigints[BIGINT_DRAGON4_GROUPSIZE]; ++ char repr[16384]; ++} Dragon4_Scratch; ++ ++static int _bigint_static_in_use = 0; ++static Dragon4_Scratch _bigint_static; ++ ++static Dragon4_Scratch* ++get_dragon4_bigint_scratch(void) { ++ /* this test+set is not threadsafe, but no matter because we have GIL */ ++ if (_bigint_static_in_use) { ++ PyErr_SetString(PyExc_RuntimeError, ++ "numpy float printing code is not re-entrant. 
" ++ "Ping the devs to fix it."); ++ return NULL; ++ } ++ _bigint_static_in_use = 1; ++ ++ /* in this dummy implementation we only return the static allocation */ ++ return &_bigint_static; ++} ++ ++static void ++free_dragon4_bigint_scratch(Dragon4_Scratch *mem){ ++ _bigint_static_in_use = 0; ++} ++ + /* Copy integer */ + static void + BigInt_Copy(BigInt *dst, const BigInt *src) +@@ -139,32 +201,86 @@ BigInt_Copy(BigInt *dst, const BigInt *src) + static void + BigInt_Set_uint64(BigInt *i, npy_uint64 val) + { +- if (val > 0xFFFFFFFF) { +- i->blocks[0] = val & 0xFFFFFFFF; +- i->blocks[1] = (val >> 32) & 0xFFFFFFFF; ++ if (val > bitmask_u64(32)) { ++ i->blocks[0] = val & bitmask_u64(32); ++ i->blocks[1] = (val >> 32) & bitmask_u64(32); + i->length = 2; + } + else if (val != 0) { +- i->blocks[0] = val & 0xFFFFFFFF; ++ i->blocks[0] = val & bitmask_u64(32); ++ i->length = 1; ++ } ++ else { ++ i->length = 0; ++ } ++} ++ ++#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ ++ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ ++ defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || \ ++ defined(HAVE_LDOUBLE_IEEE_QUAD_BE)) ++static void ++BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) ++{ ++ if (hi > bitmask_u64(32)) { ++ i->length = 4; ++ } ++ else if (hi != 0) { ++ i->length = 3; ++ } ++ else if (lo > bitmask_u64(32)) { ++ i->length = 2; ++ } ++ else if (lo != 0) { + i->length = 1; + } + else { + i->length = 0; + } ++ ++ switch (i->length) { ++ case 4: ++ i->blocks[3] = (hi >> 32) & bitmask_u64(32); ++ case 3: ++ i->blocks[2] = hi & bitmask_u64(32); ++ case 2: ++ i->blocks[1] = (lo >> 32) & bitmask_u64(32); ++ case 1: ++ i->blocks[0] = lo & bitmask_u64(32); ++ } + } ++#endif /* DOUBLE_DOUBLE and QUAD */ + + static void + BigInt_Set_uint32(BigInt *i, npy_uint32 val) + { + if (val != 0) { + i->blocks[0] = val; +- i->length = (val != 0); ++ i->length = 1; + } + else { + i->length = 0; + } + } + ++/* ++ * Returns 1 if the value is zero ++ */ ++static int ++BigInt_IsZero(const BigInt *i) ++{ ++ return i->length == 0; ++} ++ ++/* ++ * Returns 1 if the value is even ++ */ ++static int ++BigInt_IsEven(const BigInt *i) ++{ ++ return (i->length == 0) || ( (i->blocks[0] % 2) == 0); ++} ++ + /* + * Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs) + */ +@@ -228,7 +344,7 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) + npy_uint64 sum = carry + (npy_uint64)(*largeCur) + + (npy_uint64)(*smallCur); + carry = sum >> 32; +- *resultCur = sum & 0xFFFFFFFF; ++ *resultCur = sum & bitmask_u64(32); + ++largeCur; + ++smallCur; + ++resultCur; +@@ -238,7 +354,7 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) + while (largeCur != largeEnd) { + npy_uint64 sum = carry + (npy_uint64)(*largeCur); + carry = sum >> 32; +- (*resultCur) = sum & 0xFFFFFFFF; ++ (*resultCur) = sum & bitmask_u64(32); + ++largeCur; + ++resultCur; + } +@@ -307,13 +423,13 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) + npy_uint64 product = (*resultCur) + + (*largeCur)*(npy_uint64)multiplier + carry; + carry = product >> 32; +- *resultCur = product & 0xFFFFFFFF; ++ *resultCur = product & bitmask_u64(32); + ++largeCur; + ++resultCur; + } while(largeCur != large->blocks + large->length); + + DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); +- *resultCur = (npy_uint32)(carry & 0xFFFFFFFF); ++ *resultCur = (npy_uint32)(carry & bitmask_u64(32)); + } + } + +@@ -337,7 +453,7 @@ BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) + const npy_uint32 
*pLhsEnd = lhs->blocks + lhs->length; + for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; +- *resultCur = (npy_uint32)(product & 0xFFFFFFFF); ++ *resultCur = (npy_uint32)(product & bitmask_u64(32)); + carry = product >> 32; + } + +@@ -414,7 +530,7 @@ BigInt_Multiply10(BigInt *result) + npy_uint32 *end = result->blocks + result->length; + for ( ; cur != end; ++cur) { + npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry; +- (*cur) = (npy_uint32)(product & 0xFFFFFFFF); ++ (*cur) = (npy_uint32)(product & bitmask_u64(32)); + carry = product >> 32; + } + +@@ -637,13 +753,11 @@ static BigInt g_PowerOf10_Big[] = + + /* result = 10^exponent */ + static void +-BigInt_Pow10(BigInt *result, npy_uint32 exponent) ++BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp) + { +- /* create two temporary values to reduce large integer copy operations */ +- BigInt temp1; +- BigInt temp2; +- BigInt *curTemp = &temp1; +- BigInt *pNextTemp = &temp2; ++ /* use two temporary values to reduce large integer copy operations */ ++ BigInt *curTemp = result; ++ BigInt *pNextTemp = temp; + npy_uint32 smallExponent; + npy_uint32 tableIdx = 0; + +@@ -654,7 +768,7 @@ BigInt_Pow10(BigInt *result, npy_uint32 exponent) + * initialize the result by looking up a 32-bit power of 10 corresponding to + * the first 3 bits + */ +- smallExponent = exponent & 0x7; ++ smallExponent = exponent & bitmask_u32(3); + BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]); + + /* remove the low bits that we used for the 32-bit lookup table */ +@@ -681,19 +795,17 @@ BigInt_Pow10(BigInt *result, npy_uint32 exponent) + } + + /* output the result */ +- BigInt_Copy(result, curTemp); ++ if (curTemp != result) { ++ BigInt_Copy(result, curTemp); ++ } + } + +-/* result = in * 10^exponent */ ++/* in = in * 10^exponent */ + static void +-BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent) ++BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) + { +- +- /* create two temporary values to reduce large integer copy operations */ +- BigInt temp1; +- BigInt temp2; +- BigInt *curTemp = &temp1; +- BigInt *pNextTemp = &temp2; ++ /* use two temporary values to reduce large integer copy operations */ ++ BigInt *curTemp, *pNextTemp; + npy_uint32 smallExponent; + npy_uint32 tableIdx = 0; + +@@ -704,12 +816,15 @@ BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent) + * initialize the result by looking up a 32-bit power of 10 corresponding to + * the first 3 bits + */ +- smallExponent = exponent & 0x7; ++ smallExponent = exponent & bitmask_u32(3); + if (smallExponent != 0) { +- BigInt_Multiply_int(curTemp, in, g_PowerOf10_U32[smallExponent]); ++ BigInt_Multiply_int(temp, in, g_PowerOf10_U32[smallExponent]); ++ curTemp = temp; ++ pNextTemp = in; + } + else { +- BigInt_Copy(curTemp, in); ++ curTemp = in; ++ pNextTemp = temp; + } + + /* remove the low bits that we used for the 32-bit lookup table */ +@@ -724,7 +839,7 @@ BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent) + /* multiply into the next temporary */ + BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); + +- // swap to the next temporary ++ /* swap to the next temporary */ + pSwap = curTemp; + curTemp = pNextTemp; + pNextTemp = pSwap; +@@ -736,7 +851,9 @@ BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent) + } + + /* output the result */ +- BigInt_Copy(result, curTemp); ++ if (curTemp != in){ ++ 
BigInt_Copy(in, curTemp); ++ } + } + + /* result = 2^exponent */ +@@ -788,7 +905,7 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) + */ + DEBUG_ASSERT(!divisor->length == 0 && + divisor->blocks[divisor->length-1] >= 8 && +- divisor->blocks[divisor->length-1] < 0xFFFFFFFF && ++ divisor->blocks[divisor->length-1] < bitmask_u64(32) && + dividend->length <= divisor->length); + + /* +@@ -825,10 +942,10 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) + carry = product >> 32; + + difference = (npy_uint64)*dividendCur +- - (product & 0xFFFFFFFF) - borrow; ++ - (product & bitmask_u64(32)) - borrow; + borrow = (difference >> 32) & 1; + +- *dividendCur = difference & 0xFFFFFFFF; ++ *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; +@@ -860,7 +977,7 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) + - (npy_uint64)*divisorCur - borrow; + borrow = (difference >> 32) & 1; + +- *dividendCur = difference & 0xFFFFFFFF; ++ *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; +@@ -993,12 +1110,20 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) + * There is some more documentation of these changes on Ryan Juckett's website + * at http://www.ryanjuckett.com/programming/printing-floating-point-numbers/ + * +- * Ryan Juckett's implementation did not implement "IEEE unbiased rounding", +- * except in the last digit. This has been added back, following the Burger & +- * Dybvig code, using the isEven variable. ++ * This code also has a few implementation differences from Ryan Juckett's ++ * version: ++ * 1. fixed overflow problems when mantissa was 64 bits (in float128 types), ++ * by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls. ++ * 2. Increased c_BigInt_MaxBlocks, for 128-bit floats ++ * 3. Added more entries to the g_PowerOf10_Big table, for 128-bit floats. ++ * 4. Added unbiased rounding calculation with isEven. Ryan Juckett's ++ * implementation did not implement "IEEE unbiased rounding", except in the ++ * last digit. This has been added back, following the Burger & Dybvig ++ * code, using the isEven variable. + * + * Arguments: +- * * mantissa - value significand ++ * * bigints - memory to store all bigints needed (7) for dragon4 computation. ++ * The first BigInt should be filled in with the mantissa. + * * exponent - value exponent in base 2 + * * mantissaBit - index of the highest set mantissa bit + * * hasUnequalMargins - is the high margin twice as large as the low margin +@@ -1007,9 +1132,11 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) + * * pOutBuffer - buffer to output into + * * bufferSize - maximum characters that can be printed to pOutBuffer + * * pOutExponent - the base 10 exponent of the first digit ++ * ++ * Returns the number of digits written to the output buffer. + */ + static npy_uint32 +-Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, ++Dragon4(BigInt *bigints, const npy_int32 exponent, + const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins, + const DigitMode digitMode, const CutoffMode cutoffMode, + npy_int32 cutoffNumber, char *pOutBuffer, +@@ -1025,21 +1152,24 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * Here, marginLow and marginHigh represent 1/2 of the distance to the next + * floating point value above/below the mantissa. 
+ * +- * scaledMarginHigh is a pointer so that it can point to scaledMarginLow in +- * the case they must be equal to each other, otherwise it will point to +- * optionalMarginHigh. ++ * scaledMarginHigh will point to scaledMarginLow in the case they must be ++ * equal to each other, otherwise it will point to optionalMarginHigh. + */ +- BigInt scale; +- BigInt scaledValue; +- BigInt scaledMarginLow; ++ BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ ++ BigInt *scale = &bigints[1]; ++ BigInt *scaledValue = &bigints[2]; ++ BigInt *scaledMarginLow = &bigints[3]; + BigInt *scaledMarginHigh; +- BigInt optionalMarginHigh; ++ BigInt *optionalMarginHigh = &bigints[4]; ++ ++ BigInt *temp1 = &bigints[5]; ++ BigInt *temp2 = &bigints[6]; + + const npy_float64 log10_2 = 0.30102999566398119521373889472449; + npy_int32 digitExponent, cutoffExponent, hiBlock; + npy_uint32 outputDigit; /* current digit being output */ + npy_uint32 outputLen; +- npy_bool isEven = (mantissa % 2) == 0; ++ npy_bool isEven = BigInt_IsEven(mantissa); + npy_int32 cmp; + + /* values used to determine how to round */ +@@ -1048,12 +1178,14 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + DEBUG_ASSERT(bufferSize > 0); + + /* if the mantissa is zero, the value is zero regardless of the exponent */ +- if (mantissa == 0) { ++ if (BigInt_IsZero(mantissa)) { + *curDigit = '0'; + *pOutExponent = 0; + return 1; + } + ++ BigInt_Copy(scaledValue, mantissa); ++ + if (hasUnequalMargins) { + /* if we have no fractional component */ + if (exponent > 0) { +@@ -1067,17 +1199,13 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + */ + + /* scaledValue = 2 * 2 * mantissa*2^exponent */ +- BigInt_Set_uint64(&scaledValue, mantissa); +- BigInt_ShiftLeft(&scaledValue, exponent + 2); +- ++ BigInt_ShiftLeft(scaledValue, exponent + 2); + /* scale = 2 * 2 * 1 */ +- BigInt_Set_uint32(&scale, 4); +- ++ BigInt_Set_uint32(scale, 4); + /* scaledMarginLow = 2 * 2^(exponent-1) */ +- BigInt_Pow2(&scaledMarginLow, exponent); +- ++ BigInt_Pow2(scaledMarginLow, exponent); + /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ +- BigInt_Pow2(&optionalMarginHigh, exponent + 1); ++ BigInt_Pow2(optionalMarginHigh, exponent + 1); + } + /* else we have a fractional exponent */ + else { +@@ -1087,34 +1215,27 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + */ + + /* scaledValue = 2 * 2 * mantissa */ +- BigInt_Set_uint64(&scaledValue, mantissa); +- BigInt_ShiftLeft(&scaledValue, 2); +- ++ BigInt_ShiftLeft(scaledValue, 2); + /* scale = 2 * 2 * 2^(-exponent) */ +- BigInt_Pow2(&scale, -exponent + 2); +- ++ BigInt_Pow2(scale, -exponent + 2); + /* scaledMarginLow = 2 * 2^(-1) */ +- BigInt_Set_uint32(&scaledMarginLow, 1); +- ++ BigInt_Set_uint32(scaledMarginLow, 1); + /* scaledMarginHigh = 2 * 2 * 2^(-1) */ +- BigInt_Set_uint32(&optionalMarginHigh, 2); ++ BigInt_Set_uint32(optionalMarginHigh, 2); + } + + /* the high and low margins are different */ +- scaledMarginHigh = &optionalMarginHigh; ++ scaledMarginHigh = optionalMarginHigh; + } + else { + /* if we have no fractional component */ + if (exponent > 0) { + /* scaledValue = 2 * mantissa*2^exponent */ +- BigInt_Set_uint64(&scaledValue, mantissa); +- BigInt_ShiftLeft(&scaledValue, exponent + 1); +- ++ BigInt_ShiftLeft(scaledValue, exponent + 1); + /* scale = 2 * 1 */ +- BigInt_Set_uint32(&scale, 2); +- ++ BigInt_Set_uint32(scale, 2); + /* scaledMarginLow = 2 * 2^(exponent-1) */ +- BigInt_Pow2(&scaledMarginLow, exponent); ++ BigInt_Pow2(scaledMarginLow, exponent); + } + /* 
else we have a fractional exponent */ + else { +@@ -1124,18 +1245,15 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + */ + + /* scaledValue = 2 * mantissa */ +- BigInt_Set_uint64(&scaledValue, mantissa); +- BigInt_ShiftLeft(&scaledValue, 1); +- ++ BigInt_ShiftLeft(scaledValue, 1); + /* scale = 2 * 2^(-exponent) */ +- BigInt_Pow2(&scale, -exponent + 1); +- ++ BigInt_Pow2(scale, -exponent + 1); + /* scaledMarginLow = 2 * 2^(-1) */ +- BigInt_Set_uint32(&scaledMarginLow, 1); ++ BigInt_Set_uint32(scaledMarginLow, 1); + } + + /* the high and low margins are equal */ +- scaledMarginHigh = &scaledMarginLow; ++ scaledMarginHigh = scaledMarginLow; + } + + /* +@@ -1158,6 +1276,9 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * <= log10(v) + log10(2) + * floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2)) + * <= floor(log10(v)) + 1 ++ * ++ * Warning: This calculation assumes npy_float64 is an IEEE-binary64 ++ * float. This line may need to be updated if this is not the case. + */ + digitExponent = (npy_int32)( + ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); +@@ -1179,31 +1300,29 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + /* Divide value by 10^digitExponent. */ + if (digitExponent > 0) { + /* A positive exponent creates a division so we multiply the scale. */ +- BigInt temp; +- BigInt_MultiplyPow10(&temp, &scale, digitExponent); +- BigInt_Copy(&scale, &temp); ++ BigInt_MultiplyPow10(scale, digitExponent, temp1); + } + else if (digitExponent < 0) { + /* + * A negative exponent creates a multiplication so we multiply up the + * scaledValue, scaledMarginLow and scaledMarginHigh. + */ +- BigInt pow10, temp; +- BigInt_Pow10(&pow10, -digitExponent); ++ BigInt *temp=temp1, *pow10=temp2; ++ BigInt_Pow10(pow10, -digitExponent, temp); + +- BigInt_Multiply(&temp, &scaledValue, &pow10); +- BigInt_Copy(&scaledValue, &temp); ++ BigInt_Multiply(temp, scaledValue, pow10); ++ BigInt_Copy(scaledValue, temp); + +- BigInt_Multiply(&temp, &scaledMarginLow, &pow10); +- BigInt_Copy(&scaledMarginLow, &temp); ++ BigInt_Multiply(temp, scaledMarginLow, pow10); ++ BigInt_Copy(scaledMarginLow, temp); + +- if (scaledMarginHigh != &scaledMarginLow) { +- BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); ++ if (scaledMarginHigh != scaledMarginLow) { ++ BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + /* If (value >= 1), our estimate for digitExponent was too low */ +- if (BigInt_Compare(&scaledValue, &scale) >= 0) { ++ if (BigInt_Compare(scaledValue, scale) >= 0) { + /* + * The exponent estimate was incorrect. + * Increment the exponent and don't perform the premultiply needed +@@ -1217,10 +1336,10 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * Multiply larger by the output base to prepare for the first loop + * iteration. + */ +- BigInt_Multiply10(&scaledValue); +- BigInt_Multiply10(&scaledMarginLow); +- if (scaledMarginHigh != &scaledMarginLow) { +- BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); ++ BigInt_Multiply10(scaledValue); ++ BigInt_Multiply10(scaledMarginLow); ++ if (scaledMarginHigh != scaledMarginLow) { ++ BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + +@@ -1261,8 +1380,8 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * to be less than or equal to 429496729 which is the highest number that + * can be multiplied by 10 without overflowing to a new block. 
+ */ +- DEBUG_ASSERT(scale.length > 0); +- hiBlock = scale.blocks[scale.length - 1]; ++ DEBUG_ASSERT(scale->length > 0); ++ hiBlock = scale->blocks[scale->length - 1]; + if (hiBlock < 8 || hiBlock > 429496729) { + npy_uint32 hiBlockLog2, shift; + +@@ -1280,11 +1399,11 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27); + shift = (32 + 27 - hiBlockLog2) % 32; + +- BigInt_ShiftLeft(&scale, shift); +- BigInt_ShiftLeft(&scaledValue, shift); +- BigInt_ShiftLeft(&scaledMarginLow, shift); +- if (scaledMarginHigh != &scaledMarginLow) { +- BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); ++ BigInt_ShiftLeft(scale, shift); ++ BigInt_ShiftLeft(scaledValue, shift); ++ BigInt_ShiftLeft(scaledMarginLow, shift); ++ if (scaledMarginHigh != scaledMarginLow) { ++ BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + +@@ -1296,25 +1415,25 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * terminate early. + */ + for (;;) { +- BigInt scaledValueHigh; ++ BigInt *scaledValueHigh = temp1; + + digitExponent = digitExponent-1; + + /* divide out the scale to extract the digit */ + outputDigit = +- BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale); ++ BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + + /* update the high end of the value */ +- BigInt_Add(&scaledValueHigh, &scaledValue, scaledMarginHigh); ++ BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh); + + /* + * stop looping if we are far enough away from our neighboring + * values or if we have reached the cutoff digit + */ +- cmp = BigInt_Compare(&scaledValue, &scaledMarginLow); ++ cmp = BigInt_Compare(scaledValue, scaledMarginLow); + low = isEven ? (cmp <= 0) : (cmp < 0); +- cmp = BigInt_Compare(&scaledValueHigh, &scale); ++ cmp = BigInt_Compare(scaledValueHigh, scale); + high = isEven ? 
(cmp >= 0) : (cmp > 0); + if (low | high | (digitExponent == cutoffExponent)) + break; +@@ -1324,10 +1443,10 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + ++curDigit; + + /* multiply larger by the output base */ +- BigInt_Multiply10(&scaledValue); +- BigInt_Multiply10(&scaledMarginLow); +- if (scaledMarginHigh != &scaledMarginLow) { +- BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow); ++ BigInt_Multiply10(scaledValue); ++ BigInt_Multiply10(scaledMarginLow); ++ if (scaledMarginHigh != scaledMarginLow) { ++ BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + } +@@ -1345,10 +1464,11 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + + /* divide out the scale to extract the digit */ + outputDigit = +- BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale); ++ BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + +- if ((scaledValue.length == 0) | (digitExponent == cutoffExponent)) { ++ if ((scaledValue->length == 0) | ++ (digitExponent == cutoffExponent)) { + break; + } + +@@ -1357,7 +1477,7 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + ++curDigit; + + /* multiply larger by the output base */ +- BigInt_Multiply10(&scaledValue); ++ BigInt_Multiply10(scaledValue); + } + } + +@@ -1375,8 +1495,8 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + * compare( scale * value, scale * 0.5 ) + * compare( 2 * scale * value, scale ) + */ +- BigInt_Multiply2_inplace(&scaledValue); +- compare = BigInt_Compare(&scaledValue, &scale); ++ BigInt_Multiply2_inplace(scaledValue); ++ compare = BigInt_Compare(scaledValue, scale); + roundDown = compare < 0; + + /* +@@ -1431,134 +1551,53 @@ Dragon4(const npy_uint64 mantissa, const npy_int32 exponent, + + + /* +- * Helper union to decompose a 16-bit IEEE float. +- * sign: 1 bit +- * exponent: 5 bits +- * mantissa: 10 bits +- */ +-typedef union FloatUnion16 +-{ +- npy_uint16 integer; +-} FloatUnion16; +- +-npy_bool IsNegative_F16(FloatUnion16 *v) { return (v->integer >> 15) != 0; } +-npy_uint32 GetExponent_F16(FloatUnion16 *v) { return (v->integer >> 10) & 0x1F;} +-npy_uint32 GetMantissa_F16(FloatUnion16 *v) { return v->integer & 0x3FF; } +- +- +-/* +- * Helper union to decompose a 32-bit IEEE float. +- * sign: 1 bit +- * exponent: 8 bits +- * mantissa: 23 bits +- */ +-typedef union FloatUnion32 +-{ +- npy_float32 floatingPoint; +- npy_uint32 integer; +-} FloatUnion32; +- +-npy_bool IsNegative_F32(FloatUnion32 *v) { return (v->integer >> 31) != 0; } +-npy_uint32 GetExponent_F32(FloatUnion32 *v) { return (v->integer >> 23) & 0xFF;} +-npy_uint32 GetMantissa_F32(FloatUnion32 *v) { return v->integer & 0x7FFFFF; } +- +-/* +- * Helper union to decompose a 64-bit IEEE float. 
+- * sign: 1 bit +- * exponent: 11 bits +- * mantissa: 52 bits +- */ +-typedef union FloatUnion64 +-{ +- npy_float64 floatingPoint; +- npy_uint64 integer; +-} FloatUnion64; +-npy_bool IsNegative_F64(FloatUnion64 *v) { return (v->integer >> 63) != 0; } +-npy_uint32 GetExponent_F64(FloatUnion64 *v) { return (v->integer >> 52) & 0x7FF; } +-npy_uint64 GetMantissa_F64(FloatUnion64 *v) { return v->integer & 0xFFFFFFFFFFFFFull; } +- +-/* +- * Helper unions and datatype to decompose a 80-bit IEEE float +- * sign: 1 bit, second u64 +- * exponent: 15 bits, second u64 +- * intbit 1 bit, first u64 +- * mantissa: 63 bits, first u64 +- */ +- +-/* +- * Since systems have different types of long doubles, and may not necessarily +- * have a 128-byte format we can use to pass values around, here we create +- * our own 128-bit storage type for convenience. +- */ +-typedef struct FloatVal128 { +- npy_uint64 integer[2]; +-} FloatVal128; +-npy_bool IsNegative_F128(FloatVal128 *v) { +- return ((v->integer[1] >> 15) & 0x1) != 0; +-} +-npy_uint32 GetExponent_F128(FloatVal128 *v) { return v->integer[1] & 0x7FFF; } +-npy_uint64 GetMantissa_F128(FloatVal128 *v) { +- return v->integer[0] & 0x7FFFFFFFFFFFFFFFull; +-} +- +-/* +- * then for each different definition of long double, we create a union to +- * unpack the float data safely. We can then copy these integers to a +- * FloatVal128. ++ * The FormatPositional and FormatScientific functions have been more ++ * significantly rewritten relative to Ryan Juckett's code. ++ * ++ * The binary16 and the various 128-bit float functions are new, and adapted ++ * from the 64 bit version. The python interface functions are new. + */ +-#ifdef NPY_FLOAT128 +-typedef union FloatUnion128 +-{ +- npy_float128 floatingPoint; +- struct { +- npy_uint64 a; +- npy_uint16 b; +- } integer; +-} FloatUnion128; +-#endif +- +-#ifdef NPY_FLOAT96 +-typedef union FloatUnion96 +-{ +- npy_float96 floatingPoint; +- struct { +- npy_uint64 a; +- npy_uint32 b; +- } integer; +-} FloatUnion96; +-#endif +- +-#ifdef NPY_FLOAT80 +-typedef union FloatUnion80 +-{ +- npy_float80 floatingPoint; +- struct { +- npy_uint64 a; +- npy_uint16 b; +- } integer; +-} FloatUnion80; +-#endif + + +-/* +- * The main changes above this point, relative to Ryan Juckett's code, are: +- * 1. fixed overflow problems when mantissa was 64 bits (in float128 types), +- * by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls. +- * 2. Increased c_BigInt_MaxBlocks +- * 3. Added more entries to the g_PowerOf10_Big table +- * 4. Added unbiased rounding calculation with isEven ++/* Options struct for easy passing of Dragon4 options. + * +- * Below this point, the FormatPositional and FormatScientific functions have +- * been more significantly rewritten. The Dragon4_PrintFloat16 and +- * Dragon4_PrintFloat128 functions are new, and were adapted from the 64 and 32 +- * bit versions. The python interfacing functions (in the header) are new. ++ * scientific - boolean controlling whether scientific notation is used ++ * digit_mode - whether to use unique or fixed fractional output ++ * cutoff_mode - whether 'precision' refers to total digits, or digits past ++ * the decimal point. ++ * precision - When negative, prints as many digits as needed for a unique ++ * number. When positive specifies the maximum number of ++ * significant digits to print. ++ * sign - whether to always show sign ++ * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. ++ * digits_left - pad characters to left of decimal point. 
-1 for no padding ++ * digits_right - pad characters to right of decimal point. -1 for no padding. ++ * Padding adds whitespace until there are the specified ++ * number of characters to sides of decimal point. Applies after ++ * trim_mode characters were removed. If digits_right is ++ * positive and the decimal point was trimmed, decimal point ++ * will be replaced by a whitespace character. ++ * exp_digits - Only affects scientific output. If positive, pads the ++ * exponent with 0s until there are this many digits. If ++ * negative, only use sufficient digits. + */ ++typedef struct Dragon4_Options { ++ npy_bool scientific; ++ DigitMode digit_mode; ++ CutoffMode cutoff_mode; ++ npy_int32 precision; ++ npy_bool sign; ++ TrimMode trim_mode; ++ npy_int32 digits_left; ++ npy_int32 digits_right; ++ npy_int32 exp_digits; ++} Dragon4_Options; + + /* + * Outputs the positive number with positional notation: ddddd.dddd + * The output is always NUL terminated and the output length (not including the + * NUL) is returned. ++ + * Arguments: + * buffer - buffer to output into + * bufferSize - maximum characters that can be printed to buffer +@@ -1567,20 +1606,11 @@ typedef union FloatUnion80 + * signbit - value of the sign position. Should be '+', '-' or '' + * mantissaBit - index of the highest set mantissa bit + * hasUnequalMargins - is the high margin twice as large as the low margin +- * precision - Negative prints as many digits as are needed for a unique +- * number. Positive specifies the maximum number of significant +- * digits to print past the decimal point. +- * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. +- * digits_left - pad characters to left of decimal point. -1 for no padding +- * digits_right - pad characters to right of decimal point. -1 for no padding +- * padding adds whitespace until there are the specified +- * number characters to sides of decimal point. Applies after +- * trim_mode characters were removed. If digits_right is +- * positive and the decimal point was trimmed, decimal point +- * will be replaced by a whitespace character. ++ * ++ * See Dragon4_Options for description of remaining arguments. + */ + static npy_uint32 +-FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, ++FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, + npy_int32 exponent, char signbit, npy_uint32 mantissaBit, + npy_bool hasUnequalMargins, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, +@@ -1707,7 +1737,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && +- pos < maxPrintLen){ ++ pos < maxPrintLen) { + buffer[pos++] = '.'; + } + +@@ -1745,7 +1775,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. 
+ */ +- if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){ ++ if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0) { + while (buffer[pos-1] == '0') { + pos--; + numFractionDigits--; +@@ -1779,7 +1809,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + npy_int32 shift = digits_left - (numWholeDigits + has_sign); + npy_int32 count = pos; + +- if (count + shift > maxPrintLen){ ++ if (count + shift > maxPrintLen) { + count = maxPrintLen - shift; + } + +@@ -1803,6 +1833,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + * Outputs the positive number with scientific notation: d.dddde[sign]ddd + * The output is always NUL terminated and the output length (not including the + * NUL) is returned. ++ * + * Arguments: + * buffer - buffer to output into + * bufferSize - maximum characters that can be printed to buffer +@@ -1811,15 +1842,11 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + * signbit - value of the sign position. Should be '+', '-' or '' + * mantissaBit - index of the highest set mantissa bit + * hasUnequalMargins - is the high margin twice as large as the low margin +- * precision - Negative prints as many digits as are needed for a unique +- * number. Positive specifies the maximum number of significant +- * digits to print past the decimal point. +- * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. +- * digits_left - pad characters to left of decimal point. -1 for no padding +- * exp_digits - pads exponent with zeros until it has this many digits ++ * ++ * See Dragon4_Options for description of remaining arguments. + */ + static npy_uint32 +-FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, ++FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, + npy_int32 exponent, char signbit, npy_uint32 mantissaBit, + npy_bool hasUnequalMargins, DigitMode digit_mode, + npy_int32 precision, TrimMode trim_mode, +@@ -1844,7 +1871,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + leftchars = 1 + (signbit == '-' || signbit == '+'); + if (digits_left > leftchars) { + int i; +- for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++){ ++ for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) { + *pCurOut = ' '; + pCurOut++; + --bufferSize; +@@ -1892,7 +1919,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && +- bufferSize > 1){ ++ bufferSize > 1) { + *pCurOut = '.'; + ++pCurOut; + --bufferSize; +@@ -1931,7 +1958,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. 
+ */ +- if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){ ++ if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0) { + --pCurOut; + while (*pCurOut == '0') { + --pCurOut; +@@ -1972,7 +1999,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + DEBUG_ASSERT(printExponent < 100000); + + /* get exp digits */ +- for (i = 0; i < 5; i++){ ++ for (i = 0; i < 5; i++) { + digits[i] = printExponent % 10; + printExponent /= 10; + } +@@ -1981,7 +2008,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + } + exp_size = i; + /* write remaining digits to tmp buf */ +- for (i = exp_size; i > 0; i--){ ++ for (i = exp_size; i > 0; i--) { + exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]); + } + +@@ -2057,12 +2084,12 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + + /* only print sign for inf values (though nan can have a sign set) */ + if (signbit == '+') { +- if (pos < maxPrintLen-1){ ++ if (pos < maxPrintLen-1) { + buffer[pos++] = '+'; + } + } + else if (signbit == '-') { +- if (pos < maxPrintLen-1){ ++ if (pos < maxPrintLen-1) { + buffer[pos++] = '-'; + } + } +@@ -2080,7 +2107,9 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + buffer[pos + printLen] = '\0'; + + /* +- * // XXX: Should we change this for numpy? ++ * For numpy we ignore unusual mantissa values for nan, but keep this ++ * code in case we change our mind later. ++ * + * // append HEX value + * if (maxPrintLen > 3) { + * printLen += PrintHex(buffer+3, bufferSize-3, mantissa, +@@ -2093,34 +2122,63 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + } + + /* +- * These functions print a floating-point number as a decimal string. +- * The output string is always NUL terminated and the string length (not +- * including the NUL) is returned. ++ * The functions below format a floating-point numbers stored in particular ++ * formats, as a decimal string. The output string is always NUL terminated ++ * and the string length (not including the NUL) is returned. ++ * ++ * For 16, 32 and 64 bit floats we assume they are the IEEE 754 type. ++ * For 128 bit floats we account for different definitions. + * + * Arguments are: + * buffer - buffer to output into + * bufferSize - maximum characters that can be printed to buffer +- * value - value significand +- * scientific - boolean controlling whether scientific notation is used +- * precision - If positive, specifies the number of decimals to show after +- * decimal point. If negative, sufficient digits to uniquely +- * specify the float will be output. +- * trim_mode - how to treat trailing zeros and decimal point. See TrimMode. +- * digits_right - pad the result with '' on the right past the decimal point +- * digits_left - pad the result with '' on the right past the decimal point +- * exp_digits - Only affects scientific output. If positive, pads the +- * exponent with 0s until there are this many digits. If +- * negative, only use sufficient digits. ++ * value - value to print ++ * opt - Dragon4 options, see above ++ */ ++ ++/* ++ * Helper function that takes Dragon4 parameters and options and ++ * calls Dragon4. 
+ */ + static npy_uint32 +-Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value, +- npy_bool scientific, DigitMode digit_mode, +- CutoffMode cutoff_mode, npy_int32 precision, +- npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, +- npy_int32 digits_right, npy_int32 exp_digits) ++Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, ++ npy_int32 exponent, char signbit, npy_uint32 mantissaBit, ++ npy_bool hasUnequalMargins, Dragon4_Options *opt) + { +- FloatUnion16 floatUnion; +- npy_uint32 floatExponent, floatMantissa; ++ /* format the value */ ++ if (opt->scientific) { ++ return FormatScientific(buffer, bufferSize, mantissa, exponent, ++ signbit, mantissaBit, hasUnequalMargins, ++ opt->digit_mode, opt->precision, ++ opt->trim_mode, opt->digits_left, ++ opt->exp_digits); ++ } ++ else { ++ return FormatPositional(buffer, bufferSize, mantissa, exponent, ++ signbit, mantissaBit, hasUnequalMargins, ++ opt->digit_mode, opt->cutoff_mode, ++ opt->precision, opt->trim_mode, ++ opt->digits_left, opt->digits_right); ++ } ++} ++ ++/* ++ * IEEE binary16 floating-point format ++ * ++ * sign: 1 bit ++ * exponent: 5 bits ++ * mantissa: 10 bits ++ */ ++static npy_uint32 ++Dragon4_PrintFloat_IEEE_binary16( ++ Dragon4_Scratch *scratch, npy_half *value, Dragon4_Options *opt) ++{ ++ char *buffer = scratch->repr; ++ npy_uint32 bufferSize = sizeof(scratch->repr); ++ BigInt *bigints = scratch->bigints; ++ ++ npy_uint16 val = *value; ++ npy_uint32 floatExponent, floatMantissa, floatSign; + + npy_uint32 mantissa; + npy_int32 exponent; +@@ -2138,20 +2196,20 @@ Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value, + } + + /* deconstruct the floating point value */ +- floatUnion.integer = value; +- floatExponent = GetExponent_F16(&floatUnion); +- floatMantissa = GetMantissa_F16(&floatUnion); ++ floatMantissa = val & bitmask_u32(10); ++ floatExponent = (val >> 10) & bitmask_u32(5); ++ floatSign = val >> 15; + + /* output the sign */ +- if (IsNegative_F16(&floatUnion)) { ++ if (floatSign != 0) { + signbit = '-'; + } +- else if (sign) { ++ else if (opt->sign) { + signbit = '+'; + } + + /* if this is a special value */ +- if (floatExponent == 0x1F) { ++ if (floatExponent == bitmask_u32(5)) { + return PrintInfNan(buffer, bufferSize, floatMantissa, 3, signbit); + } + /* else this is a number */ +@@ -2195,29 +2253,33 @@ Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value, + hasUnequalMargins = NPY_FALSE; + } + +- /* format the value */ +- if (scientific) { +- return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- precision, trim_mode, digits_left, exp_digits); +- } +- else { +- return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- cutoff_mode, precision, trim_mode, +- digits_left, digits_right); +- } ++ BigInt_Set_uint32(&bigints[0], mantissa); ++ return Format_floatbits(buffer, bufferSize, bigints, exponent, ++ signbit, mantissaBit, hasUnequalMargins, opt); + } + ++/* ++ * IEEE binary32 floating-point format ++ * ++ * sign: 1 bit ++ * exponent: 8 bits ++ * mantissa: 23 bits ++ */ + static npy_uint32 +-Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value, +- npy_bool scientific, DigitMode digit_mode, +- CutoffMode cutoff_mode, npy_int32 precision, +- npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, +- npy_int32 digits_right, npy_int32 exp_digits) 
++Dragon4_PrintFloat_IEEE_binary32( ++ Dragon4_Scratch *scratch, npy_float32 *value, ++ Dragon4_Options *opt) + { +- FloatUnion32 floatUnion; +- npy_uint32 floatExponent, floatMantissa; ++ char *buffer = scratch->repr; ++ npy_uint32 bufferSize = sizeof(scratch->repr); ++ BigInt *bigints = scratch->bigints; ++ ++ union ++ { ++ npy_float32 floatingPoint; ++ npy_uint32 integer; ++ } floatUnion; ++ npy_uint32 floatExponent, floatMantissa, floatSign; + + npy_uint32 mantissa; + npy_int32 exponent; +@@ -2235,20 +2297,21 @@ Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value, + } + + /* deconstruct the floating point value */ +- floatUnion.floatingPoint = value; +- floatExponent = GetExponent_F32(&floatUnion); +- floatMantissa = GetMantissa_F32(&floatUnion); ++ floatUnion.floatingPoint = *value; ++ floatMantissa = floatUnion.integer & bitmask_u32(23); ++ floatExponent = (floatUnion.integer >> 23) & bitmask_u32(8); ++ floatSign = floatUnion.integer >> 31; + + /* output the sign */ +- if (IsNegative_F32(&floatUnion)) { ++ if (floatSign != 0) { + signbit = '-'; + } +- else if (sign) { ++ else if (opt->sign) { + signbit = '+'; + } + + /* if this is a special value */ +- if (floatExponent == 0xFF) { ++ if (floatExponent == bitmask_u32(8)) { + return PrintInfNan(buffer, bufferSize, floatMantissa, 6, signbit); + } + /* else this is a number */ +@@ -2292,29 +2355,32 @@ Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value, + hasUnequalMargins = NPY_FALSE; + } + +- /* format the value */ +- if (scientific) { +- return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- precision, trim_mode, digits_left, exp_digits); +- } +- else { +- return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- cutoff_mode, precision, trim_mode, +- digits_left, digits_right); +- } ++ BigInt_Set_uint32(&bigints[0], mantissa); ++ return Format_floatbits(buffer, bufferSize, bigints, exponent, ++ signbit, mantissaBit, hasUnequalMargins, opt); + } + ++/* ++ * IEEE binary64 floating-point format ++ * ++ * sign: 1 bit ++ * exponent: 11 bits ++ * mantissa: 52 bits ++ */ + static npy_uint32 +-Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value, +- npy_bool scientific, DigitMode digit_mode, +- CutoffMode cutoff_mode, npy_int32 precision, +- npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, +- npy_int32 digits_right, npy_int32 exp_digits) ++Dragon4_PrintFloat_IEEE_binary64( ++ Dragon4_Scratch *scratch, npy_float64 *value, Dragon4_Options *opt) + { +- FloatUnion64 floatUnion; +- npy_uint32 floatExponent; ++ char *buffer = scratch->repr; ++ npy_uint32 bufferSize = sizeof(scratch->repr); ++ BigInt *bigints = scratch->bigints; ++ ++ union ++ { ++ npy_float64 floatingPoint; ++ npy_uint64 integer; ++ } floatUnion; ++ npy_uint32 floatExponent, floatSign; + npy_uint64 floatMantissa; + + npy_uint64 mantissa; +@@ -2333,20 +2399,21 @@ Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value, + } + + /* deconstruct the floating point value */ +- floatUnion.floatingPoint = value; +- floatExponent = GetExponent_F64(&floatUnion); +- floatMantissa = GetMantissa_F64(&floatUnion); ++ floatUnion.floatingPoint = *value; ++ floatMantissa = floatUnion.integer & bitmask_u64(52); ++ floatExponent = (floatUnion.integer >> 52) & bitmask_u32(11); ++ floatSign = floatUnion.integer >> 63; + + /* output the sign */ +- if 
(IsNegative_F64(&floatUnion)) { ++ if (floatSign != 0) { + signbit = '-'; + } +- else if (sign) { ++ else if (opt->sign) { + signbit = '+'; + } + + /* if this is a special value */ +- if (floatExponent == 0x7FF) { ++ if (floatExponent == bitmask_u32(11)) { + return PrintInfNan(buffer, bufferSize, floatMantissa, 13, signbit); + } + /* else this is a number */ +@@ -2390,28 +2457,48 @@ Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value, + hasUnequalMargins = NPY_FALSE; + } + +- /* format the value */ +- if (scientific) { +- return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- precision, trim_mode, digits_left, exp_digits); +- } +- else { +- return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- cutoff_mode, precision, trim_mode, +- digits_left, digits_right); +- } ++ BigInt_Set_uint64(&bigints[0], mantissa); ++ return Format_floatbits(buffer, bufferSize, bigints, exponent, ++ signbit, mantissaBit, hasUnequalMargins, opt); + } + ++ ++/* ++ * Since systems have different types of long doubles, and may not necessarily ++ * have a 128-byte format we can use to pass values around, here we create ++ * our own 128-bit storage type for convenience. ++ */ ++typedef struct FloatVal128 { ++ npy_uint64 hi, lo; ++} FloatVal128; ++ ++#if defined(HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE) || \ ++ defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ ++ defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ ++ defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) ++/* ++ * Intel's 80-bit IEEE extended precision floating-point format ++ * ++ * "long doubles" with this format are stored as 96 or 128 bits, but ++ * are equivalent to the 80 bit type with some zero padding on the high bits. ++ * This method expects the user to pass in the value using a 128-bit ++ * FloatVal128, so can support 80, 96, or 128 bit storage formats, ++ * and is endian-independent. 
++ * ++ * sign: 1 bit, second u64 ++ * exponent: 15 bits, second u64 ++ * intbit 1 bit, first u64 ++ * mantissa: 63 bits, first u64 ++ */ + static npy_uint32 +-Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value, +- npy_bool scientific, DigitMode digit_mode, +- CutoffMode cutoff_mode, npy_int32 precision, +- npy_bool sign, TrimMode trim_mode, npy_int32 digits_left, +- npy_int32 digits_right, npy_int32 exp_digits) ++Dragon4_PrintFloat_Intel_extended( ++ Dragon4_Scratch *scratch, FloatVal128 value, Dragon4_Options *opt) + { +- npy_uint32 floatExponent; ++ char *buffer = scratch->repr; ++ npy_uint32 bufferSize = sizeof(scratch->repr); ++ BigInt *bigints = scratch->bigints; ++ ++ npy_uint32 floatExponent, floatSign; + npy_uint64 floatMantissa; + + npy_uint64 mantissa; +@@ -2429,20 +2516,27 @@ Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value, + return 0; + } + +- /* deconstruct the floating point value */ +- floatExponent = GetExponent_F128(&value); +- floatMantissa = GetMantissa_F128(&value); ++ /* deconstruct the floating point value (we ignore the intbit) */ ++ floatMantissa = value.lo & bitmask_u64(63); ++ floatExponent = value.hi & bitmask_u32(15); ++ floatSign = (value.hi >> 15) & 0x1; + + /* output the sign */ +- if (IsNegative_F128(&value)) { ++ if (floatSign != 0) { + signbit = '-'; + } +- else if (sign) { ++ else if (opt->sign) { + signbit = '+'; + } + + /* if this is a special value */ +- if (floatExponent == 0x7FFF) { ++ if (floatExponent == bitmask_u32(15)) { ++ /* ++ * Note: Technically there are other special extended values defined if ++ * the intbit is 0, like Pseudo-Infinity, Pseudo-Nan, Quiet-NaN. We ++ * ignore all of these since they are not generated on modern ++ * processors. We treat Quiet-Nan as simply Nan. ++ */ + return PrintInfNan(buffer, bufferSize, floatMantissa, 16, signbit); + } + /* else this is a number */ +@@ -2486,247 +2580,668 @@ Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value, + hasUnequalMargins = NPY_FALSE; + } + +- /* format the value */ +- if (scientific) { +- return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, +- mantissaBit, hasUnequalMargins, digit_mode, +- precision, trim_mode, digits_left, exp_digits); ++ BigInt_Set_uint64(&bigints[0], mantissa); ++ return Format_floatbits(buffer, bufferSize, bigints, exponent, ++ signbit, mantissaBit, hasUnequalMargins, opt); ++ ++} ++ ++#endif /* INTEL_EXTENDED group */ ++ ++ ++#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE ++/* ++ * Intel's 80-bit IEEE extended precision format, 80-bit storage ++ * ++ * Note: It is not clear if a long double with 10-byte storage exists on any ++ * system. But numpy defines NPY_FLOAT80, so if we come across it, assume it is ++ * an Intel extended format. 
++ */ ++static npy_uint32 ++Dragon4_PrintFloat_Intel_extended80( ++ Dragon4_Scratch *scratch, npy_float80 *value, Dragon4_Options *opt) ++{ ++ FloatVal128 val128; ++ union { ++ npy_float80 floatingPoint; ++ struct { ++ npy_uint64 a; ++ npy_uint16 b; ++ } integer; ++ } buf80; ++ ++ buf80.floatingPoint = *value; ++ /* Intel is little-endian */ ++ val128.lo = buf80.integer.a; ++ val128.hi = buf80.integer.b; ++ ++ return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE */ ++ ++#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE ++/* Intel's 80-bit IEEE extended precision format, 96-bit storage */ ++static npy_uint32 ++Dragon4_PrintFloat_Intel_extended96( ++ Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) ++{ ++ FloatVal128 val128; ++ union { ++ npy_float96 floatingPoint; ++ struct { ++ npy_uint64 a; ++ npy_uint32 b; ++ } integer; ++ } buf96; ++ ++ buf96.floatingPoint = *value; ++ /* Intel is little-endian */ ++ val128.lo = buf96.integer.a; ++ val128.hi = buf96.integer.b; ++ ++ return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE */ ++ ++#ifdef HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE ++/* Motorola Big-endian equivalent of the Intel-extended 96 fp format */ ++static npy_uint32 ++Dragon4_PrintFloat_Motorola_extended96( ++ Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) ++{ ++ FloatVal128 val128; ++ union { ++ npy_float96 floatingPoint; ++ struct { ++ npy_uint64 a; ++ npy_uint32 b; ++ } integer; ++ } buf96; ++ ++ buf96.floatingPoint = *value; ++ /* Motorola is big-endian */ ++ val128.lo = buf96.integer.b; ++ val128.hi = buf96.integer.a >> 16; ++ /* once again we assume the int has same endianness as the float */ ++ ++ return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE */ ++ ++ ++#ifdef NPY_FLOAT128 ++ ++typedef union FloatUnion128 ++{ ++ npy_float128 floatingPoint; ++ struct { ++ npy_uint64 a; ++ npy_uint64 b; ++ } integer; ++} FloatUnion128; ++ ++#ifdef HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE ++/* Intel's 80-bit IEEE extended precision format, 128-bit storage */ ++static npy_uint32 ++Dragon4_PrintFloat_Intel_extended128( ++ Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) ++{ ++ FloatVal128 val128; ++ FloatUnion128 buf128; ++ ++ buf128.floatingPoint = *value; ++ /* Intel is little-endian */ ++ val128.lo = buf128.integer.a; ++ val128.hi = buf128.integer.b; ++ ++ return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE */ ++ ++#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) ++/* ++ * IEEE binary128 floating-point format ++ * ++ * sign: 1 bit ++ * exponent: 15 bits ++ * mantissa: 112 bits ++ * ++ * Currently binary128 format exists on only a few CPUs, such as on the POWER9 ++ * arch or aarch64. Because of this, this code has not been extensively tested. ++ * I am not sure if the arch also supports uint128, and C does not seem to ++ * support int128 literals. So we use uint64 to do manipulation. 
++ */
++static npy_uint32
++Dragon4_PrintFloat_IEEE_binary128(
++    Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt)
++{
++    char *buffer = scratch->repr;
++    npy_uint32 bufferSize = sizeof(scratch->repr);
++    BigInt *bigints = scratch->bigints;
++
++    npy_uint32 floatExponent, floatSign;
++
++    npy_uint64 mantissa_hi, mantissa_lo;
++    npy_int32 exponent;
++    npy_uint32 mantissaBit;
++    npy_bool hasUnequalMargins;
++    char signbit = '\0';
++
++    if (bufferSize == 0) {
++        return 0;
++    }
++
++    if (bufferSize == 1) {
++        buffer[0] = '\0';
++        return 0;
++    }
++
++    mantissa_hi = val128.hi & bitmask_u64(48);
++    mantissa_lo = val128.lo;
++    floatExponent = (val128.hi >> 48) & bitmask_u32(15);
++    floatSign = val128.hi >> 63;
++
++    /* output the sign */
++    if (floatSign != 0) {
++        signbit = '-';
++    }
++    else if (opt->sign) {
++        signbit = '+';
++    }
++
++    /* if this is a special value */
++    if (floatExponent == bitmask_u32(15)) {
++        npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0;
++        return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit);
++    }
++    /* else this is a number */
++
++    /* factor the value into its parts */
++    if (floatExponent != 0) {
++        /*
++         * normal
++         * The floating point equation is:
++         *  value = (1 + mantissa/2^112) * 2 ^ (exponent-16383)
++         * We convert the integer equation by factoring a 2^112 out of the
++         * exponent
++         *  value = (1 + mantissa/2^112) * 2^112 * 2 ^ (exponent-16383-112)
++         *  value = (2^112 + mantissa) * 2 ^ (exponent-16383-112)
++         * Because of the implied 1 in front of the mantissa we have 112 bits of
++         * precision.
++         *   m = (2^112 + mantissa)
++         *   e = (exponent-16383+1-112)
++         *
++         * Adding 2^112 to the mantissa is the same as adding 2^48 to the hi
++         * 64 bit part.
++         */
++        mantissa_hi = (1ull << 48) | mantissa_hi;
++        /* mantissa_lo is unchanged */
++        exponent = floatExponent - 16383 - 112;
++        mantissaBit = 112;
++        hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 &&
++                                                     mantissa_lo == 0);
+     }
+     else {
+-        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
+-                                mantissaBit, hasUnequalMargins, digit_mode,
+-                                cutoff_mode, precision, trim_mode,
+-                                digits_left, digits_right);
++        /*
++         * subnormal
++         * The floating point equation is:
++         *  value = (mantissa/2^112) * 2 ^ (1-16383)
++         * We convert the integer equation by factoring a 2^112 out of the
++         * exponent
++         *  value = (mantissa/2^112) * 2^112 * 2 ^ (1-16383-112)
++         *  value = mantissa * 2 ^ (1-16383-112)
++         * We have up to 112 bits of precision.
++         *   m = (mantissa)
++         *   e = (1-16383-112)
++         */
++        exponent = 1 - 16383 - 112;
++        mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo);
++        hasUnequalMargins = NPY_FALSE;
+     }
++
++    BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo);
++    return Format_floatbits(buffer, bufferSize, bigints, exponent,
++                            signbit, mantissaBit, hasUnequalMargins, opt);
+ }
+
+-PyObject *
+-Dragon4_Positional_AnySize(void *val, size_t size, DigitMode digit_mode,
+-                           CutoffMode cutoff_mode, int precision, int sign,
+-                           TrimMode trim, int pad_left, int pad_right)
++#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE)
++static npy_uint32
++Dragon4_PrintFloat_IEEE_binary128_le(
++    Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt)
+ {
+-    /*
+-     * Use a very large buffer in case anyone tries to output a large number.
+- * 16384 should be enough to uniquely print any float128, which goes up +- * to about 10^4932 */ +- static char repr[16384]; + FloatVal128 val128; +-#ifdef NPY_FLOAT80 +- FloatUnion80 buf80;; +-#endif +-#ifdef NPY_FLOAT96 +- FloatUnion96 buf96; +-#endif +-#ifdef NPY_FLOAT128 + FloatUnion128 buf128; +-#endif + +- switch (size) { +- case 2: +- Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +- case 4: +- Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +- case 8: +- Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +-#ifdef NPY_FLOAT80 +- case 10: +- buf80.floatingPoint = *(npy_float80*)val; +- val128.integer[0] = buf80.integer.a; +- val128.integer[1] = buf80.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +-#endif +-#ifdef NPY_FLOAT96 +- case 12: +- buf96.floatingPoint = *(npy_float96*)val; +- val128.integer[0] = buf96.integer.a; +- val128.integer[1] = buf96.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +-#endif +-#ifdef NPY_FLOAT128 +- case 16: +- buf128.floatingPoint = *(npy_float128*)val; +- val128.integer[0] = buf128.integer.a; +- val128.integer[1] = buf128.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 0, digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right, -1); +- break; +-#endif +- default: +- PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size); +- return NULL; ++ buf128.floatingPoint = *value; ++ val128.lo = buf128.integer.a; ++ val128.hi = buf128.integer.b; ++ ++ return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ ++ ++#if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) ++/* ++ * This function is untested, very few, if any, architectures implement ++ * big endian IEEE binary128 floating point. ++ */ ++static npy_uint32 ++Dragon4_PrintFloat_IEEE_binary128_be( ++ Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) ++{ ++ FloatVal128 val128; ++ FloatUnion128 buf128; ++ ++ buf128.floatingPoint = *value; ++ val128.lo = buf128.integer.b; ++ val128.hi = buf128.integer.a; ++ ++ return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); ++} ++#endif /* HAVE_LDOUBLE_IEEE_QUAD_BE */ ++ ++#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE | HAVE_LDOUBLE_IEEE_BE*/ ++ ++#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ ++ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) ++/* ++ * IBM extended precision 128-bit floating-point format, aka IBM double-double ++ * ++ * IBM's double-double type is a pair of IEEE binary64 values, which you add ++ * together to get a total value. The exponents are arranged so that the lower ++ * double is about 2^52 times smaller than the high one, and the nearest ++ * float64 value is simply the upper double, in which case the pair is ++ * considered "normalized" (not to confuse with "normal" and "subnormal" ++ * binary64 values). We assume normalized values. 
You can see the glibc's ++ * printf on ppc does so too by constructing un-normalized values to get ++ * strange behavior from the OS printf: ++ * ++ * >>> from numpy.core._multiarray_tests import format_float_OSprintf_g ++ * >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0] ++ * >>> format_float_OSprintf_g(x, 2) ++ * 0.30 ++ * >>> format_float_OSprintf_g(2*x, 2) ++ * 1.20 ++ * ++ * If we don't assume normalization, x should really print as 0.6. ++ * ++ * For normalized values gcc assumes that the total mantissa is no ++ * more than 106 bits (53+53), so we can drop bits from the second double which ++ * would be pushed past 106 when left-shifting by its exponent, as happens ++ * sometimes. (There has been debate about this, see ++ * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117, ++ * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 ) ++ * ++ * Note: This function is for the IBM-double-double which is a pair of IEEE ++ * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal ++ * IBM-double-double type, which is a pair of IBM hexadecimal64 floats. ++ * ++ * See also: ++ * https://gcc.gnu.org/wiki/Ieee128PowerPCA ++ * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm ++ */ ++static npy_uint32 ++Dragon4_PrintFloat_IBM_double_double( ++ Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) ++{ ++ char *buffer = scratch->repr; ++ npy_uint32 bufferSize = sizeof(scratch->repr); ++ BigInt *bigints = scratch->bigints; ++ ++ FloatVal128 val128; ++ FloatUnion128 buf128; ++ ++ npy_uint32 floatExponent1, floatExponent2; ++ npy_uint64 floatMantissa1, floatMantissa2; ++ npy_uint32 floatSign1, floatSign2; ++ ++ npy_uint64 mantissa1, mantissa2; ++ npy_int32 exponent1, exponent2; ++ int shift; ++ npy_uint32 mantissaBit; ++ npy_bool hasUnequalMargins; ++ char signbit = '\0'; ++ ++ if (bufferSize == 0) { ++ return 0; ++ } ++ ++ if (bufferSize == 1) { ++ buffer[0] = '\0'; ++ return 0; ++ } ++ ++ /* The high part always comes before the low part, regardless of the ++ * endianness of the system. */ ++ buf128.floatingPoint = *value; ++ val128.hi = buf128.integer.a; ++ val128.lo = buf128.integer.b; ++ ++ /* deconstruct the floating point values */ ++ floatMantissa1 = val128.hi & bitmask_u64(52); ++ floatExponent1 = (val128.hi >> 52) & bitmask_u32(11); ++ floatSign1 = (val128.hi >> 63) != 0; ++ ++ floatMantissa2 = val128.lo & bitmask_u64(52); ++ floatExponent2 = (val128.lo >> 52) & bitmask_u32(11); ++ floatSign2 = (val128.lo >> 63) != 0; ++ ++ /* output the sign using 1st float's sign */ ++ if (floatSign1) { ++ signbit = '-'; ++ } ++ else if (opt->sign) { ++ signbit = '+'; ++ } ++ ++ /* we only need to look at the first float for inf/nan */ ++ if (floatExponent1 == bitmask_u32(11)) { ++ return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit); ++ } ++ ++ /* else this is a number */ ++ ++ /* Factor the 1st value into its parts, see binary64 for comments. */ ++ if (floatExponent1 == 0) { ++ /* ++ * If the first number is a subnormal value, the 2nd has to be 0 for ++ * the float128 to be normalized, so we can ignore it. In this case ++ * the float128 only has the precision of a single binary64 value. 
++ */ ++ mantissa1 = floatMantissa1; ++ exponent1 = 1 - 1023 - 52; ++ mantissaBit = LogBase2_64(mantissa1); ++ hasUnequalMargins = NPY_FALSE; ++ ++ BigInt_Set_uint64(&bigints[0], mantissa1); ++ } ++ else { ++ mantissa1 = (1ull << 52) | floatMantissa1; ++ exponent1 = floatExponent1 - 1023 - 52; ++ mantissaBit = 52 + 53; ++ ++ /* ++ * Computing hasUnequalMargins and mantissaBit: ++ * This is a little trickier than for IEEE formats. ++ * ++ * When both doubles are "normal" it is clearer since we can think of ++ * it as an IEEE type with a 106 bit mantissa. This value can never ++ * have "unequal" margins because of the implied 1 bit in the 2nd ++ * value. (unequal margins only happen when the mantissa has a value ++ * like "10000000000...", all zeros except the implied bit at the ++ * start, since the next lowest number has a different exponent). ++ * mantissaBits will always be 52+53 in this case. ++ * ++ * If the 1st number is a very small normal, and the 2nd is subnormal ++ * and not 2^52 times smaller, the number behaves like a subnormal ++ * overall, where the upper number just adds some bits on the left. ++ * Like usual subnormals, it has "equal" margins. The slightly tricky ++ * thing is that the number of mantissaBits varies. It will be 52 ++ * (from lower double) plus a variable number depending on the upper ++ * number's exponent. We recompute the number of bits in the shift ++ * calculation below, because the shift will be equal to the number of ++ * lost bits. ++ * ++ * We can get unequal margins only if the first value has all-0 ++ * mantissa (except implied bit), and the second value is exactly 0. As ++ * a special exception the smallest normal value (smallest exponent, 0 ++ * mantissa) should have equal margins, since it is "next to" a ++ * subnormal value. ++ */ ++ ++ /* factor the 2nd value into its parts */ ++ if (floatExponent2 != 0) { ++ mantissa2 = (1ull << 52) | floatMantissa2; ++ exponent2 = floatExponent2 - 1023 - 52; ++ hasUnequalMargins = NPY_FALSE; ++ } ++ else { ++ /* shift exp by one so that leading mantissa bit is still bit 53 */ ++ mantissa2 = floatMantissa2 << 1; ++ exponent2 = - 1023 - 52; ++ hasUnequalMargins = (floatExponent1 != 1) && (floatMantissa1 == 0) ++ && (floatMantissa2 == 0); ++ } ++ ++ /* ++ * The 2nd val's exponent might not be exactly 52 smaller than the 1st, ++ * it can vary a little bit. So do some shifting of the low mantissa, ++ * so that the total mantissa is equivalent to bits 53 to 0 of the ++ * first double immediately followed by bits 53 to 0 of the second. ++ */ ++ shift = exponent1 - exponent2 - 53; ++ if (shift > 0) { ++ /* shift more than 64 is undefined behavior */ ++ mantissa2 = shift < 64 ? mantissa2 >> shift : 0; ++ } ++ else if (shift < 0) { ++ /* ++ * This only happens if the 2nd value is subnormal. ++ * We expect that shift > -64, but check it anyway ++ */ ++ mantissa2 = -shift < 64 ? mantissa2 << -shift : 0; ++ } ++ ++ /* ++ * If the low double is a different sign from the high double, ++ * rearrange so that the total mantissa is the sum of the two ++ * mantissas, instead of a subtraction. ++ * hi - lo -> (hi-1) + (1-lo), where lo < 1 ++ */ ++ if (floatSign1 != floatSign2 && mantissa2 != 0) { ++ mantissa1--; ++ mantissa2 = (1ull << 53) - mantissa2; ++ } ++ ++ /* ++ * Compute the number of bits if we are in the subnormal range. ++ * The value "shift" happens to be exactly the number of lost bits. ++ * Also, shift the bits so that the least significant bit is at ++ * bit position 0, like a typical subnormal. 
After this exponent1 ++ * should always be 2^-1022 ++ */ ++ if (shift < 0) { ++ mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift)); ++ mantissa1 = mantissa1 >> -shift; ++ mantissaBit = mantissaBit -(-shift); ++ exponent1 -= shift; ++ DEBUG_ASSERT(exponent1 == -1022); ++ } ++ ++ /* ++ * set up the BigInt mantissa, by shifting the parts as needed ++ * We can use | instead of + since the mantissas should not overlap ++ */ ++ BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11, ++ (mantissa1 << 53) | (mantissa2)); ++ exponent1 = exponent1 - 53; + } + +- return PyUString_FromString(repr); ++ return Format_floatbits(buffer, bufferSize, bigints, exponent1, ++ signbit, mantissaBit, hasUnequalMargins, opt); + } + ++#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */ ++ ++#endif /* NPY_FLOAT128 */ ++ ++ ++/* ++ * Here we define two Dragon4 entry functions for each type. One of them ++ * accepts the args in a Dragon4_Options struct for convenience, the ++ * other enumerates only the necessary parameters. ++ * ++ * Use a very large string buffer in case anyone tries to output a large number. ++ * 16384 should be enough to exactly print the integer part of any float128, ++ * which goes up to about 10^4932. The Dragon4_scratch struct provides a string ++ * buffer of this size. ++ */ ++#define make_dragon4_typefuncs_inner(Type, npy_type, format) \ ++\ ++PyObject *\ ++Dragon4_Positional_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ ++{\ ++ PyObject *ret;\ ++ Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ ++ if (scratch == NULL) {\ ++ return NULL;\ ++ }\ ++ if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ ++ free_dragon4_bigint_scratch(scratch);\ ++ return NULL;\ ++ }\ ++ ret = PyUString_FromString(scratch->repr);\ ++ free_dragon4_bigint_scratch(scratch);\ ++ return ret;\ ++}\ ++\ ++PyObject *\ ++Dragon4_Positional_##Type(npy_type *val, DigitMode digit_mode,\ ++ CutoffMode cutoff_mode, int precision,\ ++ int sign, TrimMode trim, int pad_left, int pad_right)\ ++{\ ++ Dragon4_Options opt;\ ++ \ ++ opt.scientific = 0;\ ++ opt.digit_mode = digit_mode;\ ++ opt.cutoff_mode = cutoff_mode;\ ++ opt.precision = precision;\ ++ opt.sign = sign;\ ++ opt.trim_mode = trim;\ ++ opt.digits_left = pad_left;\ ++ opt.digits_right = pad_right;\ ++ opt.exp_digits = -1;\ ++\ ++ return Dragon4_Positional_##Type##_opt(val, &opt);\ ++}\ ++\ ++PyObject *\ ++Dragon4_Scientific_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ ++{\ ++ PyObject *ret;\ ++ Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ ++ if (scratch == NULL) {\ ++ return NULL;\ ++ }\ ++ if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ ++ free_dragon4_bigint_scratch(scratch);\ ++ return NULL;\ ++ }\ ++ ret = PyUString_FromString(scratch->repr);\ ++ free_dragon4_bigint_scratch(scratch);\ ++ return ret;\ ++}\ ++PyObject *\ ++Dragon4_Scientific_##Type(npy_type *val, DigitMode digit_mode, int precision,\ ++ int sign, TrimMode trim, int pad_left, int exp_digits)\ ++{\ ++ Dragon4_Options opt;\ ++\ ++ opt.scientific = 1;\ ++ opt.digit_mode = digit_mode;\ ++ opt.cutoff_mode = CutoffMode_TotalLength;\ ++ opt.precision = precision;\ ++ opt.sign = sign;\ ++ opt.trim_mode = trim;\ ++ opt.digits_left = pad_left;\ ++ opt.digits_right = -1;\ ++ opt.exp_digits = exp_digits;\ ++\ ++ return Dragon4_Scientific_##Type##_opt(val, &opt);\ ++} ++ ++#define make_dragon4_typefuncs(Type, npy_type, format) \ ++ make_dragon4_typefuncs_inner(Type, npy_type, format) ++ ++make_dragon4_typefuncs(Half, npy_half, 
NPY_HALF_BINFMT_NAME) ++make_dragon4_typefuncs(Float, npy_float, NPY_FLOAT_BINFMT_NAME) ++make_dragon4_typefuncs(Double, npy_double, NPY_DOUBLE_BINFMT_NAME) ++make_dragon4_typefuncs(LongDouble, npy_longdouble, NPY_LONGDOUBLE_BINFMT_NAME) ++ ++#undef make_dragon4_typefuncs ++#undef make_dragon4_typefuncs_inner ++ + PyObject * + Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, + int precision, int sign, TrimMode trim, int pad_left, + int pad_right) + { +- double val; ++ npy_double val; ++ Dragon4_Options opt; ++ ++ opt.scientific = 0; ++ opt.digit_mode = digit_mode; ++ opt.cutoff_mode = cutoff_mode; ++ opt.precision = precision; ++ opt.sign = sign; ++ opt.trim_mode = trim; ++ opt.digits_left = pad_left; ++ opt.digits_right = pad_right; ++ opt.exp_digits = -1; + + if (PyArray_IsScalar(obj, Half)) { + npy_half x = ((PyHalfScalarObject *)obj)->obval; +- return Dragon4_Positional_AnySize(&x, sizeof(npy_half), +- digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right); ++ return Dragon4_Positional_Half_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, Float)) { + npy_float x = ((PyFloatScalarObject *)obj)->obval; +- return Dragon4_Positional_AnySize(&x, sizeof(npy_float), +- digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right); ++ return Dragon4_Positional_Float_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, Double)) { + npy_double x = ((PyDoubleScalarObject *)obj)->obval; +- return Dragon4_Positional_AnySize(&x, sizeof(npy_double), +- digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right); ++ return Dragon4_Positional_Double_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, LongDouble)) { + npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval; +- return Dragon4_Positional_AnySize(&x, sizeof(npy_longdouble), +- digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right); ++ return Dragon4_Positional_LongDouble_opt(&x, &opt); + } + + val = PyFloat_AsDouble(obj); + if (PyErr_Occurred()) { + return NULL; + } +- return Dragon4_Positional_AnySize(&val, sizeof(double), +- digit_mode, cutoff_mode, precision, +- sign, trim, pad_left, pad_right); +-} +- +-PyObject * +-Dragon4_Scientific_AnySize(void *val, size_t size, DigitMode digit_mode, +- int precision, int sign, TrimMode trim, +- int pad_left, int exp_digits) +-{ +- /* use a very large buffer in case anyone tries to output a large precision */ +- static char repr[4096]; +- FloatVal128 val128; +-#ifdef NPY_FLOAT80 +- FloatUnion80 buf80;; +-#endif +-#ifdef NPY_FLOAT96 +- FloatUnion96 buf96; +-#endif +-#ifdef NPY_FLOAT128 +- FloatUnion128 buf128; +-#endif +- +- /* dummy, is ignored in scientific mode */ +- CutoffMode cutoff_mode = CutoffMode_TotalLength; +- +- switch (size) { +- case 2: +- Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, pad_left, -1, exp_digits); +- break; +- case 4: +- Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, pad_left, -1, exp_digits); +- break; +- case 8: +- Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, pad_left, -1, exp_digits); +- break; +-#ifdef NPY_FLOAT80 +- case 10: +- buf80.floatingPoint = *(npy_float80*)val; +- val128.integer[0] = buf80.integer.a; +- val128.integer[1] = buf80.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, 
pad_left, -1, exp_digits); +- break; +-#endif +-#ifdef NPY_FLOAT96 +- case 12: +- buf96.floatingPoint = *(npy_float96*)val; +- val128.integer[0] = buf96.integer.a; +- val128.integer[1] = buf96.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, pad_left, -1, exp_digits); +- break; +-#endif +-#ifdef NPY_FLOAT128 +- case 16: +- buf128.floatingPoint = *(npy_float128*)val; +- val128.integer[0] = buf128.integer.a; +- val128.integer[1] = buf128.integer.b; +- Dragon4_PrintFloat128(repr, sizeof(repr), val128, +- 1, digit_mode, cutoff_mode, precision, sign, +- trim, pad_left, -1, exp_digits); +- break; +-#endif +- default: +- PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size); +- return NULL; +- } +- +- return PyUString_FromString(repr); ++ return Dragon4_Positional_Double_opt(&val, &opt); + } + + PyObject * + Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, + int sign, TrimMode trim, int pad_left, int exp_digits) + { +- double val; ++ npy_double val; ++ Dragon4_Options opt; ++ ++ opt.scientific = 1; ++ opt.digit_mode = digit_mode; ++ opt.cutoff_mode = CutoffMode_TotalLength; ++ opt.precision = precision; ++ opt.sign = sign; ++ opt.trim_mode = trim; ++ opt.digits_left = pad_left; ++ opt.digits_right = -1; ++ opt.exp_digits = exp_digits; + + if (PyArray_IsScalar(obj, Half)) { + npy_half x = ((PyHalfScalarObject *)obj)->obval; +- return Dragon4_Scientific_AnySize(&x, sizeof(npy_half), +- digit_mode, precision, +- sign, trim, pad_left, exp_digits); ++ return Dragon4_Scientific_Half_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, Float)) { + npy_float x = ((PyFloatScalarObject *)obj)->obval; +- return Dragon4_Scientific_AnySize(&x, sizeof(npy_float), +- digit_mode, precision, +- sign, trim, pad_left, exp_digits); ++ return Dragon4_Scientific_Float_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, Double)) { + npy_double x = ((PyDoubleScalarObject *)obj)->obval; +- return Dragon4_Scientific_AnySize(&x, sizeof(npy_double), +- digit_mode, precision, +- sign, trim, pad_left, exp_digits); ++ return Dragon4_Scientific_Double_opt(&x, &opt); + } + else if (PyArray_IsScalar(obj, LongDouble)) { + npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval; +- return Dragon4_Scientific_AnySize(&x, sizeof(npy_longdouble), +- digit_mode, precision, +- sign, trim, pad_left, exp_digits); ++ return Dragon4_Scientific_LongDouble_opt(&x, &opt); + } + + val = PyFloat_AsDouble(obj); + if (PyErr_Occurred()) { + return NULL; + } +- return Dragon4_Scientific_AnySize(&val, sizeof(double), +- digit_mode, precision, +- sign, trim, pad_left, exp_digits); ++ return Dragon4_Scientific_Double_opt(&val, &opt); + } ++ ++#undef DEBUG_ASSERT +diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h +index 5559c5157..2b8b4cef4 100644 +--- a/numpy/core/src/multiarray/dragon4.h ++++ b/numpy/core/src/multiarray/dragon4.h +@@ -40,6 +40,48 @@ + #include "npy_pycompat.h" + #include "numpy/arrayscalars.h" + ++/* Half binary format */ ++#define NPY_HALF_BINFMT_NAME IEEE_binary16 ++ ++/* Float binary format */ ++#if NPY_BITSOF_FLOAT == 32 ++ #define NPY_FLOAT_BINFMT_NAME IEEE_binary32 ++#elif NPY_BITSOF_FLOAT == 64 ++ #define NPY_FLOAT_BINFMT_NAME IEEE_binary64 ++#else ++ #error No float representation defined ++#endif ++ ++/* Double binary format */ ++#if NPY_BITSOF_DOUBLE == 32 ++ #define NPY_DOUBLE_BINFMT_NAME IEEE_binary32 ++#elif NPY_BITSOF_DOUBLE == 64 ++ #define NPY_DOUBLE_BINFMT_NAME IEEE_binary64 
++#else ++ #error No double representation defined ++#endif ++ ++/* LongDouble binary format */ ++#if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) ++ #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary128_be ++#elif defined(HAVE_LDOUBLE_IEEE_QUAD_LE) ++ #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary128_le ++#elif (defined(HAVE_LDOUBLE_IEEE_DOUBLE_LE) || \ ++ defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE)) ++ #define NPY_LONGDOUBLE_BINFMT_NAME IEEE_binary64 ++#elif defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) ++ #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended96 ++#elif defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) ++ #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended128 ++#elif defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) ++ #define NPY_LONGDOUBLE_BINFMT_NAME Motorola_extended96 ++#elif (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ ++ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) ++ #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double ++#else ++ #error No long double representation defined ++#endif ++ + typedef enum DigitMode + { + /* Round digits to print shortest uniquely identifiable number. */ +@@ -64,15 +106,23 @@ typedef enum TrimMode + TrimMode_DptZeros, /* trim trailing zeros & trailing decimal point */ + } TrimMode; + +-PyObject * +-Dragon4_Positional_AnySize(void *val, size_t size, DigitMode digit_mode, +- CutoffMode cutoff_mode, int precision, int sign, +- TrimMode trim, int pad_left, int pad_right); ++#define make_dragon4_typedecl(Type, npy_type) \ ++ PyObject *\ ++ Dragon4_Positional_##Type(npy_type *val, DigitMode digit_mode,\ ++ CutoffMode cutoff_mode, int precision,\ ++ int sign, TrimMode trim, int pad_left,\ ++ int pad_right);\ ++ PyObject *\ ++ Dragon4_Scientific_##Type(npy_type *val, DigitMode digit_mode,\ ++ int precision, int sign, TrimMode trim,\ ++ int pad_left, int exp_digits); + +-PyObject * +-Dragon4_Scientific_AnySize(void *val, size_t size, DigitMode digit_mode, +- int precision, int sign, TrimMode trim, +- int pad_left, int pad_right); ++make_dragon4_typedecl(Half, npy_half) ++make_dragon4_typedecl(Float, npy_float) ++make_dragon4_typedecl(Double, npy_double) ++make_dragon4_typedecl(LongDouble, npy_longdouble) ++ ++#undef make_dragon4_typedecl + + PyObject * + Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, +diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src +index c2d4bde66..42d613c90 100644 +--- a/numpy/core/src/multiarray/scalartypes.c.src ++++ b/numpy/core/src/multiarray/scalartypes.c.src +@@ -432,6 +432,7 @@ gentype_format(PyObject *self, PyObject *args) + + /**begin repeat + * #name = half, float, double, longdouble# ++ * #Name = Half, Float, Double, LongDouble# + * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_half, npy_float, npy_double, npy_longdouble# + * #suff = h, f, d, l# +@@ -443,12 +444,12 @@ format_@name@(@type@ val, npy_bool scientific, + int pad_left, int pad_right, int exp_digits) + { + if (scientific) { +- return Dragon4_Scientific_AnySize(&val, sizeof(@type@), ++ return Dragon4_Scientific_@Name@(&val, + DigitMode_Unique, precision, + sign, trim, pad_left, exp_digits); + } + else { +- return Dragon4_Positional_AnySize(&val, sizeof(@type@), ++ return Dragon4_Positional_@Name@(&val, + DigitMode_Unique, CutoffMode_TotalLength, precision, + sign, trim, pad_left, pad_right); + } +diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src +index 0370ea6c7..170471f19 100644 +--- a/numpy/core/src/npymath/ieee754.c.src ++++ 
b/numpy/core/src/npymath/ieee754.c.src +@@ -183,6 +183,7 @@ static npy_longdouble _nextl(npy_longdouble x, int p) + { + npy_int64 hx,ihx,ilx; + npy_uint64 lx; ++ npy_longdouble u; + + GET_LDOUBLE_WORDS64(hx, lx, x); + ihx = hx & 0x7fffffffffffffffLL; /* |hx| */ +@@ -193,7 +194,6 @@ static npy_longdouble _nextl(npy_longdouble x, int p) + return x; /* signal the nan */ + } + if(ihx == 0 && ilx == 0) { /* x == 0 */ +- npy_longdouble u; + SET_LDOUBLE_WORDS64(x, p, 0ULL);/* return +-minsubnormal */ + u = x * x; + if (u == x) { +@@ -203,7 +203,6 @@ static npy_longdouble _nextl(npy_longdouble x, int p) + } + } + +- npy_longdouble u; + if(p < 0) { /* p < 0, x -= ulp */ + if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) + return x+x; /* overflow, return -inf */ +diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h +index d75b9e991..e4a919db6 100644 +--- a/numpy/core/src/npymath/npy_math_private.h ++++ b/numpy/core/src/npymath/npy_math_private.h +@@ -287,8 +287,7 @@ do { \ + typedef npy_uint32 ldouble_man_t; + typedef npy_uint32 ldouble_exp_t; + typedef npy_uint32 ldouble_sign_t; +-#elif defined(HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE) || \ +- defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) ++#elif defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) + /* 64 bits IEEE double precision aligned on 16 bytes: used by ppc arch on + * Mac OS X */ + +@@ -435,8 +434,8 @@ do { \ + typedef npy_uint32 ldouble_sign_t; + #endif + +-#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ +- !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) ++#if !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) && \ ++ !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) + /* Get the sign bit of x. x should be of type IEEEl2bitsrep */ + #define GET_LDOUBLE_SIGN(x) \ + (((x).a[LDBL_SIGN_INDEX] & LDBL_SIGN_MASK) >> LDBL_SIGN_SHIFT) +@@ -477,7 +476,7 @@ do { \ + ((x).a[LDBL_MANH_INDEX] & ~LDBL_MANH_MASK) | \ + (((IEEEl2bitsrep_part)(v) << LDBL_MANH_SHIFT) & LDBL_MANH_MASK)) + +-#endif /* #ifndef HAVE_LDOUBLE_DOUBLE_DOUBLE_BE */ ++#endif /* !HAVE_LDOUBLE_DOUBLE_DOUBLE_* */ + + /* + * Those unions are used to convert a pointer of npy_cdouble to native C99 +diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h +index 107b3cb5b..8143e7719 100644 +--- a/numpy/core/src/private/npy_config.h ++++ b/numpy/core/src/private/npy_config.h +@@ -15,7 +15,8 @@ + * amd64 is not harmed much by the bloat as the system provides 16 byte + * alignment by default. 
+ */ +-#if (defined NPY_CPU_X86 || defined _WIN32) ++#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\ ++ defined NPY_CPU_ARMEB_AARCH32) + #define NPY_MAX_COPY_ALIGNMENT 8 + #else + #define NPY_MAX_COPY_ALIGNMENT 16 +diff --git a/numpy/core/src/private/npy_fpmath.h b/numpy/core/src/private/npy_fpmath.h +index 86b9cf3da..dbb3fb23d 100644 +--- a/numpy/core/src/private/npy_fpmath.h ++++ b/numpy/core/src/private/npy_fpmath.h +@@ -7,45 +7,24 @@ + #include "numpy/npy_cpu.h" + #include "numpy/npy_common.h" + +-#ifdef NPY_OS_DARWIN +- /* This hardcoded logic is fragile, but universal builds makes it +- * difficult to detect arch-specific features */ +- +- /* MAC OS X < 10.4 and gcc < 4 does not support proper long double, and +- * is the same as double on those platforms */ +- #if NPY_BITSOF_LONGDOUBLE == NPY_BITSOF_DOUBLE +- /* This assumes that FPU and ALU have the same endianness */ +- #if NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN +- #define HAVE_LDOUBLE_IEEE_DOUBLE_LE +- #elif NPY_BYTE_ORDER == NPY_BIG_ENDIAN +- #define HAVE_LDOUBLE_IEEE_DOUBLE_BE +- #else +- #error Endianness undefined ? +- #endif +- #else +- #if defined(NPY_CPU_X86) +- #define HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE +- #elif defined(NPY_CPU_AMD64) +- #define HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE +- #elif defined(NPY_CPU_PPC) || defined(NPY_CPU_PPC64) +- #define HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE +- #elif defined(NPY_CPU_PPC64LE) +- #define HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_LE +- #endif +- #endif +-#endif +- + #if !(defined(HAVE_LDOUBLE_IEEE_QUAD_BE) || \ + defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || \ + defined(HAVE_LDOUBLE_IEEE_DOUBLE_LE) || \ + defined(HAVE_LDOUBLE_IEEE_DOUBLE_BE) || \ +- defined(HAVE_LDOUBLE_IEEE_DOUBLE_16_BYTES_BE) || \ + defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ + defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ + defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) || \ +- defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \ +- defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)) ++ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ ++ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)) + #error No long double representation defined + #endif + ++/* for back-compat, also keep old name for double-double */ ++#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE ++ #define HAVE_LDOUBLE_DOUBLE_DOUBLE_LE ++#endif ++#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE ++ #define HAVE_LDOUBLE_DOUBLE_DOUBLE_BE ++#endif ++ + #endif +diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py +index 61774617d..10fbb8b74 100644 +--- a/numpy/core/tests/test_scalarprint.py ++++ b/numpy/core/tests/test_scalarprint.py +@@ -5,8 +5,9 @@ + from __future__ import division, absolute_import, print_function + + import numpy as np +-from numpy.testing import assert_, assert_equal, run_module_suite ++from numpy.testing import assert_, assert_equal, run_module_suite, dec + import sys, tempfile ++import platform + + + class TestRealScalars(object): +@@ -217,6 +218,66 @@ class TestRealScalars(object): + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='-'), "1") + ++ @dec.skipif(not platform.machine().startswith("ppc64"), ++ "only applies to ppc float128 values") ++ def test_ppc64_ibm_double_double128(self): ++ # check that the precision decreases once we get into the subnormal ++ # range. Unlike float64, this starts around 1e-292 instead of 1e-308, ++ # which happens when the first double is normal and the second is ++ # subnormal. 
++ x = np.float128('2.123123123123123123123123123123123e-286') ++ got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)] ++ expected = [ ++ "1.06156156156156156156156156156157e-286", ++ "1.06156156156156156156156156156158e-287", ++ "1.06156156156156156156156156156159e-288", ++ "1.0615615615615615615615615615616e-289", ++ "1.06156156156156156156156156156157e-290", ++ "1.06156156156156156156156156156156e-291", ++ "1.0615615615615615615615615615616e-292", ++ "1.0615615615615615615615615615615e-293", ++ "1.061561561561561561561561561562e-294", ++ "1.06156156156156156156156156155e-295", ++ "1.0615615615615615615615615616e-296", ++ "1.06156156156156156156156156e-297", ++ "1.06156156156156156156156157e-298", ++ "1.0615615615615615615615616e-299", ++ "1.06156156156156156156156e-300", ++ "1.06156156156156156156155e-301", ++ "1.0615615615615615615616e-302", ++ "1.061561561561561561562e-303", ++ "1.06156156156156156156e-304", ++ "1.0615615615615615618e-305", ++ "1.06156156156156156e-306", ++ "1.06156156156156157e-307", ++ "1.0615615615615616e-308", ++ "1.06156156156156e-309", ++ "1.06156156156157e-310", ++ "1.0615615615616e-311", ++ "1.06156156156e-312", ++ "1.06156156154e-313", ++ "1.0615615616e-314", ++ "1.06156156e-315", ++ "1.06156155e-316", ++ "1.061562e-317", ++ "1.06156e-318", ++ "1.06155e-319", ++ "1.0617e-320", ++ "1.06e-321", ++ "1.04e-322", ++ "1e-323", ++ "0.0", ++ "0.0"] ++ assert_equal(got, expected) ++ ++ # Note: we follow glibc behavior, but it (or gcc) might not be right. ++ # In particular we can get two values that print the same but are not ++ # equal: ++ a = np.float128('2')/np.float128('3') ++ b = np.float128(str(a)) ++ assert_equal(str(a), str(b)) ++ assert_(a != b) ++ + def float32_roundtrip(self): + # gh-9360 + x = np.float32(1024 - 2**-14) +diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py +index bebeddc92..09d1bbeaf 100644 +--- a/numpy/core/tests/test_umath.py ++++ b/numpy/core/tests/test_umath.py +@@ -2515,8 +2515,10 @@ def test_nextafter(): + def test_nextafterf(): + return _test_nextafter(np.float32) + +-@dec.knownfailureif(sys.platform == 'win32', +- "Long double support buggy on win32, ticket 1664.") ++@dec.skipif(np.finfo(np.double) == np.finfo(np.longdouble), ++ "long double is same as double") ++@dec.knownfailureif(platform.machine().startswith("ppc64"), ++ "IBM double double") + def test_nextafterl(): + return _test_nextafter(np.longdouble) + +@@ -2544,8 +2546,10 @@ def test_spacing(): + def test_spacingf(): + return _test_spacing(np.float32) + +-@dec.knownfailureif(sys.platform == 'win32', +- "Long double support buggy on win32, ticket 1664.") ++@dec.skipif(np.finfo(np.double) == np.finfo(np.longdouble), ++ "long double is same as double") ++@dec.knownfailureif(platform.machine().startswith("ppc64"), ++ "IBM double double") + def test_spacingl(): + return _test_spacing(np.longdouble) + +diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py +index 708c12e8f..1b4702142 100644 +--- a/numpy/ma/tests/test_core.py ++++ b/numpy/ma/tests/test_core.py +@@ -3278,18 +3278,13 @@ class TestMaskedArrayMethods(object): + assert_equal(test.mask, mask_first.mask) + + # Test sort on dtype with subarray (gh-8069) ++ # Just check that the sort does not error, structured array subarrays ++ # are treated as byte strings and that leads to differing behavior ++ # depending on endianess and `endwith`. 
+         dt = np.dtype([('v', int, 2)])
+         a = a.view(dt)
+-        mask_last = mask_last.view(dt)
+-        mask_first = mask_first.view(dt)
+-
+         test = sort(a)
+-        assert_equal(test, mask_last)
+-        assert_equal(test.mask, mask_last.mask)
+-
+         test = sort(a, endwith=False)
+-        assert_equal(test, mask_first)
+-        assert_equal(test.mask, mask_first.mask)
+
+     def test_argsort(self):
+         # Test argsort
+--
+2.17.2
+
diff --git a/SPECS/numpy.spec b/SPECS/numpy.spec
index d8ed3bc..be82ceb 100644
--- a/SPECS/numpy.spec
+++ b/SPECS/numpy.spec
@@ -1,6 +1,6 @@
 Name: numpy
 Version: 1.14.3
-Release: 6%{?dist}
+Release: 9%{?dist}
 Epoch: 1
 Summary: A fast multidimensional array facility for Python

@@ -10,6 +10,8 @@ URL: http://www.numpy.org/
 Source0: https://github.com/%{name}/%{name}/releases/download/v%{version}/%{name}-%{version}.tar.gz
 Source1: https://docs.scipy.org/doc/numpy/numpy-html-1.14.2.zip
 Patch0: f2py-setup.patch
+Patch1: numpy-1.14.3-float128.patch
+Patch2: numpy-1.14.3-CVE-2019-6446.patch

 BuildRequires: python3-devel python3-setuptools python3-nose python3-Cython
 BuildRequires: lapack-devel gcc-gfortran
@@ -81,6 +83,8 @@ This package provides the complete documentation for NumPy.
 %prep
 %setup -q -n %{name}-%{version}%{?relc}
 %patch0 -p1
+%patch1 -p1
+%patch2 -p1

 # workaround for rhbz#849713
 # http://mail.scipy.org/pipermail/numpy-discussion/2012-July/063530.html
@@ -194,6 +198,18 @@ fi

 %changelog
+* Wed Jun 05 2019 Nikola Forró - 1:1.14.3-9
+- Fix CVE-2019-6446
+  resolves: #1668466
+
+* Mon Jun 03 2019 Nikola Forró - 1:1.14.3-8
+- Rebuild with fixed gating.yaml
+  related: #1687873
+
+* Wed Mar 20 2019 Nikola Forró - 1:1.14.3-7
+- Fix broken float128 on all arches except x86_64
+  resolves: #1687873
+
 * Mon Oct 01 2018 Lumír Balhar - 1:1.14.3-6
 - Add alternatives for the f2py executable
 - Resolves: rhbz#1633548
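
A few notes on the techniques used in the float128 backport above, each with a small illustrative sketch. The sketches are editor-supplied examples rather than code from the patch: they use plain <stdint.h> types and made-up names, and only mirror the ideas.

The patch drops the old per-type FloatUnion/GetMantissa/GetExponent helpers in favor of direct masking and shifting through small bitmask helpers (bitmask_u32/bitmask_u64). A minimal standalone sketch of that technique for IEEE binary64 follows; show_parts is an illustrative name, not a numpy function:

#include <stdio.h>
#include <stdint.h>

/* Set the low n bits; mirrors the bitmask helpers the patch introduces.
 * Valid here for 0 < n < 64, which is all this sketch needs. */
static uint64_t bitmask_u64(uint32_t n)
{
    return ~(~0ull << n);
}

static void show_parts(double d)
{
    /* type-pun through a union, as the patched code does */
    union {
        double floatingPoint;
        uint64_t integer;
    } u;
    u.floatingPoint = d;

    /* binary64 layout: 1 sign bit, 11 exponent bits, 52 mantissa bits */
    uint64_t mantissa = u.integer & bitmask_u64(52);
    uint32_t exponent = (uint32_t)((u.integer >> 52) & bitmask_u64(11));
    uint32_t sign     = (uint32_t)(u.integer >> 63);

    printf("sign=%u exponent=%u mantissa=0x%013llx\n",
           sign, exponent, (unsigned long long)mantissa);
}

int main(void)
{
    show_parts(-0.75);  /* sign=1 exponent=1022 mantissa=0x8000000000000 */
    show_parts(1.0);    /* sign=0 exponent=1023 mantissa=0x0000000000000 */
    return 0;
}

The same masks generalize to every format the patch handles; only the bit widths change (10/5 for binary16, 23/8 for binary32, 112/15 split across two 64-bit words for binary128), which is what makes the shared Format_floatbits tail possible.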
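The "factor the value into its parts" blocks all implement the equations spelled out in the patch comments: a normal value is (2^p + mantissa) * 2^(exponent - bias - p) and a subnormal is mantissa * 2^(1 - bias - p), where p is the mantissa width. A sketch applying the same equations in the opposite direction, reconstructing a binary16 value from its parts (half_to_double is an illustrative name, not numpy API):

#include <stdio.h>
#include <stdint.h>
#include <math.h>

/* Rebuild a binary16 value as m * 2^e using the patch's factoring:
 *   normal:    m = 2^10 + mantissa,  e = exponent - 15 - 10
 *   subnormal: m = mantissa,         e = 1 - 15 - 10
 */
static double half_to_double(uint16_t h)
{
    uint32_t mantissa = h & 0x3ffu;         /* low 10 bits */
    uint32_t exponent = (h >> 10) & 0x1fu;  /* next 5 bits */
    uint32_t sign = h >> 15;
    double v;

    if (exponent == 0x1f) {                 /* all-ones exponent: Inf/NaN */
        v = (mantissa == 0) ? INFINITY : NAN;
    }
    else if (exponent != 0) {               /* normal: implied leading 1 */
        v = ldexp((double)(1024 + mantissa), (int)exponent - 15 - 10);
    }
    else {                                  /* subnormal: no implied bit */
        v = ldexp((double)mantissa, 1 - 15 - 10);
    }
    return sign ? -v : v;
}

int main(void)
{
    printf("%g\n", half_to_double(0x3c00));  /* 1 */
    printf("%g\n", half_to_double(0xc000));  /* -2 */
    printf("%g\n", half_to_double(0x0001));  /* 5.96046e-08, smallest subnormal */
    return 0;
}

Dragon4 needs the value in exactly this (m, e) form because its digit generation works on exact big integers, never on rounded floating-point intermediates.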
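On the IBM double-double path: the value is the exact, unevaluated sum of two binary64 numbers, so the low double carries bits that ordinary double arithmetic would simply discard. A tiny demonstration of why the formatter must merge both 52-bit mantissas (plus implied bits) into one BigInt instead of formatting the high double alone:

#include <stdio.h>

/* A double-double value is the unevaluated sum hi + lo of two binary64
 * numbers, with |lo| at most half an ulp of hi for normalized pairs. */
int main(void)
{
    double hi = 1.0;
    double lo = 0x1.0p-60;  /* well below half an ulp of hi, which is 2^-53 */

    /* Folding the pair into a single binary64 loses the low part: */
    printf("%d\n", hi + lo == hi);  /* prints 1 */

    /* Yet the pair (hi, lo) represents 1 + 2^-60 exactly, carried in two
     * separate mantissas -- the ~106 bits the patch reassembles with
     * BigInt_Set_2x_uint64 before calling Format_floatbits. */
    return 0;
}

This is also why the ppc64 test above expects precision to fall away gradually below about 1e-292: once the low double goes subnormal, the pair's combined mantissa shrinks bit by bit.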
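Finally, the make_dragon4_typefuncs / make_dragon4_typefuncs_inner pair is the standard two-level token-pasting idiom: the outer macro exists only so that its format argument (NPY_HALF_BINFMT_NAME and friends, defined in dragon4.h) is expanded to IEEE_binary16 etc. before ## pastes it into a function name. A self-contained sketch of the idiom, with stand-in names (FORMAT_NAME and MAKE_PRINTER are not from the patch):

#include <stdio.h>

#define FORMAT_NAME my_binary64  /* stands in for NPY_DOUBLE_BINFMT_NAME */

#define MAKE_PRINTER_INNER(name) \
    static void print_##name(double v) { printf("%s: %f\n", #name, v); }
/* Without the indirection, MAKE_PRINTER(FORMAT_NAME) would define
 * print_FORMAT_NAME; the extra level lets FORMAT_NAME expand first. */
#define MAKE_PRINTER(name) MAKE_PRINTER_INNER(name)

MAKE_PRINTER(FORMAT_NAME)  /* defines print_my_binary64() */

int main(void)
{
    print_my_binary64(2.5);  /* prints "my_binary64: 2.500000" */
    return 0;
}

In the patch this is what lets one macro body emit Dragon4_Positional_Half, ..._Float, ..._Double, and ..._LongDouble while routing each to the per-format printer selected at configure time in dragon4.h.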