diff --git a/.gcc.metadata b/.gcc.metadata new file mode 100644 index 0000000..bc0d5cc --- /dev/null +++ b/.gcc.metadata @@ -0,0 +1,4 @@ +85f620a26aabf6a934c44ca40a9799af0952f863 SOURCES/cloog-0.18.0.tar.gz +8263777994323f58bb81869354fb18c6ca32f88a SOURCES/fastjar-0.97.tar.gz +500237a6ba14b8a56751f57e5957b40cefa9cb01 SOURCES/gcc-4.8.5-20150702.tar.bz2 +d7936929c3937e03f09b64c3c54e49422fa8ddb3 SOURCES/isl-0.11.1.tar.bz2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..388b5e1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +SOURCES/cloog-0.18.0.tar.gz +SOURCES/fastjar-0.97.tar.gz +SOURCES/gcc-4.8.5-20150702.tar.bz2 +SOURCES/isl-0.11.1.tar.bz2 diff --git a/README.md b/README.md deleted file mode 100644 index 0e7897f..0000000 --- a/README.md +++ /dev/null @@ -1,5 +0,0 @@ -The master branch has no content - -Look at the c7 branch if you are working with CentOS-7, or the c4/c5/c6 branch for CentOS-4, 5 or 6 - -If you find this file in a distro specific branch, it means that no content has been checked in yet diff --git a/SOURCES/cloog-0.18.0-ppc64le-config.patch b/SOURCES/cloog-0.18.0-ppc64le-config.patch new file mode 100644 index 0000000..5538a24 --- /dev/null +++ b/SOURCES/cloog-0.18.0-ppc64le-config.patch @@ -0,0 +1,717 @@ +--- cloog-0.18.0/m4/libtool.m4.jj 2014-04-10 14:29:12.547772844 -0700 ++++ cloog-0.18.0/m4/libtool.m4 2014-04-10 14:33:01.848642792 -0700 +@@ -1312,7 +1312,7 @@ + rm -rf conftest* + ;; + +-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ ++x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ + s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext +@@ -1326,7 +1326,10 @@ + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; +- ppc64-*linux*|powerpc64-*linux*) ++ powerpc64le-*linux*) ++ LD="${LD-ld} -m elf32lppclinux" ++ ;; ++ powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) +@@ -1345,7 +1348,10 @@ + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; +- ppc*-*linux*|powerpc*-*linux*) ++ powerpcle-*linux*) ++ LD="${LD-ld} -m elf64lppc" ++ ;; ++ powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) +--- cloog-0.18.0/autoconf/config.sub.jj 2013-10-11 09:27:45.000000000 +0200 ++++ cloog-0.18.0/autoconf/config.sub 2014-04-10 10:38:39.000000000 +0200 +@@ -1,24 +1,18 @@ + #! /bin/sh + # Configuration validation subroutine script. +-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +-# 2011, 2012 Free Software Foundation, Inc. +- +-timestamp='2012-04-18' +- +-# This file is (in principle) common to ALL GNU software. +-# The presence of a machine in this file suggests that SOME GNU software +-# can handle that machine. It does not imply ALL GNU software can. +-# +-# This file is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 2 of the License, or ++# Copyright 1992-2013 Free Software Foundation, Inc. ++ ++timestamp='2013-10-01' ++ ++# This file is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or + # (at your option) any later version. 
+ # +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. + # + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . +@@ -26,11 +20,12 @@ timestamp='2012-04-18' + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a + # configuration script generated by Autoconf, you may include it under +-# the same distribution terms that you use for the rest of that program. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). + + +-# Please send patches to . Submit a context +-# diff and a properly formatted GNU ChangeLog entry. ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. + # + # Configuration subroutine to validate and canonicalize a configuration type. + # Supply the specified configuration type as an argument. +@@ -73,9 +68,7 @@ Report bugs and patches to and include a ChangeLog +-# entry. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). + # +-# This script attempts to guess a canonical system name similar to +-# config.sub. If it succeeds, it prints the system name on stdout, and +-# exits with 0. Otherwise, it exits with 1. ++# Originally written by Per Bothner. + # + # You can get the latest version of this script from: + # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD ++# ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. ++ + + me=`echo "$0" | sed -e 's,.*/,,'` + +@@ -54,9 +50,7 @@ version="\ + GNU config.guess ($timestamp) + + Originally written by Per Bothner. +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +-Free Software Foundation, Inc. ++Copyright 1992-2013 Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." +@@ -138,6 +132,27 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` | + UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown + UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + ++case "${UNAME_SYSTEM}" in ++Linux|GNU|GNU/*) ++ # If the system lacks a compiler, then just pick glibc. ++ # We could probably try harder. ++ LIBC=gnu ++ ++ eval $set_cc_for_build ++ cat <<-EOF > $dummy.c ++ #include ++ #if defined(__UCLIBC__) ++ LIBC=uclibc ++ #elif defined(__dietlibc__) ++ LIBC=dietlibc ++ #else ++ LIBC=gnu ++ #endif ++ EOF ++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` ++ ;; ++esac ++ + # Note: order is significant - the case branches are not exclusive. 
+ + case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +@@ -200,6 +215,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:$ + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; ++ *:Bitrig:*:*) ++ UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` ++ echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} ++ exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} +@@ -302,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:$ + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; +- arm:riscos:*:*|arm:RISCOS:*:*) ++ arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) +@@ -801,6 +820,9 @@ EOF + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; ++ *:MINGW64*:*) ++ echo ${UNAME_MACHINE}-pc-mingw64 ++ exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; +@@ -852,21 +874,21 @@ EOF + exit ;; + *:GNU:*:*) + # the GNU system +- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` ++ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland +- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu ++ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in +@@ -879,59 +901,54 @@ EOF + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 +- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi +- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ++ if test "$?" 
= 0 ; then LIBC="gnulibc1" ; fi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; ++ arc:Linux:*:* | arceb:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then +- echo ${UNAME_MACHINE}-unknown-linux-gnueabi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else +- echo ${UNAME_MACHINE}-unknown-linux-gnueabihf ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + frv:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) +- LIBC=gnu +- eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c +- #ifdef __dietlibc__ +- LIBC=dietlibc +- #endif +-EOF +- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` +- echo "${UNAME_MACHINE}-pc-linux-${LIBC}" ++ echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build +@@ -950,54 +967,63 @@ EOF + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` +- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ++ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; ++ or1k:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; + or32:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) +- echo sparc-unknown-linux-gnu ++ echo sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) +- echo hppa64-unknown-linux-gnu ++ echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in +- PA7*) echo hppa1.1-unknown-linux-gnu ;; +- PA8*) echo hppa2.0-unknown-linux-gnu ;; +- *) echo hppa-unknown-linux-gnu ;; ++ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; ++ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; ++ *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) +- echo powerpc64-unknown-linux-gnu ++ echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) +- echo powerpc-unknown-linux-gnu ++ echo powerpc-unknown-linux-${LIBC} ++ exit ;; ++ ppc64le:Linux:*:*) ++ echo powerpc64le-unknown-linux-${LIBC} ++ exit ;; ++ ppcle:Linux:*:*) ++ echo powerpcle-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) +- echo ${UNAME_MACHINE}-ibm-linux ++ echo 
${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) +- echo ${UNAME_MACHINE}-dec-linux-gnu ++ echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. +@@ -1201,6 +1227,9 @@ EOF + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; ++ x86_64:Haiku:*:*) ++ echo x86_64-unknown-haiku ++ exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; +@@ -1227,19 +1256,21 @@ EOF + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown +- case $UNAME_PROCESSOR in +- i386) +- eval $set_cc_for_build +- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then +- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ +- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ +- grep IS_64BIT_ARCH >/dev/null +- then +- UNAME_PROCESSOR="x86_64" +- fi +- fi ;; +- unknown) UNAME_PROCESSOR=powerpc ;; +- esac ++ eval $set_cc_for_build ++ if test "$UNAME_PROCESSOR" = unknown ; then ++ UNAME_PROCESSOR=powerpc ++ fi ++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then ++ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ ++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ ++ grep IS_64BIT_ARCH >/dev/null ++ then ++ case $UNAME_PROCESSOR in ++ i386) UNAME_PROCESSOR=x86_64 ;; ++ powerpc) UNAME_PROCESSOR=powerpc64 ;; ++ esac ++ fi ++ fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) +@@ -1256,7 +1287,7 @@ EOF + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; +- NSE-?:NONSTOP_KERNEL:*:*) ++ NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) +@@ -1330,9 +1361,6 @@ EOF + exit ;; + esac + +-#echo '(No uname command or uname output not recognized.)' 1>&2 +-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 +- + eval $set_cc_for_build + cat >$dummy.c < conftest.$ac_ext +@@ -1326,7 +1326,10 @@ + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; +- ppc64-*linux*|powerpc64-*linux*) ++ powerpc64le-*linux*) ++ LD="${LD-ld} -m elf32lppclinux" ++ ;; ++ powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) +@@ -1345,7 +1348,10 @@ + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; +- ppc*-*linux*|powerpc*-*linux*) ++ powerpcle-*linux*) ++ LD="${LD-ld} -m elf64lppc" ++ ;; ++ powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) diff --git a/SOURCES/fastjar-0.97-aarch64-config.patch b/SOURCES/fastjar-0.97-aarch64-config.patch new file mode 100644 index 0000000..d340abc --- /dev/null +++ b/SOURCES/fastjar-0.97-aarch64-config.patch @@ -0,0 +1,1536 @@ +--- fastjar-0.97/config.guess ++++ fastjar-0.97/config.guess +@@ -1,10 +1,10 @@ + 
#! /bin/sh + # Attempt to guess a canonical system name. + # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +-# Free Software Foundation, Inc. ++# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, ++# 2011, 2012 Free Software Foundation, Inc. + +-timestamp='2008-01-23' ++timestamp='2012-06-10' + + # This file is free software; you can redistribute it and/or modify it + # under the terms of the GNU General Public License as published by +@@ -17,9 +17,7 @@ timestamp='2008-01-23' + # General Public License for more details. + # + # You should have received a copy of the GNU General Public License +-# along with this program; if not, write to the Free Software +-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +-# 02110-1301, USA. ++# along with this program; if not, see . + # + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a +@@ -27,16 +25,16 @@ timestamp='2008-01-23' + # the same distribution terms that you use for the rest of that program. + + +-# Originally written by Per Bothner . +-# Please send patches to . Submit a context +-# diff and a properly formatted ChangeLog entry. ++# Originally written by Per Bothner. Please send patches (context ++# diff format) to and include a ChangeLog ++# entry. + # + # This script attempts to guess a canonical system name similar to + # config.sub. If it succeeds, it prints the system name on stdout, and + # exits with 0. Otherwise, it exits with 1. + # +-# The plan is that this can be called by configure scripts if you +-# don't specify an explicit build system type. ++# You can get the latest version of this script from: ++# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD + + me=`echo "$0" | sed -e 's,.*/,,'` + +@@ -56,8 +54,9 @@ version="\ + GNU config.guess ($timestamp) + + Originally written by Per Bothner. +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. ++Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ++2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 ++Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." +@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or +- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, ++ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward +@@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ +- | grep __ELF__ >/dev/null ++ | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? 
+@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + fi + ;; + *) +- os=netbsd ++ os=netbsd + ;; + esac + # The OS release +@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) +- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ++ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on +@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` +- exit ;; ++ # Reset EXIT trap before exiting to avoid spurious non-zero exit code. ++ exitcode=$? ++ trap '' 0 ++ exit $exitcode ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead +@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) +- echo powerpc-ibm-os400 ++ echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} +@@ -324,14 +326,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; ++ s390x:SunOS:*:*) ++ echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` ++ exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; ++ i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) ++ echo i386-pc-auroraux${UNAME_RELEASE} ++ exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) +- echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` ++ eval $set_cc_for_build ++ SUN_ARCH="i386" ++ # If there is a compiler, see if it is configured for 64-bit objects. ++ # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. ++ # This test works for both compilers. ++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then ++ if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ ++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ ++ grep IS_64BIT_ARCH >/dev/null ++ then ++ SUN_ARCH="x86_64" ++ fi ++ fi ++ echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize +@@ -375,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) +- echo m68k-atari-mint${UNAME_RELEASE} ++ echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} +- exit ;; ++ exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) +- echo m68k-atari-mint${UNAME_RELEASE} ++ echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) +- echo m68k-milan-mint${UNAME_RELEASE} +- exit ;; ++ echo m68k-milan-mint${UNAME_RELEASE} ++ exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) +- echo m68k-hades-mint${UNAME_RELEASE} +- exit ;; ++ echo m68k-hades-mint${UNAME_RELEASE} ++ exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) +- echo m68k-unknown-mint${UNAME_RELEASE} +- exit ;; ++ echo m68k-unknown-mint${UNAME_RELEASE} ++ exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; +@@ -461,8 +482,8 @@ EOF + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) +- # DG/UX returns AViiON for all architectures +- UNAME_PROCESSOR=`/usr/bin/uname -p` ++ # DG/UX returns AViiON for all architectures ++ UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ +@@ -475,7 +496,7 @@ EOF + else + echo i586-dg-dgux${UNAME_RELEASE} + fi +- exit ;; ++ exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; +@@ -532,7 +553,7 @@ EOF + echo rs6000-ibm-aix3.2 + fi + exit ;; +- *:AIX:*:[456]) ++ *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 +@@ -575,52 +596,52 @@ EOF + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` +- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` +- case "${sc_cpu_version}" in +- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 +- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 +- 532) # CPU_PA_RISC2_0 +- case "${sc_kernel_bits}" in +- 32) HP_ARCH="hppa2.0n" ;; +- 64) HP_ARCH="hppa2.0w" ;; ++ sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` ++ case "${sc_cpu_version}" in ++ 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 ++ 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 ++ 532) # CPU_PA_RISC2_0 ++ case "${sc_kernel_bits}" in ++ 32) HP_ARCH="hppa2.0n" ;; ++ 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 +- esac ;; +- esac ++ esac ;; ++ esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c ++ sed 's/^ //' << EOF >$dummy.c + +- #define _HPUX_SOURCE +- #include +- #include ++ #define _HPUX_SOURCE ++ #include ++ #include + +- int main () +- { +- #if defined(_SC_KERNEL_BITS) +- long bits = sysconf(_SC_KERNEL_BITS); +- #endif +- long cpu = sysconf (_SC_CPU_VERSION); ++ int main () ++ { ++ #if defined(_SC_KERNEL_BITS) ++ long bits = sysconf(_SC_KERNEL_BITS); ++ #endif ++ long cpu = sysconf (_SC_CPU_VERSION); + +- switch (cpu) +- { +- case CPU_PA_RISC1_0: puts ("hppa1.0"); break; +- case CPU_PA_RISC1_1: puts ("hppa1.1"); break; +- case CPU_PA_RISC2_0: +- #if defined(_SC_KERNEL_BITS) +- switch (bits) +- { +- case 64: puts ("hppa2.0w"); break; +- case 32: puts ("hppa2.0n"); break; +- default: puts ("hppa2.0"); break; +- } break; +- #else /* !defined(_SC_KERNEL_BITS) */ +- puts 
("hppa2.0"); break; +- #endif +- default: puts ("hppa1.0"); break; +- } +- exit (0); +- } ++ switch (cpu) ++ { ++ case CPU_PA_RISC1_0: puts ("hppa1.0"); break; ++ case CPU_PA_RISC1_1: puts ("hppa1.1"); break; ++ case CPU_PA_RISC2_0: ++ #if defined(_SC_KERNEL_BITS) ++ switch (bits) ++ { ++ case 64: puts ("hppa2.0w"); break; ++ case 32: puts ("hppa2.0n"); break; ++ default: puts ("hppa2.0"); break; ++ } break; ++ #else /* !defined(_SC_KERNEL_BITS) */ ++ puts ("hppa2.0"); break; ++ #endif ++ default: puts ("hppa1.0"); break; ++ } ++ exit (0); ++ } + EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa +@@ -640,7 +661,7 @@ EOF + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | +- grep __LP64__ >/dev/null ++ grep -q __LP64__ + then + HP_ARCH="hppa2.0w" + else +@@ -711,22 +732,22 @@ EOF + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd +- exit ;; ++ exit ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi +- exit ;; ++ exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd +- exit ;; ++ exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd +- exit ;; ++ exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd +- exit ;; ++ exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; +@@ -750,14 +771,14 @@ EOF + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` +- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` +- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` +- echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" +- exit ;; ++ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` ++ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` ++ echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" ++ exit ;; + 5000:UNIX_System_V:4.*:*) +- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` +- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` +- echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" ++ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` ++ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` ++ echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} +@@ -769,13 +790,12 @@ EOF + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) +- case ${UNAME_MACHINE} in +- pc98) +- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; ++ UNAME_PROCESSOR=`/usr/bin/uname -p` ++ case ${UNAME_PROCESSOR} in + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) +- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; ++ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) +@@ -784,19 +804,22 @@ EOF + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; ++ 
i*:MSYS*:*) ++ echo ${UNAME_MACHINE}-pc-msys ++ exit ;; + i*:windows32*:*) +- # uname -m includes "-pc" on this system. +- echo ${UNAME_MACHINE}-mingw32 ++ # uname -m includes "-pc" on this system. ++ echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; +- *:Interix*:[3456]*) +- case ${UNAME_MACHINE} in ++ *:Interix*:*) ++ case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; +- EM64T | authenticamd) ++ authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) +@@ -806,6 +829,9 @@ EOF + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; ++ 8664:Windows_NT:*) ++ echo x86_64-pc-mks ++ exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we +@@ -835,6 +861,27 @@ EOF + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; ++ aarch64:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; ++ aarch64_be:Linux:*:*) ++ UNAME_MACHINE=aarch64_be ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; ++ alpha:Linux:*:*) ++ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in ++ EV5) UNAME_MACHINE=alphaev5 ;; ++ EV56) UNAME_MACHINE=alphaev56 ;; ++ PCA56) UNAME_MACHINE=alphapca56 ;; ++ PCA57) UNAME_MACHINE=alphapca56 ;; ++ EV6) UNAME_MACHINE=alphaev6 ;; ++ EV67) UNAME_MACHINE=alphaev67 ;; ++ EV68*) UNAME_MACHINE=alphaev68 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi ++ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ++ exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ +@@ -842,20 +889,40 @@ EOF + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else +- echo ${UNAME_MACHINE}-unknown-linux-gnueabi ++ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ ++ | grep -q __ARM_PCS_VFP ++ then ++ echo ${UNAME_MACHINE}-unknown-linux-gnueabi ++ else ++ echo ${UNAME_MACHINE}-unknown-linux-gnueabihf ++ fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) +- echo cris-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) +- echo crisv32-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + frv:Linux:*:*) +- echo frv-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; ++ hexagon:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; ++ i*86:Linux:*:*) ++ LIBC=gnu ++ eval $set_cc_for_build ++ sed 's/^ //' << EOF >$dummy.c ++ #ifdef __dietlibc__ ++ LIBC=dietlibc ++ #endif ++EOF ++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` ++ echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu +@@ -866,74 +933,33 @@ EOF + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; +- mips:Linux:*:*) ++ mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU +- #undef mips +- #undef mipsel ++ #undef ${UNAME_MACHINE} ++ #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +- CPU=mipsel ++ CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) +- CPU=mips ++ CPU=${UNAME_MACHINE} + #else + CPU= 
+ #endif + #endif + EOF +- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' +- /^CPU/{ +- s: ::g +- p +- }'`" +- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } +- ;; +- mips64:Linux:*:*) +- eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c +- #undef CPU +- #undef mips64 +- #undef mips64el +- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +- CPU=mips64el +- #else +- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) +- CPU=mips64 +- #else +- CPU= +- #endif +- #endif +-EOF +- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' +- /^CPU/{ +- s: ::g +- p +- }'`" ++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) +- echo or32-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; +- ppc:Linux:*:*) +- echo powerpc-unknown-linux-gnu ++ padre:Linux:*:*) ++ echo sparc-unknown-linux-gnu + exit ;; +- ppc64:Linux:*:*) +- echo powerpc64-unknown-linux-gnu +- exit ;; +- alpha:Linux:*:*) +- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in +- EV5) UNAME_MACHINE=alphaev5 ;; +- EV56) UNAME_MACHINE=alphaev56 ;; +- PCA56) UNAME_MACHINE=alphapca56 ;; +- PCA57) UNAME_MACHINE=alphapca56 ;; +- EV6) UNAME_MACHINE=alphaev6 ;; +- EV67) UNAME_MACHINE=alphaev67 ;; +- EV68*) UNAME_MACHINE=alphaev68 ;; +- esac +- objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null +- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi +- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ++ parisc64:Linux:*:* | hppa64:Linux:*:*) ++ echo hppa64-unknown-linux-gnu + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level +@@ -943,14 +969,17 @@ EOF + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; +- parisc64:Linux:*:* | hppa64:Linux:*:*) +- echo hppa64-unknown-linux-gnu ++ ppc64:Linux:*:*) ++ echo powerpc64-unknown-linux-gnu ++ exit ;; ++ ppc:Linux:*:*) ++ echo powerpc-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu +@@ -958,78 +987,18 @@ EOF + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; ++ tile*:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) +- echo x86_64-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; +- i*86:Linux:*:*) +- # The BFD linker knows what the default object file format is, so +- # first see if it will tell us. cd to the root directory to prevent +- # problems with other programs or directories called `ld' in the path. +- # Set LC_ALL=C to ensure ld outputs messages in English. 
+- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ +- | sed -ne '/supported targets:/!d +- s/[ ][ ]*/ /g +- s/.*supported targets: *// +- s/ .*// +- p'` +- case "$ld_supported_targets" in +- elf32-i386) +- TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" +- ;; +- a.out-i386-linux) +- echo "${UNAME_MACHINE}-pc-linux-gnuaout" +- exit ;; +- coff-i386) +- echo "${UNAME_MACHINE}-pc-linux-gnucoff" +- exit ;; +- "") +- # Either a pre-BFD a.out linker (linux-gnuoldld) or +- # one that does not give us useful --help. +- echo "${UNAME_MACHINE}-pc-linux-gnuoldld" +- exit ;; +- esac +- # Determine whether the default compiler is a.out or elf +- eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c +- #include +- #ifdef __ELF__ +- # ifdef __GLIBC__ +- # if __GLIBC__ >= 2 +- LIBC=gnu +- # else +- LIBC=gnulibc1 +- # endif +- # else +- LIBC=gnulibc1 +- # endif +- #else +- #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +- LIBC=gnu +- #else +- LIBC=gnuaout +- #endif +- #endif +- #ifdef __dietlibc__ +- LIBC=dietlibc +- #endif +-EOF +- eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' +- /^LIBC/{ +- s: ::g +- p +- }'`" +- test x"${LIBC}" != x && { +- echo "${UNAME_MACHINE}-pc-linux-${LIBC}" +- exit +- } +- test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } +- ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both +@@ -1037,11 +1006,11 @@ EOF + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) +- # Unixware is an offshoot of SVR4, but it has its own version +- # number series starting with 2... +- # I am not positive that other SVR4 systems won't match this, ++ # Unixware is an offshoot of SVR4, but it has its own version ++ # number series starting with 2... ++ # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. +- # Use sysv4.2uw... so that sysv4* matches it. ++ # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) +@@ -1058,7 +1027,7 @@ EOF + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; +- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) ++ i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) +@@ -1073,7 +1042,7 @@ EOF + fi + exit ;; + i*86:*:5:[678]*) +- # UnixWare 7.x, OpenUNIX and OpenServer 6. ++ # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; +@@ -1101,10 +1070,13 @@ EOF + exit ;; + pc:*:*:*) + # Left here for compatibility: +- # uname -m prints for DJGPP always 'pc', but it prints nothing about +- # the processor, so we play safe by assuming i386. +- echo i386-pc-msdosdjgpp +- exit ;; ++ # uname -m prints for DJGPP always 'pc', but it prints nothing about ++ # the processor, so we play safe by assuming i586. ++ # Note: whatever this is, it MUST be the same as what config.sub ++ # prints for the "djgpp" host, or else GDB configury will decide that ++ # this is a cross-build. 
++ echo i586-pc-msdosdjgpp ++ exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; +@@ -1139,8 +1111,18 @@ EOF + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) +- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ +- && { echo i486-ncr-sysv4; exit; } ;; ++ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ ++ && { echo i486-ncr-sysv4; exit; } ;; ++ NCR*:*:4.2:* | MPRAS*:*:4.2:*) ++ OS_REL='.3' ++ test -r /etc/.relid \ ++ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` ++ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ ++ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } ++ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ ++ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ++ /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ ++ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; +@@ -1153,7 +1135,7 @@ EOF + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; +- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) ++ PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) +@@ -1173,10 +1155,10 @@ EOF + echo ns32k-sni-sysv + fi + exit ;; +- PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort +- # says +- echo i586-unisys-sysv4 +- exit ;; ++ PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort ++ # says ++ echo i586-unisys-sysv4 ++ exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm +@@ -1202,11 +1184,11 @@ EOF + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then +- echo mips-nec-sysv${UNAME_RELEASE} ++ echo mips-nec-sysv${UNAME_RELEASE} + else +- echo mips-unknown-sysv${UNAME_RELEASE} ++ echo mips-unknown-sysv${UNAME_RELEASE} + fi +- exit ;; ++ exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; +@@ -1216,6 +1198,9 @@ EOF + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; ++ BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
++ echo i586-pc-haiku ++ exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; +@@ -1243,6 +1228,16 @@ EOF + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in ++ i386) ++ eval $set_cc_for_build ++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then ++ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ ++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ ++ grep IS_64BIT_ARCH >/dev/null ++ then ++ UNAME_PROCESSOR="x86_64" ++ fi ++ fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} +@@ -1258,7 +1253,10 @@ EOF + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; +- NSE-?:NONSTOP_KERNEL:*:*) ++ NEO-?:NONSTOP_KERNEL:*:*) ++ echo neo-tandem-nsk${UNAME_RELEASE} ++ exit ;; ++ NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) +@@ -1303,13 +1301,13 @@ EOF + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) +- echo mips-sei-seiux${UNAME_RELEASE} ++ echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) +- UNAME_MACHINE=`(uname -p) 2>/dev/null` ++ UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; +@@ -1324,6 +1322,12 @@ EOF + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; ++ i*86:AROS:*:*) ++ echo ${UNAME_MACHINE}-pc-aros ++ exit ;; ++ x86_64:VMkernel:*:*) ++ echo ${UNAME_MACHINE}-unknown-esx ++ exit ;; + esac + + #echo '(No uname command or uname output not recognized.)' 1>&2 +@@ -1346,11 +1350,11 @@ main () + #include + printf ("m68k-sony-newsos%s\n", + #ifdef NEWSOS4 +- "4" ++ "4" + #else +- "" ++ "" + #endif +- ); exit (0); ++ ); exit (0); + #endif + #endif + +--- fastjar-0.97/config.sub ++++ fastjar-0.97/config.sub +@@ -1,10 +1,10 @@ + #! /bin/sh + # Configuration validation subroutine script. + # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 +-# Free Software Foundation, Inc. ++# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, ++# 2011, 2012 Free Software Foundation, Inc. + +-timestamp='2008-01-16' ++timestamp='2012-04-18' + + # This file is (in principle) common to ALL GNU software. + # The presence of a machine in this file suggests that SOME GNU software +@@ -21,9 +21,7 @@ timestamp='2008-01-16' + # GNU General Public License for more details. + # + # You should have received a copy of the GNU General Public License +-# along with this program; if not, write to the Free Software +-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +-# 02110-1301, USA. ++# along with this program; if not, see . + # + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a +@@ -32,13 +30,16 @@ timestamp='2008-01-16' + + + # Please send patches to . Submit a context +-# diff and a properly formatted ChangeLog entry. ++# diff and a properly formatted GNU ChangeLog entry. + # + # Configuration subroutine to validate and canonicalize a configuration type. + # Supply the specified configuration type as an argument. + # If it is invalid, we print an error message on stderr and exit with code 1. + # Otherwise, we print the canonical config type on stdout and succeed. 
+ ++# You can get the latest version of this script from: ++# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD ++ + # This file is supposed to be the same for all GNU packages + # and recognize all the CPU types, system types and aliases + # that are meaningful with *any* GNU software. +@@ -72,8 +73,9 @@ Report bugs and patches to ." + version="\ + GNU config.sub ($timestamp) + +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. ++Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, ++2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 ++Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." +@@ -120,12 +122,18 @@ esac + # Here we must recognize all the valid KERNEL-OS combinations. + maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` + case $maybe_os in +- nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ +- uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ ++ nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ ++ linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ ++ knetbsd*-gnu* | netbsd*-gnu* | \ ++ kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; ++ android-linux) ++ os=-linux-android ++ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown ++ ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] +@@ -148,10 +156,13 @@ case $os in + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ +- -apple | -axis | -knuth | -cray) ++ -apple | -axis | -knuth | -cray | -microblaze) + os= + basic_machine=$1 + ;; ++ -bluegene*) ++ os=-cnk ++ ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 +@@ -166,10 +177,10 @@ case $os in + os=-chorusos + basic_machine=$1 + ;; +- -chorusrdb) +- os=-chorusrdb ++ -chorusrdb) ++ os=-chorusrdb + basic_machine=$1 +- ;; ++ ;; + -hiux*) + os=-hiuxwe2 + ;; +@@ -214,6 +225,12 @@ case $os in + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; ++ -lynx*178) ++ os=-lynxos178 ++ ;; ++ -lynx*5) ++ os=-lynxos5 ++ ;; + -lynx*) + os=-lynxos + ;; +@@ -238,24 +255,32 @@ case $basic_machine in + # Some are omitted here because they have special meanings below. 
+ 1750a | 580 \ + | a29k \ ++ | aarch64 | aarch64_be \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ ++ | be32 | be64 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ ++ | epiphany \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ ++ | hexagon \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ ++ | le32 | le64 \ ++ | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ +- | maxq | mb | microblaze | mcore | mep \ ++ | maxq | mb | microblaze | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ +- | mips64vr | mips64vrel \ ++ | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ ++ | mips64r5900 | mips64r5900el \ ++ | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ +@@ -268,29 +293,42 @@ case $basic_machine in + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ ++ | moxie \ + | mt \ + | msp430 \ ++ | nds32 | nds32le | nds32be \ + | nios | nios2 \ + | ns16k | ns32k \ ++ | open8 \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ +- | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ ++ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pyramid \ ++ | rl78 | rx \ + | score \ +- | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ ++ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ +- | spu | strongarm \ +- | tahoe | thumb | tic4x | tic80 | tron \ +- | v850 | v850e \ ++ | spu \ ++ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ ++ | ubicom32 \ ++ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | we32k \ +- | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ +- | z8k) ++ | x86 | xc16x | xstormy16 | xtensa \ ++ | z8k | z80) + basic_machine=$basic_machine-unknown + ;; +- m6811 | m68hc11 | m6812 | m68hc12) +- # Motorola 68HC11/12. ++ c54x) ++ basic_machine=tic54x-unknown ++ ;; ++ c55x) ++ basic_machine=tic55x-unknown ++ ;; ++ c6x) ++ basic_machine=tic6x-unknown ++ ;; ++ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) + basic_machine=$basic_machine-unknown + os=-none + ;; +@@ -300,6 +338,21 @@ case $basic_machine in + basic_machine=mt-unknown + ;; + ++ strongarm | thumb | xscale) ++ basic_machine=arm-unknown ++ ;; ++ xgate) ++ basic_machine=$basic_machine-unknown ++ os=-none ++ ;; ++ xscaleeb) ++ basic_machine=armeb-unknown ++ ;; ++ ++ xscaleel) ++ basic_machine=armel-unknown ++ ;; ++ + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. +@@ -314,29 +367,36 @@ case $basic_machine in + # Recognize the basic CPU types with company name. 
+ 580-* \ + | a29k-* \ ++ | aarch64-* | aarch64_be-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ ++ | be32-* | be64-* \ + | bfin-* | bs2000-* \ +- | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ ++ | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ ++ | hexagon-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ ++ | le32-* | le64-* \ ++ | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ +- | m88110-* | m88k-* | maxq-* | mcore-* \ ++ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ +- | mips64vr-* | mips64vrel-* \ ++ | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ ++ | mips64r5900-* | mips64r5900el-* \ ++ | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ +@@ -351,27 +411,32 @@ case $basic_machine in + | mmix-* \ + | mt-* \ + | msp430-* \ ++ | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ ++ | open8-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ +- | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ ++ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pyramid-* \ +- | romp-* | rs6000-* \ +- | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ ++ | rl78-* | romp-* | rs6000-* | rx-* \ ++ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ +- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ +- | tahoe-* | thumb-* \ ++ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ ++ | tahoe-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ ++ | tile*-* \ + | tron-* \ +- | v850-* | v850e-* | vax-* \ ++ | ubicom32-* \ ++ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ ++ | vax-* \ + | we32k-* \ +- | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ ++ | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ +- | z8k-*) ++ | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. 
+ xtensa*) +@@ -393,7 +458,7 @@ case $basic_machine in + basic_machine=a29k-amd + os=-udi + ;; +- abacus) ++ abacus) + basic_machine=abacus-unknown + ;; + adobe68k) +@@ -439,6 +504,10 @@ case $basic_machine in + basic_machine=m68k-apollo + os=-bsd + ;; ++ aros) ++ basic_machine=i386-pc ++ os=-aros ++ ;; + aux) + basic_machine=m68k-apple + os=-aux +@@ -455,10 +524,27 @@ case $basic_machine in + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; ++ bluegene*) ++ basic_machine=powerpc-ibm ++ os=-cnk ++ ;; ++ c54x-*) ++ basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` ++ ;; ++ c55x-*) ++ basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` ++ ;; ++ c6x-*) ++ basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` ++ ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; ++ cegcc) ++ basic_machine=arm-unknown ++ os=-cegcc ++ ;; + convex-c1) + basic_machine=c1-convex + os=-bsd +@@ -487,7 +573,7 @@ case $basic_machine in + basic_machine=craynv-cray + os=-unicosmp + ;; +- cr16) ++ cr16 | cr16-*) + basic_machine=cr16-unknown + os=-elf + ;; +@@ -526,6 +612,10 @@ case $basic_machine in + basic_machine=m88k-motorola + os=-sysv3 + ;; ++ dicos) ++ basic_machine=i686-pc ++ os=-dicos ++ ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp +@@ -641,7 +731,6 @@ case $basic_machine in + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +-# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 +@@ -699,6 +788,9 @@ case $basic_machine in + basic_machine=ns32k-utek + os=-sysv + ;; ++ microblaze) ++ basic_machine=microblaze-xilinx ++ ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 +@@ -735,10 +827,18 @@ case $basic_machine in + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; ++ msys) ++ basic_machine=i386-pc ++ os=-msys ++ ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; ++ nacl) ++ basic_machine=le32-unknown ++ os=-nacl ++ ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 +@@ -803,6 +903,12 @@ case $basic_machine in + np1) + basic_machine=np1-gould + ;; ++ neo-tandem) ++ basic_machine=neo-tandem ++ ;; ++ nse-tandem) ++ basic_machine=nse-tandem ++ ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; +@@ -885,9 +991,10 @@ case $basic_machine in + ;; + power) basic_machine=power-ibm + ;; +- ppc) basic_machine=powerpc-unknown ++ ppc | ppcbe) basic_machine=powerpc-unknown + ;; +- ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ++ ppc-* | ppcbe-*) ++ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown +@@ -981,6 +1088,9 @@ case $basic_machine in + basic_machine=i860-stratus + os=-sysv4 + ;; ++ strongarm-* | thumb-*) ++ basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` ++ ;; + sun2) + basic_machine=m68000-sun + ;; +@@ -1037,20 +1147,8 @@ case $basic_machine in + basic_machine=t90-cray + os=-unicos + ;; +- tic54x | c54x*) +- basic_machine=tic54x-unknown +- os=-coff +- ;; +- tic55x | c55x*) +- basic_machine=tic55x-unknown +- os=-coff +- ;; +- tic6x | c6x*) +- basic_machine=tic6x-unknown +- os=-coff +- ;; + tile*) +- basic_machine=tile-unknown ++ basic_machine=$basic_machine-unknown + os=-linux-gnu + ;; + tx39) +@@ -1120,6 +1218,9 @@ case $basic_machine in + xps | xps100) + basic_machine=xps100-honeywell + ;; ++ xscale-* | xscalee[bl]-*) ++ basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` ++ ;; + ymp) + 
basic_machine=ymp-cray + os=-unicos +@@ -1128,6 +1229,10 @@ case $basic_machine in + basic_machine=z8k-unknown + os=-sim + ;; ++ z80-*-coff) ++ basic_machine=z80-unknown ++ os=-sim ++ ;; + none) + basic_machine=none-none + os=-none +@@ -1166,7 +1271,7 @@ case $basic_machine in + we32k) + basic_machine=we32k-att + ;; +- sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) ++ sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) +@@ -1213,9 +1318,12 @@ esac + if [ x"$os" != x"" ] + then + case $os in +- # First match some system type aliases +- # that might get confused with valid system types. ++ # First match some system type aliases ++ # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. ++ -auroraux) ++ os=-auroraux ++ ;; + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; +@@ -1236,10 +1344,11 @@ case $os in + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ +- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ +- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ ++ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ ++ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ ++ | -sym* | -kopensolaris* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ +- | -aos* \ ++ | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ +@@ -1248,9 +1357,10 @@ case $os in + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ +- | -chorusos* | -chorusrdb* \ +- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ +- | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ ++ | -chorusos* | -chorusrdb* | -cegcc* \ ++ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ ++ | -mingw32* | -linux-gnu* | -linux-android* \ ++ | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ +@@ -1258,7 +1368,7 @@ case $os in + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ +- | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) ++ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) + # Remember, each alternative MUST END IN *, to match a version number. + ;; + -qnx*) +@@ -1297,7 +1407,7 @@ case $os in + -opened*) + os=-openedition + ;; +- -os400*) ++ -os400*) + os=-os400 + ;; + -wince*) +@@ -1346,7 +1456,7 @@ case $os in + -sinix*) + os=-sysv4 + ;; +- -tpf*) ++ -tpf*) + os=-tpf + ;; + -triton*) +@@ -1388,6 +1498,11 @@ case $os in + -zvmoe) + os=-zvmoe + ;; ++ -dicos*) ++ os=-dicos ++ ;; ++ -nacl*) ++ ;; + -none) + ;; + *) +@@ -1410,10 +1525,10 @@ else + # system, and we'll never get to this point. 
+ + case $basic_machine in +- score-*) ++ score-*) + os=-elf + ;; +- spu-*) ++ spu-*) + os=-elf + ;; + *-acorn) +@@ -1425,8 +1540,20 @@ case $basic_machine in + arm*-semi) + os=-aout + ;; +- c4x-* | tic4x-*) +- os=-coff ++ c4x-* | tic4x-*) ++ os=-coff ++ ;; ++ hexagon-*) ++ os=-elf ++ ;; ++ tic54x-*) ++ os=-coff ++ ;; ++ tic55x-*) ++ os=-coff ++ ;; ++ tic6x-*) ++ os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) +@@ -1446,14 +1573,11 @@ case $basic_machine in + ;; + m68000-sun) + os=-sunos3 +- # This also exists in the configure program, but was not the +- # default. +- # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; +- mep-*) ++ mep-*) + os=-elf + ;; + mips*-cisco) +@@ -1480,7 +1604,7 @@ case $basic_machine in + *-ibm) + os=-aix + ;; +- *-knuth) ++ *-knuth) + os=-mmixware + ;; + *-wec) +@@ -1585,7 +1709,7 @@ case $basic_machine in + -sunos*) + vendor=sun + ;; +- -aix*) ++ -cnk*|-aix*) + vendor=ibm + ;; + -beos*) diff --git a/SOURCES/fastjar-0.97-filename0.patch b/SOURCES/fastjar-0.97-filename0.patch new file mode 100644 index 0000000..34a02a7 --- /dev/null +++ b/SOURCES/fastjar-0.97-filename0.patch @@ -0,0 +1,14 @@ +2010-03-01 Richard Guenther + + * jartool.c (read_entries): Properly zero-terminate filename. + +--- fastjar-0.97/jartool.c 6 Sep 2009 22:16:00 -0000 1.59 ++++ fastjar-0.97/jartool.c 1 Mar 2010 15:38:43 -0000 1.60 +@@ -790,6 +790,7 @@ int read_entries (int fd) + progname, jarfile); + return 1; + } ++ ze->filename[len] = '\0'; + len = UNPACK_UB4(header, CEN_EFLEN); + len += UNPACK_UB4(header, CEN_COMLEN); + if (lseek (fd, len, SEEK_CUR) == -1) diff --git a/SOURCES/fastjar-0.97-len1.patch b/SOURCES/fastjar-0.97-len1.patch new file mode 100644 index 0000000..722351d --- /dev/null +++ b/SOURCES/fastjar-0.97-len1.patch @@ -0,0 +1,16 @@ +2009-12-21 Chris Ball + + * jartool.c (add_file_to_jar): Test write return value against -1 + instead of 1. + +--- fastjar-0.97/jartool.c.jj 2008-10-15 12:35:37.000000000 -0400 ++++ fastjar-0.97/jartool.c 2009-12-22 06:48:09.309530000 -0500 +@@ -1257,7 +1257,7 @@ int add_file_to_jar(int jfd, int ffd, co + exit_on_error("write"); + + /* write the file name to the zip file */ +- if (1 == write(jfd, fname, file_name_length)) ++ if (-1 == write(jfd, fname, file_name_length)) + exit_on_error("write"); + + if(verbose){ diff --git a/SOURCES/fastjar-0.97-ppc64le-config.patch b/SOURCES/fastjar-0.97-ppc64le-config.patch new file mode 100644 index 0000000..5671561 --- /dev/null +++ b/SOURCES/fastjar-0.97-ppc64le-config.patch @@ -0,0 +1,708 @@ +--- fastjar-0.97/m4/libtool.m4.jj 2008-10-16 10:14:54.000000000 +0200 ++++ fastjar-0.97/m4/libtool.m4 2014-04-10 20:41:26.563064023 +0200 +@@ -1263,7 +1263,7 @@ ia64-*-hpux*) + rm -rf conftest* + ;; + +-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ ++x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ + s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out which ABI we are using. 
+ echo 'int i;' > conftest.$ac_ext +@@ -1277,7 +1277,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux* + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; +- ppc64-*linux*|powerpc64-*linux*) ++ powerpc64le-*linux*) ++ LD="${LD-ld} -m elf32lppclinux" ++ ;; ++ powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) +@@ -1296,7 +1299,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux* + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; +- ppc*-*linux*|powerpc*-*linux*) ++ powerpcle-*linux*) ++ LD="${LD-ld} -m elf64lppc" ++ ;; ++ powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) +--- fastjar-0.97/config.sub.jj 2014-04-10 20:40:53.000000000 +0200 ++++ fastjar-0.97/config.sub 2014-04-10 10:38:39.000000000 +0200 +@@ -1,24 +1,18 @@ + #! /bin/sh + # Configuration validation subroutine script. +-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +-# 2011, 2012 Free Software Foundation, Inc. +- +-timestamp='2012-04-18' +- +-# This file is (in principle) common to ALL GNU software. +-# The presence of a machine in this file suggests that SOME GNU software +-# can handle that machine. It does not imply ALL GNU software can. +-# +-# This file is free software; you can redistribute it and/or modify +-# it under the terms of the GNU General Public License as published by +-# the Free Software Foundation; either version 2 of the License, or ++# Copyright 1992-2013 Free Software Foundation, Inc. ++ ++timestamp='2013-10-01' ++ ++# This file is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or + # (at your option) any later version. + # +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. ++# This program is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. + # + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . +@@ -26,11 +20,12 @@ timestamp='2012-04-18' + # As a special exception to the GNU General Public License, if you + # distribute this file as part of a program that contains a + # configuration script generated by Autoconf, you may include it under +-# the same distribution terms that you use for the rest of that program. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). + + +-# Please send patches to . Submit a context +-# diff and a properly formatted GNU ChangeLog entry. ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. + # + # Configuration subroutine to validate and canonicalize a configuration type. + # Supply the specified configuration type as an argument. +@@ -73,9 +68,7 @@ Report bugs and patches to and include a ChangeLog +-# entry. ++# the same distribution terms that you use for the rest of that ++# program. This Exception is an additional permission under section 7 ++# of the GNU General Public License, version 3 ("GPLv3"). 
+ # +-# This script attempts to guess a canonical system name similar to +-# config.sub. If it succeeds, it prints the system name on stdout, and +-# exits with 0. Otherwise, it exits with 1. ++# Originally written by Per Bothner. + # + # You can get the latest version of this script from: + # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD ++# ++# Please send patches with a ChangeLog entry to config-patches@gnu.org. ++ + + me=`echo "$0" | sed -e 's,.*/,,'` + +@@ -54,9 +50,7 @@ version="\ + GNU config.guess ($timestamp) + + Originally written by Per Bothner. +-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +-Free Software Foundation, Inc. ++Copyright 1992-2013 Free Software Foundation, Inc. + + This is free software; see the source for copying conditions. There is NO + warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." +@@ -138,6 +132,27 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` | + UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown + UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + ++case "${UNAME_SYSTEM}" in ++Linux|GNU|GNU/*) ++ # If the system lacks a compiler, then just pick glibc. ++ # We could probably try harder. ++ LIBC=gnu ++ ++ eval $set_cc_for_build ++ cat <<-EOF > $dummy.c ++ #include ++ #if defined(__UCLIBC__) ++ LIBC=uclibc ++ #elif defined(__dietlibc__) ++ LIBC=dietlibc ++ #else ++ LIBC=gnu ++ #endif ++ EOF ++ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` ++ ;; ++esac ++ + # Note: order is significant - the case branches are not exclusive. + + case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +@@ -200,6 +215,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:$ + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ echo "${machine}-${os}${release}" + exit ;; ++ *:Bitrig:*:*) ++ UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` ++ echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} ++ exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} +@@ -302,7 +321,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:$ + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; +- arm:riscos:*:*|arm:RISCOS:*:*) ++ arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) +@@ -801,6 +820,9 @@ EOF + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; ++ *:MINGW64*:*) ++ echo ${UNAME_MACHINE}-pc-mingw64 ++ exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; +@@ -852,21 +874,21 @@ EOF + exit ;; + *:GNU:*:*) + # the GNU system +- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` ++ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland +- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu ++ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + aarch64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in +@@ -879,59 +901,54 @@ EOF + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 +- if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi +- echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ++ if test "$?" 
= 0 ; then LIBC="gnulibc1" ; fi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; ++ arc:Linux:*:* | arceb:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then +- echo ${UNAME_MACHINE}-unknown-linux-gnueabi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + else +- echo ${UNAME_MACHINE}-unknown-linux-gnueabihf ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + cris:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + crisv32:Linux:*:*) +- echo ${UNAME_MACHINE}-axis-linux-gnu ++ echo ${UNAME_MACHINE}-axis-linux-${LIBC} + exit ;; + frv:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + hexagon:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:Linux:*:*) +- LIBC=gnu +- eval $set_cc_for_build +- sed 's/^ //' << EOF >$dummy.c +- #ifdef __dietlibc__ +- LIBC=dietlibc +- #endif +-EOF +- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` +- echo "${UNAME_MACHINE}-pc-linux-${LIBC}" ++ echo ${UNAME_MACHINE}-pc-linux-${LIBC} + exit ;; + ia64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m32r*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + m68*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build +@@ -950,54 +967,63 @@ EOF + #endif + EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` +- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ++ test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + ;; ++ or1k:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; + or32:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + padre:Linux:*:*) +- echo sparc-unknown-linux-gnu ++ echo sparc-unknown-linux-${LIBC} + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) +- echo hppa64-unknown-linux-gnu ++ echo hppa64-unknown-linux-${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in +- PA7*) echo hppa1.1-unknown-linux-gnu ;; +- PA8*) echo hppa2.0-unknown-linux-gnu ;; +- *) echo hppa-unknown-linux-gnu ;; ++ PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; ++ PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; ++ *) echo hppa-unknown-linux-${LIBC} ;; + esac + exit ;; + ppc64:Linux:*:*) +- echo powerpc64-unknown-linux-gnu ++ echo powerpc64-unknown-linux-${LIBC} + exit ;; + ppc:Linux:*:*) +- echo powerpc-unknown-linux-gnu ++ echo powerpc-unknown-linux-${LIBC} ++ exit ;; ++ ppc64le:Linux:*:*) ++ echo powerpc64le-unknown-linux-${LIBC} ++ exit ;; ++ ppcle:Linux:*:*) ++ echo powerpcle-unknown-linux-${LIBC} + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) +- echo ${UNAME_MACHINE}-ibm-linux ++ echo 
${UNAME_MACHINE}-ibm-linux-${LIBC} + exit ;; + sh64*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sh*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + tile*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + vax:Linux:*:*) +- echo ${UNAME_MACHINE}-dec-linux-gnu ++ echo ${UNAME_MACHINE}-dec-linux-${LIBC} + exit ;; + x86_64:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + xtensa*:Linux:*:*) +- echo ${UNAME_MACHINE}-unknown-linux-gnu ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. +@@ -1201,6 +1227,9 @@ EOF + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; ++ x86_64:Haiku:*:*) ++ echo x86_64-unknown-haiku ++ exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; +@@ -1227,19 +1256,21 @@ EOF + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown +- case $UNAME_PROCESSOR in +- i386) +- eval $set_cc_for_build +- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then +- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ +- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ +- grep IS_64BIT_ARCH >/dev/null +- then +- UNAME_PROCESSOR="x86_64" +- fi +- fi ;; +- unknown) UNAME_PROCESSOR=powerpc ;; +- esac ++ eval $set_cc_for_build ++ if test "$UNAME_PROCESSOR" = unknown ; then ++ UNAME_PROCESSOR=powerpc ++ fi ++ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then ++ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ ++ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ ++ grep IS_64BIT_ARCH >/dev/null ++ then ++ case $UNAME_PROCESSOR in ++ i386) UNAME_PROCESSOR=x86_64 ;; ++ powerpc) UNAME_PROCESSOR=powerpc64 ;; ++ esac ++ fi ++ fi + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) +@@ -1330,9 +1361,6 @@ EOF + exit ;; + esac + +-#echo '(No uname command or uname output not recognized.)' 1>&2 +-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 +- + eval $set_cc_for_build + cat >$dummy.c < conftest.$ac_ext +@@ -6936,7 +6936,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux* + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; +- ppc64-*linux*|powerpc64-*linux*) ++ powerpc64le-*linux*) ++ LD="${LD-ld} -m elf32lppclinux" ++ ;; ++ powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) +@@ -6955,7 +6958,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux* + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; +- ppc*-*linux*|powerpc*-*linux*) ++ powerpcle-*linux*) ++ LD="${LD-ld} -m elf64lppc" ++ ;; ++ powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) diff --git a/SOURCES/fastjar-0.97-segfault.patch b/SOURCES/fastjar-0.97-segfault.patch new file mode 100644 index 0000000..ab62624 --- /dev/null +++ b/SOURCES/fastjar-0.97-segfault.patch @@ -0,0 +1,29 @@ +2009-01-14 Jakub Jelinek + + * jartool.c (make_manifest): Initialize current_time before + calling unix2dostime on it. 
+ +--- fastjar-0.97/jartool.c.jj 2008-10-15 18:35:37.000000000 +0200 ++++ fastjar-0.97/jartool.c 2009-01-14 15:40:50.000000000 +0100 +@@ -820,6 +820,10 @@ int make_manifest(int jfd, const char *m + int mod_time; /* file modification time */ + struct zipentry *ze; + ++ current_time = time(NULL); ++ if(current_time == (time_t)-1) ++ exit_on_error("time"); ++ + mod_time = unix2dostime(¤t_time); + + /* If we are creating a new manifest, create a META-INF directory entry */ +@@ -828,10 +832,6 @@ int make_manifest(int jfd, const char *m + + memset((file_header + 12), '\0', 16); /*clear mod time, crc, size fields*/ + +- current_time = time(NULL); +- if(current_time == (time_t)-1) +- exit_on_error("time"); +- + PACK_UB2(file_header, LOC_EXTRA, 0); + PACK_UB2(file_header, LOC_COMP, 0); + PACK_UB2(file_header, LOC_FNLEN, nlen); diff --git a/SOURCES/fastjar-CVE-2010-0831.patch b/SOURCES/fastjar-CVE-2010-0831.patch new file mode 100644 index 0000000..2c6e23c --- /dev/null +++ b/SOURCES/fastjar-CVE-2010-0831.patch @@ -0,0 +1,102 @@ +2010-06-10 Jakub Jelinek + Dan Rosenberg + + * jartool.c (extract_jar): Fix up checks for traversal to parent + directories, disallow absolute paths, make the code slightly more + efficient. + +--- fastjar-0.97/jartool.c.jj 2009-09-07 00:10:47.000000000 +0200 ++++ fastjar-0.97/jartool.c 2010-06-08 20:00:29.000000000 +0200 +@@ -1730,7 +1730,17 @@ int extract_jar(int fd, const char **fil + struct stat sbuf; + int depth = 0; + +- tmp_buff = malloc(sizeof(char) * strlen((const char *)filename)); ++ if(*filename == '/'){ ++ fprintf(stderr, "Absolute path names are not allowed.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ tmp_buff = malloc(strlen((const char *)filename)); ++ ++ if(tmp_buff == NULL) { ++ fprintf(stderr, "Out of memory.\n"); ++ exit(EXIT_FAILURE); ++ } + + for(;;){ + const ub1 *idx = (const unsigned char *)strchr((const char *)start, '/'); +@@ -1738,25 +1748,28 @@ int extract_jar(int fd, const char **fil + if(idx == NULL) + break; + else if(idx == start){ ++ tmp_buff[idx - filename] = '/'; + start++; + continue; + } +- start = idx + 1; + +- strncpy(tmp_buff, (const char *)filename, (idx - filename)); +- tmp_buff[(idx - filename)] = '\0'; ++ memcpy(tmp_buff + (start - filename), (const char *)start, (idx - start)); ++ tmp_buff[idx - filename] = '\0'; + + #ifdef DEBUG + printf("checking the existance of %s\n", tmp_buff); + #endif +- if(strcmp(tmp_buff, "..") == 0){ ++ if(idx - start == 2 && memcmp(start, "..", 2) == 0){ + --depth; + if (depth < 0){ + fprintf(stderr, "Traversal to parent directories during unpacking!\n"); + exit(EXIT_FAILURE); + } +- } else if (strcmp(tmp_buff, ".") != 0) ++ } else if (idx - start != 1 || *start != '.') + ++depth; ++ ++ start = idx + 1; ++ + if(stat(tmp_buff, &sbuf) < 0){ + if(errno != ENOENT) + exit_on_error("stat"); +@@ -1765,6 +1778,7 @@ int extract_jar(int fd, const char **fil + #ifdef DEBUG + printf("Directory exists\n"); + #endif ++ tmp_buff[idx - filename] = '/'; + continue; + }else { + fprintf(stderr, "Hmmm.. 
%s exists but isn't a directory!\n", +@@ -1781,10 +1795,11 @@ int extract_jar(int fd, const char **fil + if(verbose && handle) + printf("%10s: %s/\n", "created", tmp_buff); + ++ tmp_buff[idx - filename] = '/'; + } + + /* only a directory */ +- if(strlen((const char *)start) == 0) ++ if(*start == '\0') + dir = TRUE; + + #ifdef DEBUG +@@ -1792,7 +1807,7 @@ int extract_jar(int fd, const char **fil + #endif + + /* If the entry was just a directory, don't write to file, etc */ +- if(strlen((const char *)start) == 0) ++ if(*start == '\0') + f_fd = -1; + + free(tmp_buff); +@@ -1876,7 +1891,8 @@ int extract_jar(int fd, const char **fil + exit(EXIT_FAILURE); + } + +- close(f_fd); ++ if (f_fd != -1) ++ close(f_fd); + + if(verbose && dir == FALSE && handle) + printf("%10s: %s\n", diff --git a/SOURCES/fastjar-man.patch b/SOURCES/fastjar-man.patch new file mode 100644 index 0000000..34bf704 --- /dev/null +++ b/SOURCES/fastjar-man.patch @@ -0,0 +1,27 @@ +2010-03-24 Jan Kratochvil + + * Makefile.am (POD2MAN): Provide --date from ChangeLog. + * Makefile.in: Regenerate. + +--- fastjar-0.97/Makefile.am.jj 2008-10-16 04:24:55.000000000 -0400 ++++ fastjar-0.97/Makefile.am 2010-06-21 09:29:41.021398000 -0400 +@@ -39,7 +39,7 @@ EXTRA_DIST = \ + texi2pod.pl + + TEXI2POD = perl $(srcdir)/texi2pod.pl +-POD2MAN = pod2man --center="GNU" --release=@VERSION@ ++POD2MAN = pod2man --center="GNU" --release=@VERSION@ --date=$(shell sed -n '1s/ .*//p' <$(srcdir)/ChangeLog) + + .pod.1: + -($(POD2MAN) --section=1 $< > $(@).T$$$$ && \ +--- fastjar-0.97/Makefile.in.jj 2008-10-16 04:15:16.000000000 -0400 ++++ fastjar-0.97/Makefile.in 2010-06-21 09:30:15.882810000 -0400 +@@ -515,7 +515,7 @@ EXTRA_DIST = \ + texi2pod.pl + + TEXI2POD = perl $(srcdir)/texi2pod.pl +-POD2MAN = pod2man --center="GNU" --release=@VERSION@ ++POD2MAN = pod2man --center="GNU" --release=@VERSION@ --date=$(shell sed -n '1s/ .*//p' <$(srcdir)/ChangeLog) + + #SPLINT_FLAGS=-I . -I $(srcdir)/lib -I $(srcdir) -DHAVE_CONFIG_H +posixlib +weak + SPLINT_FLAGS = -I . -I $(srcdir)/lib -I $(srcdir) -DHAVE_CONFIG_H -DPRIx32= -warnposix +weak diff --git a/SOURCES/gcc48-aarch64-ada.patch b/SOURCES/gcc48-aarch64-ada.patch new file mode 100644 index 0000000..4d2fb7d --- /dev/null +++ b/SOURCES/gcc48-aarch64-ada.patch @@ -0,0 +1,96 @@ +2014-04-09 Richard Henderson + + * gcc-interface/Makefile.in: Support aarch64-linux. + * init.c: Enable alternate stack support also on aarch64. + * types.h (Fat_Pointer): Remove aligned attribute. + +--- gcc/ada/gcc-interface/Makefile.in ++++ gcc/ada/gcc-interface/Makefile.in +@@ -2123,6 +2123,44 @@ ifeq ($(strip $(filter-out alpha% linux%,$(arch) $(osys))),) + LIBRARY_VERSION := $(LIB_VERSION) + endif + ++# AArch64 Linux ++ifeq ($(strip $(filter-out aarch64% linux%,$(arch) $(osys))),) ++ LIBGNAT_TARGET_PAIRS = \ ++ a-exetim.adb + + * common/config/aarch64/aarch64-common.c (TARGET_OPTION_INIT_STRUCT): + Define. + (aarch64_option_init_struct): New function. + +--- gcc/common/config/aarch64/aarch64-common.c ++++ gcc/common/config/aarch64/aarch64-common.c +@@ -39,6 +39,9 @@ + #undef TARGET_OPTION_OPTIMIZATION_TABLE + #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table + ++#undef TARGET_OPTION_INIT_STRUCT ++#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct ++ + /* Set default optimization options. 
*/ + static const struct default_options aarch_option_optimization_table[] = + { +@@ -47,6 +50,16 @@ static const struct default_options aarch_option_optimization_table[] = + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++/* Implement TARGET_OPTION_INIT_STRUCT. */ ++ ++static void ++aarch64_option_init_struct (struct gcc_options *opts) ++{ ++ /* By default, always emit DWARF-2 unwind info. This allows debugging ++ without maintaining a stack frame back-chain. */ ++ opts->x_flag_asynchronous_unwind_tables = 1; ++} ++ + /* Implement TARGET_HANDLE_OPTION. + This function handles the target specific options for CPU/target selection. + diff --git a/SOURCES/gcc48-aarch64-unwind-opt.patch b/SOURCES/gcc48-aarch64-unwind-opt.patch new file mode 100644 index 0000000..074cd7e --- /dev/null +++ b/SOURCES/gcc48-aarch64-unwind-opt.patch @@ -0,0 +1,342 @@ +2014-08-08 Richard Henderson + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Add + cfi_ops argument, for restore put REG_CFA_RESTORE notes into + *cfi_ops rather than on individual insns. Cleanup. + (aarch64_save_or_restore_callee_save_registers): Likewise. + (aarch64_expand_prologue): Adjust caller. + (aarch64_expand_epilogue): Likewise. Cleanup. Emit queued cfi_ops + on the stack restore insn. + +--- gcc/config/aarch64/aarch64.c 2014-07-15 02:27:16.000000000 -0700 ++++ gcc/config/aarch64/aarch64.c 2014-08-21 12:52:44.190455860 -0700 +@@ -1603,24 +1603,23 @@ aarch64_register_saved_on_entry (int reg + + static void + aarch64_save_or_restore_fprs (int start_offset, int increment, +- bool restore, rtx base_rtx) +- ++ bool restore, rtx base_rtx, rtx *cfi_ops) + { + unsigned regno; + unsigned regno2; + rtx insn; + rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + +- + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { +- rtx mem; ++ rtx mem, reg1; + mem = gen_mem_ref (DFmode, + plus_constant (Pmode, + base_rtx, + start_offset)); ++ reg1 = gen_rtx_REG (DFmode, regno); + + for (regno2 = regno + 1; + regno2 <= V31_REGNUM +@@ -1632,54 +1631,51 @@ aarch64_save_or_restore_fprs (int start_ + if (regno2 <= V31_REGNUM && + aarch64_register_saved_on_entry (regno2)) + { +- rtx mem2; ++ rtx mem2, reg2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (DFmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); ++ reg2 = gen_rtx_REG (DFmode, regno2); ++ + if (restore == false) + { +- insn = emit_insn +- ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), +- mem2, gen_rtx_REG (DFmode, regno2))); +- ++ insn = emit_insn (gen_store_pairdf (mem, reg1, mem2, reg2)); ++ /* The first part of a frame-related parallel insn ++ is always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. 
*/ ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ RTX_FRAME_RELATED_P (insn) = 1; + } + else + { +- insn = emit_insn +- ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, +- gen_rtx_REG (DFmode, regno2), mem2)); +- +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno)); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2)); ++ emit_insn (gen_load_pairdf (reg1, mem, reg2, mem2)); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, +- 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) +- insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); ++ { ++ insn = emit_move_insn (mem, reg1); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + else + { +- insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); ++ emit_move_insn (reg1, mem); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); + } + start_offset += increment; + } +- RTX_FRAME_RELATED_P (insn) = 1; + } + } +- + } + + +@@ -1687,13 +1683,14 @@ aarch64_save_or_restore_fprs (int start_ + restore's have to happen. */ + static void + aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, +- bool restore) ++ bool restore, rtx *cfi_ops) + { + rtx insn; + rtx base_rtx = stack_pointer_rtx; + HOST_WIDE_INT start_offset = offset; + HOST_WIDE_INT increment = UNITS_PER_WORD; +- rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; ++ rtx (*gen_mem_ref)(enum machine_mode, rtx) ++ = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; + unsigned regno; + unsigned regno2; +@@ -1702,11 +1699,13 @@ aarch64_save_or_restore_callee_save_regi + { + if (aarch64_register_saved_on_entry (regno)) + { +- rtx mem; ++ rtx mem, reg1; ++ + mem = gen_mem_ref (Pmode, + plus_constant (Pmode, + base_rtx, + start_offset)); ++ reg1 = gen_rtx_REG (DImode, regno); + + for (regno2 = regno + 1; + regno2 <= limit +@@ -1718,56 +1717,54 @@ aarch64_save_or_restore_callee_save_regi + if (regno2 <= limit && + aarch64_register_saved_on_entry (regno2)) + { +- rtx mem2; ++ rtx mem2, reg2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (Pmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); ++ reg2 = gen_rtx_REG (DImode, regno2); ++ + if (restore == false) + { +- insn = emit_insn +- ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), +- mem2, gen_rtx_REG (DImode, regno2))); +- ++ insn = emit_insn (gen_store_pairdi (mem, reg1, mem2, reg2)); ++ /* The first part of a frame-related parallel insn ++ is always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. 
*/ ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ RTX_FRAME_RELATED_P (insn) = 1; + } + else + { +- insn = emit_insn +- ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, +- gen_rtx_REG (DImode, regno2), mem2)); +- +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); ++ emit_insn (gen_load_pairdi (reg1, mem, reg2, mem2)); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, +- 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) +- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); ++ { ++ insn = emit_move_insn (mem, reg1); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + else + { +- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); +- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); ++ emit_move_insn (reg1, mem); ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); + } + start_offset += increment; + } +- RTX_FRAME_RELATED_P (insn) = 1; + } + } + +- aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); +- ++ aarch64_save_or_restore_fprs (start_offset, increment, restore, ++ base_rtx, cfi_ops); + } + + /* AArch64 stack frames generated by this compiler look like: +@@ -1966,7 +1963,7 @@ aarch64_expand_prologue (void) + } + + aarch64_save_or_restore_callee_save_registers +- (fp_offset + cfun->machine->frame.hardfp_offset, 0); ++ (fp_offset + cfun->machine->frame.hardfp_offset, 0, NULL); + } + + /* when offset >= 512, +@@ -1991,6 +1988,7 @@ aarch64_expand_epilogue (bool for_sibcal + HOST_WIDE_INT fp_offset; + rtx insn; + rtx cfa_reg; ++ rtx cfi_ops = NULL; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; +@@ -2035,15 +2033,17 @@ aarch64_expand_epilogue (bool for_sibcal + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); ++ /* CFA should be calculated from the value of SP from now on. */ ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ plus_constant (Pmode, hard_frame_pointer_rtx, ++ -fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; +- /* As SP is set to (FP - fp_offset), according to the rules in +- dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated +- from the value of SP from now on. */ + cfa_reg = stack_pointer_rtx; + } + + aarch64_save_or_restore_callee_save_registers +- (fp_offset + cfun->machine->frame.hardfp_offset, 1); ++ (fp_offset + cfun->machine->frame.hardfp_offset, 1, &cfi_ops); + + /* Restore the frame pointer and lr if the frame pointer is needed. 
*/ + if (offset > 0) +@@ -2051,6 +2051,8 @@ aarch64_expand_epilogue (bool for_sibcal + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; ++ rtx reg_fp = hard_frame_pointer_rtx; ++ rtx reg_lr = gen_rtx_REG (DImode, LR_REGNUM); + + if (fp_offset) + { +@@ -2063,52 +2065,36 @@ aarch64_expand_epilogue (bool for_sibcal + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); +- insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, +- mem_fp, +- gen_rtx_REG (DImode, +- LR_REGNUM), +- mem_lr)); ++ emit_insn (gen_load_pairdi (reg_fp, mem_fp, reg_lr, mem_lr)); ++ ++ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, ++ GEN_INT (offset))); + } + else + { + insn = emit_insn (gen_loadwb_pairdi_di +- (stack_pointer_rtx, +- stack_pointer_rtx, +- hard_frame_pointer_rtx, +- gen_rtx_REG (DImode, LR_REGNUM), +- GEN_INT (offset), ++ (stack_pointer_rtx, stack_pointer_rtx, ++ reg_fp, reg_lr, GEN_INT (offset), + GEN_INT (GET_MODE_SIZE (DImode) + offset))); +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; +- add_reg_note (insn, REG_CFA_ADJUST_CFA, +- (gen_rtx_SET (Pmode, stack_pointer_rtx, +- plus_constant (Pmode, cfa_reg, +- offset)))); +- } +- +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); +- add_reg_note (insn, REG_CFA_RESTORE, +- gen_rtx_REG (DImode, LR_REGNUM)); +- +- if (fp_offset) +- { +- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, +- GEN_INT (offset))); +- RTX_FRAME_RELATED_P (insn) = 1; + } ++ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg_fp, cfi_ops); ++ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg_lr, cfi_ops); + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); +- RTX_FRAME_RELATED_P (insn) = 1; + } ++ cfi_ops = alloc_reg_note (REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (VOIDmode, stack_pointer_rtx, ++ plus_constant (Pmode, cfa_reg, ++ offset)), ++ cfi_ops); ++ REG_NOTES (insn) = cfi_ops; ++ RTX_FRAME_RELATED_P (insn) = 1; + } ++ else ++ gcc_assert (cfi_ops == NULL); + + /* Stack adjustment for exception handler. */ + if (crtl->calls_eh_return) diff --git a/SOURCES/gcc48-cloog-dl.patch b/SOURCES/gcc48-cloog-dl.patch new file mode 100644 index 0000000..52b5c51 --- /dev/null +++ b/SOURCES/gcc48-cloog-dl.patch @@ -0,0 +1,474 @@ +--- gcc/Makefile.in.jj 2012-12-13 17:09:20.000000000 +0100 ++++ gcc/Makefile.in 2012-12-14 11:45:22.585670055 +0100 +@@ -1022,7 +1022,7 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY) + # and the system's installed libraries. + LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \ + $(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS) +-BACKENDLIBS = $(CLOOGLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \ ++BACKENDLIBS = $(if $(CLOOGLIBS),-ldl) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \ + $(ZLIB) + # Any system libraries needed just for GNAT. 
+ SYSLIBS = @GNAT_LIBEXC@ +@@ -3442,6 +3442,15 @@ $(common_out_object_file): $(common_out_ + $(DIAGNOSTIC_CORE_H) $(FLAGS_H) $(OPTS_H) $(TM_H) $(TM_P_H) $(MACHMODE_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ + $< $(OUTPUT_OPTION) ++ ++graphite%.o : \ ++ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS)) ++graphite.o : \ ++ ALL_CFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CFLAGS)) ++graphite%.o : \ ++ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS)) ++graphite.o : \ ++ ALL_CXXFLAGS := -O $(filter-out -fkeep-inline-functions, $(ALL_CXXFLAGS)) + # + # Generate header and source files from the machine description, + # and compile them. +--- gcc/graphite-poly.h.jj 2012-12-13 11:31:27.000000000 +0100 ++++ gcc/graphite-poly.h 2012-12-14 13:41:41.970800726 +0100 +@@ -22,6 +22,369 @@ along with GCC; see the file COPYING3. + #ifndef GCC_GRAPHITE_POLY_H + #define GCC_GRAPHITE_POLY_H + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#define DYNSYMS \ ++ DYNSYM (clast_pprint); \ ++ DYNSYM (cloog_clast_create_from_input); \ ++ DYNSYM (cloog_clast_free); \ ++ DYNSYM (cloog_domain_from_isl_set); \ ++ DYNSYM (cloog_input_alloc); \ ++ DYNSYM (cloog_isl_state_malloc); \ ++ DYNSYM (cloog_options_free); \ ++ DYNSYM (cloog_options_malloc); \ ++ DYNSYM (cloog_scattering_from_isl_map); \ ++ DYNSYM (cloog_state_free); \ ++ DYNSYM (cloog_union_domain_add_domain); \ ++ DYNSYM (cloog_union_domain_alloc); \ ++ DYNSYM (cloog_union_domain_set_name); \ ++ DYNSYM (isl_aff_add_coefficient_si); \ ++ DYNSYM (isl_aff_add_constant); \ ++ DYNSYM (isl_aff_free); \ ++ DYNSYM (isl_aff_get_coefficient); \ ++ DYNSYM (isl_aff_get_space); \ ++ DYNSYM (isl_aff_mod); \ ++ DYNSYM (isl_aff_set_coefficient_si); \ ++ DYNSYM (isl_aff_set_constant_si); \ ++ DYNSYM (isl_aff_zero_on_domain); \ ++ DYNSYM (isl_band_free); \ ++ DYNSYM (isl_band_get_children); \ ++ DYNSYM (isl_band_get_partial_schedule); \ ++ DYNSYM (isl_band_has_children); \ ++ DYNSYM (isl_band_list_free); \ ++ DYNSYM (isl_band_list_get_band); \ ++ DYNSYM (isl_band_list_get_ctx); \ ++ DYNSYM (isl_band_list_n_band); \ ++ DYNSYM (isl_band_member_is_zero_distance); \ ++ DYNSYM (isl_band_n_member); \ ++ DYNSYM (isl_basic_map_add_constraint); \ ++ DYNSYM (isl_basic_map_project_out); \ ++ DYNSYM (isl_basic_map_universe); \ ++ DYNSYM (isl_constraint_set_coefficient); \ ++ DYNSYM (isl_constraint_set_coefficient_si); \ ++ DYNSYM (isl_constraint_set_constant); \ ++ DYNSYM (isl_constraint_set_constant_si); \ ++ DYNSYM (isl_ctx_alloc); \ ++ DYNSYM (isl_ctx_free); \ ++ DYNSYM (isl_equality_alloc); \ ++ DYNSYM (isl_id_alloc); \ ++ DYNSYM (isl_id_copy); \ ++ DYNSYM (isl_id_free); \ ++ DYNSYM (isl_inequality_alloc); \ ++ DYNSYM (isl_local_space_copy); \ ++ DYNSYM (isl_local_space_free); \ ++ DYNSYM (isl_local_space_from_space); \ ++ DYNSYM (isl_local_space_range); \ ++ DYNSYM (isl_map_add_constraint); \ ++ DYNSYM (isl_map_add_dims); \ ++ DYNSYM (isl_map_align_params); \ ++ DYNSYM (isl_map_apply_range); \ ++ DYNSYM (isl_map_copy); \ ++ DYNSYM (isl_map_dim); \ ++ DYNSYM (isl_map_dump); \ ++ DYNSYM (isl_map_equate); \ ++ DYNSYM (isl_map_fix_si); \ ++ DYNSYM (isl_map_flat_product); \ ++ DYNSYM (isl_map_flat_range_product); \ ++ DYNSYM (isl_map_free); \ ++ DYNSYM (isl_map_from_basic_map); \ ++ DYNSYM (isl_map_from_pw_aff); \ ++ DYNSYM (isl_map_from_union_map); \ ++ DYNSYM (isl_map_get_ctx); \ ++ DYNSYM (isl_map_get_space); \ ++ DYNSYM (isl_map_get_tuple_id); \ ++ DYNSYM (isl_map_insert_dims); \ ++ 
DYNSYM (isl_map_intersect); \ ++ DYNSYM (isl_map_intersect_domain); \ ++ DYNSYM (isl_map_intersect_range); \ ++ DYNSYM (isl_map_is_empty); \ ++ DYNSYM (isl_map_lex_ge); \ ++ DYNSYM (isl_map_lex_le); \ ++ DYNSYM (isl_map_n_out); \ ++ DYNSYM (isl_map_range); \ ++ DYNSYM (isl_map_set_tuple_id); \ ++ DYNSYM (isl_map_universe); \ ++ DYNSYM (isl_options_set_on_error); \ ++ DYNSYM (isl_options_set_schedule_fuse); \ ++ DYNSYM (isl_options_set_schedule_max_constant_term); \ ++ DYNSYM (isl_options_set_schedule_maximize_band_depth); \ ++ DYNSYM (isl_printer_free); \ ++ DYNSYM (isl_printer_print_aff); \ ++ DYNSYM (isl_printer_print_constraint); \ ++ DYNSYM (isl_printer_print_map); \ ++ DYNSYM (isl_printer_print_set); \ ++ DYNSYM (isl_printer_to_file); \ ++ DYNSYM (isl_pw_aff_add); \ ++ DYNSYM (isl_pw_aff_alloc); \ ++ DYNSYM (isl_pw_aff_copy); \ ++ DYNSYM (isl_pw_aff_eq_set); \ ++ DYNSYM (isl_pw_aff_free); \ ++ DYNSYM (isl_pw_aff_from_aff); \ ++ DYNSYM (isl_pw_aff_ge_set); \ ++ DYNSYM (isl_pw_aff_gt_set); \ ++ DYNSYM (isl_pw_aff_is_cst); \ ++ DYNSYM (isl_pw_aff_le_set); \ ++ DYNSYM (isl_pw_aff_lt_set); \ ++ DYNSYM (isl_pw_aff_mod); \ ++ DYNSYM (isl_pw_aff_mul); \ ++ DYNSYM (isl_pw_aff_ne_set); \ ++ DYNSYM (isl_pw_aff_nonneg_set); \ ++ DYNSYM (isl_pw_aff_set_tuple_id); \ ++ DYNSYM (isl_pw_aff_sub); \ ++ DYNSYM (isl_pw_aff_zero_set); \ ++ DYNSYM (isl_schedule_free); \ ++ DYNSYM (isl_schedule_get_band_forest); \ ++ DYNSYM (isl_set_add_constraint); \ ++ DYNSYM (isl_set_add_dims); \ ++ DYNSYM (isl_set_apply); \ ++ DYNSYM (isl_set_coalesce); \ ++ DYNSYM (isl_set_copy); \ ++ DYNSYM (isl_set_dim); \ ++ DYNSYM (isl_set_fix_si); \ ++ DYNSYM (isl_set_free); \ ++ DYNSYM (isl_set_from_cloog_domain); \ ++ DYNSYM (isl_set_get_space); \ ++ DYNSYM (isl_set_get_tuple_id); \ ++ DYNSYM (isl_set_intersect); \ ++ DYNSYM (isl_set_is_empty); \ ++ DYNSYM (isl_set_max); \ ++ DYNSYM (isl_set_min); \ ++ DYNSYM (isl_set_nat_universe); \ ++ DYNSYM (isl_set_project_out); \ ++ DYNSYM (isl_set_set_tuple_id); \ ++ DYNSYM (isl_set_universe); \ ++ DYNSYM (isl_space_add_dims); \ ++ DYNSYM (isl_space_alloc); \ ++ DYNSYM (isl_space_copy); \ ++ DYNSYM (isl_space_dim); \ ++ DYNSYM (isl_space_domain); \ ++ DYNSYM (isl_space_find_dim_by_id); \ ++ DYNSYM (isl_space_free); \ ++ DYNSYM (isl_space_from_domain); \ ++ DYNSYM (isl_space_get_tuple_id); \ ++ DYNSYM (isl_space_params_alloc); \ ++ DYNSYM (isl_space_range); \ ++ DYNSYM (isl_space_set_alloc); \ ++ DYNSYM (isl_space_set_dim_id); \ ++ DYNSYM (isl_space_set_tuple_id); \ ++ DYNSYM (isl_union_map_add_map); \ ++ DYNSYM (isl_union_map_align_params); \ ++ DYNSYM (isl_union_map_apply_domain); \ ++ DYNSYM (isl_union_map_apply_range); \ ++ DYNSYM (isl_union_map_compute_flow); \ ++ DYNSYM (isl_union_map_copy); \ ++ DYNSYM (isl_union_map_empty); \ ++ DYNSYM (isl_union_map_flat_range_product); \ ++ DYNSYM (isl_union_map_foreach_map); \ ++ DYNSYM (isl_union_map_free); \ ++ DYNSYM (isl_union_map_from_map); \ ++ DYNSYM (isl_union_map_get_ctx); \ ++ DYNSYM (isl_union_map_get_space); \ ++ DYNSYM (isl_union_map_gist_domain); \ ++ DYNSYM (isl_union_map_gist_range); \ ++ DYNSYM (isl_union_map_intersect_domain); \ ++ DYNSYM (isl_union_map_is_empty); \ ++ DYNSYM (isl_union_map_subtract); \ ++ DYNSYM (isl_union_map_union); \ ++ DYNSYM (isl_union_set_add_set); \ ++ DYNSYM (isl_union_set_compute_schedule); \ ++ DYNSYM (isl_union_set_copy); \ ++ DYNSYM (isl_union_set_empty); \ ++ DYNSYM (isl_union_set_from_set); \ ++ DYNSYM (stmt_ass); \ ++ DYNSYM (stmt_block); \ ++ DYNSYM (stmt_for); \ ++ DYNSYM (stmt_guard); \ ++ 
DYNSYM (stmt_root); \ ++ DYNSYM (stmt_user); ++extern struct cloog_pointers_s__ ++{ ++ bool inited; ++ void *h; ++#define DYNSYM(x) __typeof (x) *p_##x ++ DYNSYMS ++#undef DYNSYM ++} cloog_pointers__; ++ ++#define cloog_block_alloc (*cloog_pointers__.p_cloog_block_alloc) ++#define clast_pprint (*cloog_pointers__.p_clast_pprint) ++#define cloog_clast_create_from_input (*cloog_pointers__.p_cloog_clast_create_from_input) ++#define cloog_clast_free (*cloog_pointers__.p_cloog_clast_free) ++#define cloog_domain_from_isl_set (*cloog_pointers__.p_cloog_domain_from_isl_set) ++#define cloog_input_alloc (*cloog_pointers__.p_cloog_input_alloc) ++#define cloog_isl_state_malloc (*cloog_pointers__.p_cloog_isl_state_malloc) ++#define cloog_options_free (*cloog_pointers__.p_cloog_options_free) ++#define cloog_options_malloc (*cloog_pointers__.p_cloog_options_malloc) ++#define cloog_scattering_from_isl_map (*cloog_pointers__.p_cloog_scattering_from_isl_map) ++#define cloog_state_free (*cloog_pointers__.p_cloog_state_free) ++#define cloog_union_domain_add_domain (*cloog_pointers__.p_cloog_union_domain_add_domain) ++#define cloog_union_domain_alloc (*cloog_pointers__.p_cloog_union_domain_alloc) ++#define cloog_union_domain_set_name (*cloog_pointers__.p_cloog_union_domain_set_name) ++#define isl_aff_add_coefficient_si (*cloog_pointers__.p_isl_aff_add_coefficient_si) ++#define isl_aff_add_constant (*cloog_pointers__.p_isl_aff_add_constant) ++#define isl_aff_free (*cloog_pointers__.p_isl_aff_free) ++#define isl_aff_get_coefficient (*cloog_pointers__.p_isl_aff_get_coefficient) ++#define isl_aff_get_space (*cloog_pointers__.p_isl_aff_get_space) ++#define isl_aff_mod (*cloog_pointers__.p_isl_aff_mod) ++#define isl_aff_set_coefficient_si (*cloog_pointers__.p_isl_aff_set_coefficient_si) ++#define isl_aff_set_constant_si (*cloog_pointers__.p_isl_aff_set_constant_si) ++#define isl_aff_zero_on_domain (*cloog_pointers__.p_isl_aff_zero_on_domain) ++#define isl_band_free (*cloog_pointers__.p_isl_band_free) ++#define isl_band_get_children (*cloog_pointers__.p_isl_band_get_children) ++#define isl_band_get_partial_schedule (*cloog_pointers__.p_isl_band_get_partial_schedule) ++#define isl_band_has_children (*cloog_pointers__.p_isl_band_has_children) ++#define isl_band_list_free (*cloog_pointers__.p_isl_band_list_free) ++#define isl_band_list_get_band (*cloog_pointers__.p_isl_band_list_get_band) ++#define isl_band_list_get_ctx (*cloog_pointers__.p_isl_band_list_get_ctx) ++#define isl_band_list_n_band (*cloog_pointers__.p_isl_band_list_n_band) ++#define isl_band_member_is_zero_distance (*cloog_pointers__.p_isl_band_member_is_zero_distance) ++#define isl_band_n_member (*cloog_pointers__.p_isl_band_n_member) ++#define isl_basic_map_add_constraint (*cloog_pointers__.p_isl_basic_map_add_constraint) ++#define isl_basic_map_project_out (*cloog_pointers__.p_isl_basic_map_project_out) ++#define isl_basic_map_universe (*cloog_pointers__.p_isl_basic_map_universe) ++#define isl_constraint_set_coefficient (*cloog_pointers__.p_isl_constraint_set_coefficient) ++#define isl_constraint_set_coefficient_si (*cloog_pointers__.p_isl_constraint_set_coefficient_si) ++#define isl_constraint_set_constant (*cloog_pointers__.p_isl_constraint_set_constant) ++#define isl_constraint_set_constant_si (*cloog_pointers__.p_isl_constraint_set_constant_si) ++#define isl_ctx_alloc (*cloog_pointers__.p_isl_ctx_alloc) ++#define isl_ctx_free (*cloog_pointers__.p_isl_ctx_free) ++#define isl_equality_alloc (*cloog_pointers__.p_isl_equality_alloc) ++#define isl_id_alloc 
(*cloog_pointers__.p_isl_id_alloc) ++#define isl_id_copy (*cloog_pointers__.p_isl_id_copy) ++#define isl_id_free (*cloog_pointers__.p_isl_id_free) ++#define isl_inequality_alloc (*cloog_pointers__.p_isl_inequality_alloc) ++#define isl_local_space_copy (*cloog_pointers__.p_isl_local_space_copy) ++#define isl_local_space_free (*cloog_pointers__.p_isl_local_space_free) ++#define isl_local_space_from_space (*cloog_pointers__.p_isl_local_space_from_space) ++#define isl_local_space_range (*cloog_pointers__.p_isl_local_space_range) ++#define isl_map_add_constraint (*cloog_pointers__.p_isl_map_add_constraint) ++#define isl_map_add_dims (*cloog_pointers__.p_isl_map_add_dims) ++#define isl_map_align_params (*cloog_pointers__.p_isl_map_align_params) ++#define isl_map_apply_range (*cloog_pointers__.p_isl_map_apply_range) ++#define isl_map_copy (*cloog_pointers__.p_isl_map_copy) ++#define isl_map_dim (*cloog_pointers__.p_isl_map_dim) ++#define isl_map_dump (*cloog_pointers__.p_isl_map_dump) ++#define isl_map_equate (*cloog_pointers__.p_isl_map_equate) ++#define isl_map_fix_si (*cloog_pointers__.p_isl_map_fix_si) ++#define isl_map_flat_product (*cloog_pointers__.p_isl_map_flat_product) ++#define isl_map_flat_range_product (*cloog_pointers__.p_isl_map_flat_range_product) ++#define isl_map_free (*cloog_pointers__.p_isl_map_free) ++#define isl_map_from_basic_map (*cloog_pointers__.p_isl_map_from_basic_map) ++#define isl_map_from_pw_aff (*cloog_pointers__.p_isl_map_from_pw_aff) ++#define isl_map_from_union_map (*cloog_pointers__.p_isl_map_from_union_map) ++#define isl_map_get_ctx (*cloog_pointers__.p_isl_map_get_ctx) ++#define isl_map_get_space (*cloog_pointers__.p_isl_map_get_space) ++#define isl_map_get_tuple_id (*cloog_pointers__.p_isl_map_get_tuple_id) ++#define isl_map_insert_dims (*cloog_pointers__.p_isl_map_insert_dims) ++#define isl_map_intersect (*cloog_pointers__.p_isl_map_intersect) ++#define isl_map_intersect_domain (*cloog_pointers__.p_isl_map_intersect_domain) ++#define isl_map_intersect_range (*cloog_pointers__.p_isl_map_intersect_range) ++#define isl_map_is_empty (*cloog_pointers__.p_isl_map_is_empty) ++#define isl_map_lex_ge (*cloog_pointers__.p_isl_map_lex_ge) ++#define isl_map_lex_le (*cloog_pointers__.p_isl_map_lex_le) ++#define isl_map_n_out (*cloog_pointers__.p_isl_map_n_out) ++#define isl_map_range (*cloog_pointers__.p_isl_map_range) ++#define isl_map_set_tuple_id (*cloog_pointers__.p_isl_map_set_tuple_id) ++#define isl_map_universe (*cloog_pointers__.p_isl_map_universe) ++#define isl_options_set_on_error (*cloog_pointers__.p_isl_options_set_on_error) ++#define isl_options_set_schedule_fuse (*cloog_pointers__.p_isl_options_set_schedule_fuse) ++#define isl_options_set_schedule_max_constant_term (*cloog_pointers__.p_isl_options_set_schedule_max_constant_term) ++#define isl_options_set_schedule_maximize_band_depth (*cloog_pointers__.p_isl_options_set_schedule_maximize_band_depth) ++#define isl_printer_free (*cloog_pointers__.p_isl_printer_free) ++#define isl_printer_print_aff (*cloog_pointers__.p_isl_printer_print_aff) ++#define isl_printer_print_constraint (*cloog_pointers__.p_isl_printer_print_constraint) ++#define isl_printer_print_map (*cloog_pointers__.p_isl_printer_print_map) ++#define isl_printer_print_set (*cloog_pointers__.p_isl_printer_print_set) ++#define isl_printer_to_file (*cloog_pointers__.p_isl_printer_to_file) ++#define isl_pw_aff_add (*cloog_pointers__.p_isl_pw_aff_add) ++#define isl_pw_aff_alloc (*cloog_pointers__.p_isl_pw_aff_alloc) ++#define isl_pw_aff_copy 
(*cloog_pointers__.p_isl_pw_aff_copy) ++#define isl_pw_aff_eq_set (*cloog_pointers__.p_isl_pw_aff_eq_set) ++#define isl_pw_aff_free (*cloog_pointers__.p_isl_pw_aff_free) ++#define isl_pw_aff_from_aff (*cloog_pointers__.p_isl_pw_aff_from_aff) ++#define isl_pw_aff_ge_set (*cloog_pointers__.p_isl_pw_aff_ge_set) ++#define isl_pw_aff_gt_set (*cloog_pointers__.p_isl_pw_aff_gt_set) ++#define isl_pw_aff_is_cst (*cloog_pointers__.p_isl_pw_aff_is_cst) ++#define isl_pw_aff_le_set (*cloog_pointers__.p_isl_pw_aff_le_set) ++#define isl_pw_aff_lt_set (*cloog_pointers__.p_isl_pw_aff_lt_set) ++#define isl_pw_aff_mod (*cloog_pointers__.p_isl_pw_aff_mod) ++#define isl_pw_aff_mul (*cloog_pointers__.p_isl_pw_aff_mul) ++#define isl_pw_aff_ne_set (*cloog_pointers__.p_isl_pw_aff_ne_set) ++#define isl_pw_aff_nonneg_set (*cloog_pointers__.p_isl_pw_aff_nonneg_set) ++#define isl_pw_aff_set_tuple_id (*cloog_pointers__.p_isl_pw_aff_set_tuple_id) ++#define isl_pw_aff_sub (*cloog_pointers__.p_isl_pw_aff_sub) ++#define isl_pw_aff_zero_set (*cloog_pointers__.p_isl_pw_aff_zero_set) ++#define isl_schedule_free (*cloog_pointers__.p_isl_schedule_free) ++#define isl_schedule_get_band_forest (*cloog_pointers__.p_isl_schedule_get_band_forest) ++#define isl_set_add_constraint (*cloog_pointers__.p_isl_set_add_constraint) ++#define isl_set_add_dims (*cloog_pointers__.p_isl_set_add_dims) ++#define isl_set_apply (*cloog_pointers__.p_isl_set_apply) ++#define isl_set_coalesce (*cloog_pointers__.p_isl_set_coalesce) ++#define isl_set_copy (*cloog_pointers__.p_isl_set_copy) ++#define isl_set_dim (*cloog_pointers__.p_isl_set_dim) ++#define isl_set_fix_si (*cloog_pointers__.p_isl_set_fix_si) ++#define isl_set_free (*cloog_pointers__.p_isl_set_free) ++#define isl_set_from_cloog_domain (*cloog_pointers__.p_isl_set_from_cloog_domain) ++#define isl_set_get_space (*cloog_pointers__.p_isl_set_get_space) ++#define isl_set_get_tuple_id (*cloog_pointers__.p_isl_set_get_tuple_id) ++#define isl_set_intersect (*cloog_pointers__.p_isl_set_intersect) ++#define isl_set_is_empty (*cloog_pointers__.p_isl_set_is_empty) ++#define isl_set_max (*cloog_pointers__.p_isl_set_max) ++#define isl_set_min (*cloog_pointers__.p_isl_set_min) ++#define isl_set_nat_universe (*cloog_pointers__.p_isl_set_nat_universe) ++#define isl_set_project_out (*cloog_pointers__.p_isl_set_project_out) ++#define isl_set_set_tuple_id (*cloog_pointers__.p_isl_set_set_tuple_id) ++#define isl_set_universe (*cloog_pointers__.p_isl_set_universe) ++#define isl_space_add_dims (*cloog_pointers__.p_isl_space_add_dims) ++#define isl_space_alloc (*cloog_pointers__.p_isl_space_alloc) ++#define isl_space_copy (*cloog_pointers__.p_isl_space_copy) ++#define isl_space_dim (*cloog_pointers__.p_isl_space_dim) ++#define isl_space_domain (*cloog_pointers__.p_isl_space_domain) ++#define isl_space_find_dim_by_id (*cloog_pointers__.p_isl_space_find_dim_by_id) ++#define isl_space_free (*cloog_pointers__.p_isl_space_free) ++#define isl_space_from_domain (*cloog_pointers__.p_isl_space_from_domain) ++#define isl_space_get_tuple_id (*cloog_pointers__.p_isl_space_get_tuple_id) ++#define isl_space_params_alloc (*cloog_pointers__.p_isl_space_params_alloc) ++#define isl_space_range (*cloog_pointers__.p_isl_space_range) ++#define isl_space_set_alloc (*cloog_pointers__.p_isl_space_set_alloc) ++#define isl_space_set_dim_id (*cloog_pointers__.p_isl_space_set_dim_id) ++#define isl_space_set_tuple_id (*cloog_pointers__.p_isl_space_set_tuple_id) ++#define isl_union_map_add_map (*cloog_pointers__.p_isl_union_map_add_map) 
++#define isl_union_map_align_params (*cloog_pointers__.p_isl_union_map_align_params) ++#define isl_union_map_apply_domain (*cloog_pointers__.p_isl_union_map_apply_domain) ++#define isl_union_map_apply_range (*cloog_pointers__.p_isl_union_map_apply_range) ++#define isl_union_map_compute_flow (*cloog_pointers__.p_isl_union_map_compute_flow) ++#define isl_union_map_copy (*cloog_pointers__.p_isl_union_map_copy) ++#define isl_union_map_empty (*cloog_pointers__.p_isl_union_map_empty) ++#define isl_union_map_flat_range_product (*cloog_pointers__.p_isl_union_map_flat_range_product) ++#define isl_union_map_foreach_map (*cloog_pointers__.p_isl_union_map_foreach_map) ++#define isl_union_map_free (*cloog_pointers__.p_isl_union_map_free) ++#define isl_union_map_from_map (*cloog_pointers__.p_isl_union_map_from_map) ++#define isl_union_map_get_ctx (*cloog_pointers__.p_isl_union_map_get_ctx) ++#define isl_union_map_get_space (*cloog_pointers__.p_isl_union_map_get_space) ++#define isl_union_map_gist_domain (*cloog_pointers__.p_isl_union_map_gist_domain) ++#define isl_union_map_gist_range (*cloog_pointers__.p_isl_union_map_gist_range) ++#define isl_union_map_intersect_domain (*cloog_pointers__.p_isl_union_map_intersect_domain) ++#define isl_union_map_is_empty (*cloog_pointers__.p_isl_union_map_is_empty) ++#define isl_union_map_subtract (*cloog_pointers__.p_isl_union_map_subtract) ++#define isl_union_map_union (*cloog_pointers__.p_isl_union_map_union) ++#define isl_union_set_add_set (*cloog_pointers__.p_isl_union_set_add_set) ++#define isl_union_set_compute_schedule (*cloog_pointers__.p_isl_union_set_compute_schedule) ++#define isl_union_set_copy (*cloog_pointers__.p_isl_union_set_copy) ++#define isl_union_set_empty (*cloog_pointers__.p_isl_union_set_empty) ++#define isl_union_set_from_set (*cloog_pointers__.p_isl_union_set_from_set) ++#define stmt_ass (*cloog_pointers__.p_stmt_ass) ++#define stmt_block (*cloog_pointers__.p_stmt_block) ++#define stmt_for (*cloog_pointers__.p_stmt_for) ++#define stmt_guard (*cloog_pointers__.p_stmt_guard) ++#define stmt_root (*cloog_pointers__.p_stmt_root) ++#define stmt_user (*cloog_pointers__.p_stmt_user) ++ + typedef struct poly_dr *poly_dr_p; + + typedef struct poly_bb *poly_bb_p; +--- gcc/graphite.c.jj 2012-12-13 11:31:00.000000000 +0100 ++++ gcc/graphite.c 2012-12-14 13:40:44.155136961 +0100 +@@ -66,6 +66,34 @@ along with GCC; see the file COPYING3. + + CloogState *cloog_state; + ++__typeof (cloog_pointers__) cloog_pointers__; ++ ++static bool ++init_cloog_pointers (void) ++{ ++ void *h; ++ ++ if (cloog_pointers__.inited) ++ return cloog_pointers__.h != NULL; ++ h = dlopen ("libcloog-isl.so.4", RTLD_LAZY); ++ cloog_pointers__.h = h; ++ if (h == NULL) ++ return false; ++#define DYNSYM(x) \ ++ do \ ++ { \ ++ union { __typeof (cloog_pointers__.p_##x) p; void *q; } u; \ ++ u.q = dlsym (h, #x); \ ++ if (u.q == NULL) \ ++ return false; \ ++ cloog_pointers__.p_##x = u.p; \ ++ } \ ++ while (0) ++ DYNSYMS ++#undef DYNSYM ++ return true; ++} ++ + /* Print global statistics to FILE. 
*/ + + static void +@@ -264,6 +292,15 @@ graphite_transform_loops (void) + if (parallelized_function_p (cfun->decl)) + return; + ++ if (number_of_loops () <= 1) ++ return; ++ ++ if (!init_cloog_pointers ()) ++ { ++ sorry ("Graphite loop optimizations cannot be used"); ++ return; ++ } ++ + ctx = isl_ctx_alloc (); + isl_options_set_on_error(ctx, ISL_ON_ERROR_ABORT); + if (!graphite_initialize (ctx)) +--- gcc/graphite-clast-to-gimple.c.jj 2012-12-13 11:31:27.000000000 +0100 ++++ gcc/graphite-clast-to-gimple.c 2012-12-14 13:27:47.196519858 +0100 +@@ -910,7 +910,7 @@ compute_bounds_for_loop (struct clast_fo + from STMT_FOR. */ + + static tree +-type_for_clast_for (struct clast_for *stmt_for, ivs_params_p ip) ++type_for_clast_for (struct clast_for *stmt_fora, ivs_params_p ip) + { + mpz_t bound_one, bound_two; + tree lb_type, ub_type; +@@ -918,8 +918,8 @@ type_for_clast_for (struct clast_for *st + mpz_init (bound_one); + mpz_init (bound_two); + +- lb_type = type_for_clast_expr (stmt_for->LB, ip, bound_one, bound_two); +- ub_type = type_for_clast_expr (stmt_for->UB, ip, bound_one, bound_two); ++ lb_type = type_for_clast_expr (stmt_fora->LB, ip, bound_one, bound_two); ++ ub_type = type_for_clast_expr (stmt_fora->UB, ip, bound_one, bound_two); + + mpz_clear (bound_one); + mpz_clear (bound_two); diff --git a/SOURCES/gcc48-cloog-dl2.patch b/SOURCES/gcc48-cloog-dl2.patch new file mode 100644 index 0000000..2f647a3 --- /dev/null +++ b/SOURCES/gcc48-cloog-dl2.patch @@ -0,0 +1,74 @@ +2011-04-04 Jakub Jelinek + + * toplev.c (toplev_main_argv): New variable. + (toplev_main): Initialize it. + * graphite.c (init_cloog_pointers): Load libcloog-isl.so.4 from gcc's private + directory. + +--- gcc/toplev.c.jj 2008-12-09 23:59:10.000000000 +0100 ++++ gcc/toplev.c 2009-01-27 14:33:52.000000000 +0100 +@@ -107,6 +107,8 @@ static bool no_backend; + /* Length of line when printing switch values. */ + #define MAX_LINE 75 + ++const char **toplev_main_argv; ++ + /* Decoded options, and number of such options. */ + struct cl_decoded_option *save_decoded_options; + unsigned int save_decoded_options_count; +@@ -1909,6 +1911,8 @@ toplev_main (int argc, char **argv) + + expandargv (&argc, &argv); + ++ toplev_main_argv = CONST_CAST2 (const char **, char **, argv); ++ + /* Initialization of GCC's environment, and diagnostics. 
*/ + general_init (argv[0]); + +--- gcc/graphite.c.jj 2010-12-01 10:24:32.000000000 -0500 ++++ gcc/graphite.c 2010-12-01 11:46:07.832118193 -0500 +@@ -72,11 +72,39 @@ __typeof (cloog_pointers__) cloog_pointe + static bool + init_cloog_pointers (void) + { +- void *h; +- +- if (cloog_pointers__.inited) +- return cloog_pointers__.h != NULL; +- h = dlopen ("libcloog-isl.so.4", RTLD_LAZY); ++ void *h = NULL; ++ extern const char **toplev_main_argv; ++ char *buf, *p; ++ size_t len; ++ ++ if (cloog_pointers__.inited) ++ return cloog_pointers__.h != NULL; ++ len = progname - toplev_main_argv[0]; ++ buf = XALLOCAVAR (char, len + sizeof "libcloog-isl.so.4"); ++ memcpy (buf, toplev_main_argv[0], len); ++ strcpy (buf + len, "libcloog-isl.so.4"); ++ len += sizeof "libcloog-isl.so.4"; ++ p = strstr (buf, "/libexec/"); ++ if (p != NULL) ++ { ++ while (1) ++ { ++ char *q = strstr (p + 8, "/libexec/"); ++ if (q == NULL) ++ break; ++ p = q; ++ } ++ memmove (p + 4, p + 8, len - (p + 8 - buf)); ++ h = dlopen (buf, RTLD_LAZY); ++ if (h == NULL) ++ { ++ len = progname - toplev_main_argv[0]; ++ memcpy (buf, toplev_main_argv[0], len); ++ strcpy (buf + len, "libcloog-isl.so.4"); ++ } ++ } ++ if (h == NULL) ++ h = dlopen (buf, RTLD_LAZY); + cloog_pointers__.h = h; + if (h == NULL) + return false; diff --git a/SOURCES/gcc48-color-auto.patch b/SOURCES/gcc48-color-auto.patch new file mode 100644 index 0000000..a8cf938 --- /dev/null +++ b/SOURCES/gcc48-color-auto.patch @@ -0,0 +1,46 @@ +2013-09-20 Jakub Jelinek + + * common.opt (-fdiagnostics-color=): Default to auto. + * toplev.c (process_options): Always default to + -fdiagnostics-color=auto if no -f{,no-}diagnostics-color*. + * doc/invoke.texi (-fdiagnostics-color*): Adjust documentation + of the default. + +--- gcc/common.opt.jj 2013-09-18 12:06:53.000000000 +0200 ++++ gcc/common.opt 2013-09-20 10:00:00.935823900 +0200 +@@ -1037,7 +1037,7 @@ Common Alias(fdiagnostics-color=,always, + ; + + fdiagnostics-color= +-Common Joined RejectNegative Var(flag_diagnostics_show_color) Enum(diagnostic_color_rule) Init(DIAGNOSTICS_COLOR_NO) ++Common Joined RejectNegative Var(flag_diagnostics_show_color) Enum(diagnostic_color_rule) Init(DIAGNOSTICS_COLOR_AUTO) + -fdiagnostics-color=[never|always|auto] Colorize diagnostics + + ; Required for these enum values. +--- gcc/toplev.c.jj 2013-09-09 11:32:39.000000000 +0200 ++++ gcc/toplev.c 2013-09-20 10:10:08.198721005 +0200 +@@ -1229,10 +1229,8 @@ process_options (void) + + maximum_field_alignment = initial_max_fld_align * BITS_PER_UNIT; + +- /* Default to -fdiagnostics-color=auto if GCC_COLORS is in the environment, +- otherwise default to -fdiagnostics-color=never. */ +- if (!global_options_set.x_flag_diagnostics_show_color +- && getenv ("GCC_COLORS")) ++ /* Default to -fdiagnostics-color=auto. */ ++ if (!global_options_set.x_flag_diagnostics_show_color) + pp_show_color (global_dc->printer) + = colorize_init (DIAGNOSTICS_COLOR_AUTO); + +--- gcc/doc/invoke.texi.jj 2013-09-18 12:06:50.000000000 +0200 ++++ gcc/doc/invoke.texi 2013-09-20 10:09:29.079904455 +0200 +@@ -3046,8 +3046,7 @@ a message which is too long to fit on a + @cindex highlight, color, colour + @vindex GCC_COLORS @r{environment variable} + Use color in diagnostics. @var{WHEN} is @samp{never}, @samp{always}, +-or @samp{auto}. The default is @samp{never} if @env{GCC_COLORS} environment +-variable isn't present in the environment, and @samp{auto} otherwise. ++or @samp{auto}. The default is @samp{auto}. 
+ @samp{auto} means to use color only when the standard error is a terminal. + The forms @option{-fdiagnostics-color} and @option{-fno-diagnostics-color} are + aliases for @option{-fdiagnostics-color=always} and diff --git a/SOURCES/gcc48-hack.patch b/SOURCES/gcc48-hack.patch new file mode 100644 index 0000000..1903e95 --- /dev/null +++ b/SOURCES/gcc48-hack.patch @@ -0,0 +1,102 @@ +--- libada/Makefile.in.jj 2009-01-14 12:07:35.000000000 +0100 ++++ libada/Makefile.in 2009-01-15 14:25:33.000000000 +0100 +@@ -66,18 +66,40 @@ libsubdir := $(libdir)/gcc/$(target_nonc + ADA_RTS_DIR=$(GCC_DIR)/ada/rts$(subst /,_,$(MULTISUBDIR)) + ADA_RTS_SUBDIR=./rts$(subst /,_,$(MULTISUBDIR)) + ++DEFAULTMULTIFLAGS := ++ifeq ($(MULTISUBDIR),) ++targ:=$(subst -, ,$(target)) ++arch:=$(word 1,$(targ)) ++ifeq ($(words $(targ)),2) ++osys:=$(word 2,$(targ)) ++else ++osys:=$(word 3,$(targ)) ++endif ++ifeq ($(strip $(filter-out i%86 x86_64 powerpc% ppc% s390% sparc% linux%, $(arch) $(osys))),) ++ifeq ($(shell $(CC) $(CFLAGS) -print-multi-os-directory),../lib64) ++DEFAULTMULTIFLAGS := -m64 ++else ++ifeq ($(strip $(filter-out s390%, $(arch))),) ++DEFAULTMULTIFLAGS := -m31 ++else ++DEFAULTMULTIFLAGS := -m32 ++endif ++endif ++endif ++endif ++ + # exeext should not be used because it's the *host* exeext. We're building + # a *target* library, aren't we?!? Likewise for CC. Still, provide bogus + # definitions just in case something slips through the safety net provided + # by recursive make invocations in gcc/ada/Makefile.in + LIBADA_FLAGS_TO_PASS = \ + "MAKEOVERRIDES=" \ +- "LDFLAGS=$(LDFLAGS)" \ ++ "LDFLAGS=$(LDFLAGS) $(DEFAULTMULTIFLAGS)" \ + "LN_S=$(LN_S)" \ + "SHELL=$(SHELL)" \ +- "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS)" \ +- "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS)" \ +- "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS)" \ ++ "GNATLIBFLAGS=$(GNATLIBFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ ++ "GNATLIBCFLAGS=$(GNATLIBCFLAGS) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ ++ "GNATLIBCFLAGS_FOR_C=$(GNATLIBCFLAGS_FOR_C) $(MULTIFLAGS) $(DEFAULTMULTIFLAGS)" \ + "PICFLAG_FOR_TARGET=$(PICFLAG)" \ + "THREAD_KIND=$(THREAD_KIND)" \ + "TRACE=$(TRACE)" \ +@@ -88,7 +110,7 @@ LIBADA_FLAGS_TO_PASS = \ + "exeext=.exeext.should.not.be.used " \ + 'CC=the.host.compiler.should.not.be.needed' \ + "GCC_FOR_TARGET=$(CC)" \ +- "CFLAGS=$(CFLAGS)" ++ "CFLAGS=$(CFLAGS) $(DEFAULTMULTIFLAGS)" + + # Rules to build gnatlib. + .PHONY: gnatlib gnatlib-plain gnatlib-sjlj gnatlib-zcx gnatlib-shared osconstool +--- gcc/ada/sem_util.adb (revision 161677) ++++ gcc/ada/sem_util.adb (working copy) +@@ -5487,7 +5487,7 @@ package body Sem_Util is + Exp : Node_Id; + Assn : Node_Id; + Choice : Node_Id; +- Comp_Type : Entity_Id; ++ Comp_Type : Entity_Id := Empty; + Is_Array_Aggr : Boolean; + + begin +--- config-ml.in.jj 2010-06-30 09:50:44.000000000 +0200 ++++ config-ml.in 2010-07-02 21:24:17.994211151 +0200 +@@ -516,6 +516,8 @@ multi-do: + ADAFLAGS="$(ADAFLAGS) $${flags}" \ + prefix="$(prefix)" \ + exec_prefix="$(exec_prefix)" \ ++ mandir="$(mandir)" \ ++ infodir="$(infodir)" \ + GCJFLAGS="$(GCJFLAGS) $${flags}" \ + GOCFLAGS="$(GOCFLAGS) $${flags}" \ + CXXFLAGS="$(CXXFLAGS) $${flags}" \ +--- libjava/Makefile.am.jj 2010-07-09 11:17:33.729604090 +0200 ++++ libjava/Makefile.am 2010-07-09 13:16:41.894375641 +0200 +@@ -710,7 +710,8 @@ if USE_LIBGCJ_BC + ## later. 
+ @echo Installing dummy lib libgcj_bc.so.1.0.0; \ + rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ +- mv $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++ $(INSTALL) $(INSTALL_STRIP_FLAG) $(here)/.libs/libgcj_bc.so $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0; \ + $(libgcj_bc_dummy_LINK) -xc /dev/null -Wl,-soname,libgcj_bc.so.1 \ + -o $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 -lgcj || exit; \ + rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1; \ +--- libjava/Makefile.in.jj 2010-07-09 11:17:34.000000000 +0200 ++++ libjava/Makefile.in 2010-07-09 13:18:07.542572270 +0200 +@@ -12665,7 +12665,8 @@ install-exec-hook: install-binPROGRAMS i + install-libexecsubPROGRAMS + @USE_LIBGCJ_BC_TRUE@ @echo Installing dummy lib libgcj_bc.so.1.0.0; \ + @USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ +-@USE_LIBGCJ_BC_TRUE@ mv $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++@USE_LIBGCJ_BC_TRUE@ $(INSTALL) $(INSTALL_STRIP_FLAG) $(here)/.libs/libgcj_bc.so $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so; \ ++@USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0; \ + @USE_LIBGCJ_BC_TRUE@ $(libgcj_bc_dummy_LINK) -xc /dev/null -Wl,-soname,libgcj_bc.so.1 \ + @USE_LIBGCJ_BC_TRUE@ -o $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1.0.0 -lgcj || exit; \ + @USE_LIBGCJ_BC_TRUE@ rm $(DESTDIR)$(toolexeclibdir)/libgcj_bc.so.1; \ diff --git a/SOURCES/gcc48-i386-libgomp.patch b/SOURCES/gcc48-i386-libgomp.patch new file mode 100644 index 0000000..520561e --- /dev/null +++ b/SOURCES/gcc48-i386-libgomp.patch @@ -0,0 +1,11 @@ +--- libgomp/configure.tgt.jj 2008-01-10 20:53:48.000000000 +0100 ++++ libgomp/configure.tgt 2008-03-27 12:44:51.000000000 +0100 +@@ -67,7 +67,7 @@ if test $enable_linux_futex = yes; then + ;; + *) + if test -z "$with_arch"; then +- XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}" ++ XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic" + fi + esac + ;; diff --git a/SOURCES/gcc48-java-nomulti.patch b/SOURCES/gcc48-java-nomulti.patch new file mode 100644 index 0000000..17334aa --- /dev/null +++ b/SOURCES/gcc48-java-nomulti.patch @@ -0,0 +1,44 @@ +--- libjava/configure.ac.jj 2007-12-07 17:55:50.000000000 +0100 ++++ libjava/configure.ac 2007-12-07 18:36:56.000000000 +0100 +@@ -82,6 +82,13 @@ AC_ARG_ENABLE(java-maintainer-mode, + [allow rebuilding of .class and .h files])) + AM_CONDITIONAL(JAVA_MAINTAINER_MODE, test "$enable_java_maintainer_mode" = yes) + ++AC_ARG_ENABLE(libjava-multilib, ++ AS_HELP_STRING([--enable-libjava-multilib], [build libjava as multilib])) ++if test "$enable_libjava_multilib" = no; then ++ multilib=no ++ ac_configure_args="$ac_configure_args --disable-multilib" ++fi ++ + # It may not be safe to run linking tests in AC_PROG_CC/AC_PROG_CXX. + GCC_NO_EXECUTABLES + +--- libjava/configure.jj 2007-12-07 17:55:50.000000000 +0100 ++++ libjava/configure 2007-12-07 18:39:58.000000000 +0100 +@@ -1021,6 +1021,8 @@ Optional Features: + default=yes + --enable-java-maintainer-mode + allow rebuilding of .class and .h files ++ --enable-libjava-multilib ++ build libjava as multilib + --disable-dependency-tracking speeds up one-time build + --enable-dependency-tracking do not reject slow dependency extractors + --enable-maintainer-mode enable make rules and dependencies not useful +@@ -1973,6 +1975,16 @@ else + fi + + ++# Check whether --enable-libjava-multilib was given. 
++if test "${enable_libjava_multilib+set}" = set; then ++ enableval=$enable_libjava_multilib; ++fi ++ ++if test "$enable_libjava_multilib" = no; then ++ multilib=no ++ ac_configure_args="$ac_configure_args --disable-multilib" ++fi ++ + # It may not be safe to run linking tests in AC_PROG_CC/AC_PROG_CXX. + + diff --git a/SOURCES/gcc48-libgo-p224.patch b/SOURCES/gcc48-libgo-p224.patch new file mode 100644 index 0000000..50461bc --- /dev/null +++ b/SOURCES/gcc48-libgo-p224.patch @@ -0,0 +1,1302 @@ +--- libgo/Makefile.am.jj 2013-12-12 19:01:49.000000000 +0100 ++++ libgo/Makefile.am 2014-02-18 17:31:54.798484657 +0100 +@@ -1109,8 +1109,7 @@ go_crypto_dsa_files = \ + go_crypto_ecdsa_files = \ + go/crypto/ecdsa/ecdsa.go + go_crypto_elliptic_files = \ +- go/crypto/elliptic/elliptic.go \ +- go/crypto/elliptic/p224.go ++ go/crypto/elliptic/elliptic.go + go_crypto_hmac_files = \ + go/crypto/hmac/hmac.go + go_crypto_md5_files = \ +--- libgo/Makefile.in.jj 2013-12-12 19:01:49.000000000 +0100 ++++ libgo/Makefile.in 2014-02-18 17:32:11.350389191 +0100 +@@ -1274,8 +1274,7 @@ go_crypto_ecdsa_files = \ + go/crypto/ecdsa/ecdsa.go + + go_crypto_elliptic_files = \ +- go/crypto/elliptic/elliptic.go \ +- go/crypto/elliptic/p224.go ++ go/crypto/elliptic/elliptic.go + + go_crypto_hmac_files = \ + go/crypto/hmac/hmac.go +--- libgo/go/crypto/elliptic/elliptic.go.jj 2012-12-13 11:32:02.640039537 +0100 ++++ libgo/go/crypto/elliptic/elliptic.go 2014-02-18 17:28:22.909692022 +0100 +@@ -327,7 +327,6 @@ var p384 *CurveParams + var p521 *CurveParams + + func initAll() { +- initP224() + initP256() + initP384() + initP521() +--- libgo/go/crypto/elliptic/elliptic_test.go.jj 2012-12-13 11:32:02.640039537 +0100 ++++ libgo/go/crypto/elliptic/elliptic_test.go 2014-02-18 17:31:04.052774265 +0100 +@@ -5,329 +5,14 @@ + package elliptic + + import ( +- "crypto/rand" +- "encoding/hex" +- "fmt" +- "math/big" + "testing" + ) + +-func TestOnCurve(t *testing.T) { +- p224 := P224() +- if !p224.IsOnCurve(p224.Params().Gx, p224.Params().Gy) { +- t.Errorf("FAIL") +- } +-} +- +-type baseMultTest struct { +- k string +- x, y string +-} +- +-var p224BaseMultTests = []baseMultTest{ +- { +- "1", +- "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", +- "bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", +- }, +- { +- "2", +- "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", +- "1c2b76a7bc25e7702a704fa986892849fca629487acf3709d2e4e8bb", +- }, +- { +- "3", +- "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", +- "a3f7f03cadd0be444c0aa56830130ddf77d317344e1af3591981a925", +- }, +- { +- "4", +- "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", +- "482580a0ec5bc47e88bc8c378632cd196cb3fa058a7114eb03054c9", +- }, +- { +- "5", +- "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", +- "27e8bff1745635ec5ba0c9f1c2ede15414c6507d29ffe37e790a079b", +- }, +- { +- "6", +- "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", +- "89faf0ccb750d99b553c574fad7ecfb0438586eb3952af5b4b153c7e", +- }, +- { +- "7", +- "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", +- "f3a30085497f2f611ee2517b163ef8c53b715d18bb4e4808d02b963", +- }, +- { +- "8", +- "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", +- "46dcd3ea5c43898c5c5fc4fdac7db39c2f02ebee4e3541d1e78047a", +- }, +- { +- "9", +- "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", +- "371732e4f41bf4f7883035e6a79fcedc0e196eb07b48171697517463", +- }, +- { +- "10", +- "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", +- 
"39bb30eab337e0a521b6cba1abe4b2b3a3e524c14a3fe3eb116b655f", +- }, +- { +- "11", +- "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", +- "20b510004092e96636cfb7e32efded8265c266dfb754fa6d6491a6da", +- }, +- { +- "12", +- "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", +- "207dddf0385bfdeab6e9acda8da06b3bbef224a93ab1e9e036109d13", +- }, +- { +- "13", +- "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", +- "252819f71c7fb7fbcb159be337d37d3336d7feb963724fdfb0ecb767", +- }, +- { +- "14", +- "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", +- "d5814cd724199c4a5b974a43685fbf5b8bac69459c9469bc8f23ccaf", +- }, +- { +- "15", +- "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", +- "979a5f4759f80f4fb4ec2e34f5566d595680a11735e7b61046127989", +- }, +- { +- "16", +- "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", +- "3399d464345906b11b00e363ef429221f2ec720d2f665d7dead5b482", +- }, +- { +- "17", +- "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", +- "ff149efa6606a6bd20ef7d1b06bd92f6904639dce5174db6cc554a26", +- }, +- { +- "18", +- "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", +- "ea98d60e5ffc9b8fcf999fab1df7e7ef7084f20ddb61bb045a6ce002", +- }, +- { +- "19", +- "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", +- "dcf1f6c3db09c70acc25391d492fe25b4a180babd6cea356c04719cd", +- }, +- { +- "20", +- "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", +- "d5d7110274cba7cdee90e1a8b0d394c376a5573db6be0bf2747f530", +- }, +- { +- "112233445566778899", +- "61f077c6f62ed802dad7c2f38f5c67f2cc453601e61bd076bb46179e", +- "2272f9e9f5933e70388ee652513443b5e289dd135dcc0d0299b225e4", +- }, +- { +- "112233445566778899112233445566778899", +- "29895f0af496bfc62b6ef8d8a65c88c613949b03668aab4f0429e35", +- "3ea6e53f9a841f2019ec24bde1a75677aa9b5902e61081c01064de93", +- }, +- { +- "6950511619965839450988900688150712778015737983940691968051900319680", +- "ab689930bcae4a4aa5f5cb085e823e8ae30fd365eb1da4aba9cf0379", +- "3345a121bbd233548af0d210654eb40bab788a03666419be6fbd34e7", +- }, +- { +- "13479972933410060327035789020509431695094902435494295338570602119423", +- "bdb6a8817c1f89da1c2f3dd8e97feb4494f2ed302a4ce2bc7f5f4025", +- "4c7020d57c00411889462d77a5438bb4e97d177700bf7243a07f1680", +- }, +- { +- "13479971751745682581351455311314208093898607229429740618390390702079", +- "d58b61aa41c32dd5eba462647dba75c5d67c83606c0af2bd928446a9", +- "d24ba6a837be0460dd107ae77725696d211446c5609b4595976b16bd", +- }, +- { +- "13479972931865328106486971546324465392952975980343228160962702868479", +- "dc9fa77978a005510980e929a1485f63716df695d7a0c18bb518df03", +- "ede2b016f2ddffc2a8c015b134928275ce09e5661b7ab14ce0d1d403", +- }, +- { +- "11795773708834916026404142434151065506931607341523388140225443265536", +- "499d8b2829cfb879c901f7d85d357045edab55028824d0f05ba279ba", +- "bf929537b06e4015919639d94f57838fa33fc3d952598dcdbb44d638", +- }, +- { +- "784254593043826236572847595991346435467177662189391577090", +- "8246c999137186632c5f9eddf3b1b0e1764c5e8bd0e0d8a554b9cb77", +- "e80ed8660bc1cb17ac7d845be40a7a022d3306f116ae9f81fea65947", +- }, +- { +- "13479767645505654746623887797783387853576174193480695826442858012671", +- "6670c20afcceaea672c97f75e2e9dd5c8460e54bb38538ebb4bd30eb", +- "f280d8008d07a4caf54271f993527d46ff3ff46fd1190a3f1faa4f74", +- }, +- { +- "205688069665150753842126177372015544874550518966168735589597183", +- "eca934247425cfd949b795cb5ce1eff401550386e28d1a4c5a8eb", +- "d4c01040dba19628931bc8855370317c722cbd9ca6156985f1c2e9ce", +- }, +- { +- 
"13479966930919337728895168462090683249159702977113823384618282123295", +- "ef353bf5c73cd551b96d596fbc9a67f16d61dd9fe56af19de1fba9cd", +- "21771b9cdce3e8430c09b3838be70b48c21e15bc09ee1f2d7945b91f", +- }, +- { +- "50210731791415612487756441341851895584393717453129007497216", +- "4036052a3091eb481046ad3289c95d3ac905ca0023de2c03ecd451cf", +- "d768165a38a2b96f812586a9d59d4136035d9c853a5bf2e1c86a4993", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368041", +- "fcc7f2b45df1cd5a3c0c0731ca47a8af75cfb0347e8354eefe782455", +- "f2a28eefd8b345832116f1e574f2c6b2c895aa8c24941f40d8b80ad1", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368042", +- "a1e81c04f30ce201c7c9ace785ed44cc33b455a022f2acdbc6cae83c", +- "230e093c24f638f533dac6e2b6d01da3b5e7f45429315ca93fb8e634", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368043", +- "c9ff61b040874c0568479216824a15eab1a838a797d189746226e4cc", +- "156729f1a003647030666054e208180f8f7b0df2249e44fba5931fff", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368044", +- "b8357c3a6ceef288310e17b8bfeff9200846ca8c1942497c484403bc", +- "eb610599f95942df1082e4f9426d086fb9c6231ae8b24933aab5db", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368045", +- "b6ec4fe1777382404ef679997ba8d1cc5cd8e85349259f590c4c66d", +- "cc662b9bcba6f94ee4ff1c9c10bd6ddd0d138df2d099a282152a4b7f", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368046", +- "baa4d8635511a7d288aebeedd12ce529ff102c91f97f867e21916bf9", +- "6865a0b8a607f0b04b13d1cb0aa992a5a97f5ee8ca1849efb9ed8678", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368047", +- "a53640c83dc208603ded83e4ecf758f24c357d7cf48088b2ce01e9fa", +- "2a7eb328dbe663b5a468b5bc97a040a3745396ba636b964370dc3352", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368048", +- "34e8e17a430e43289793c383fac9774247b40e9ebd3366981fcfaeca", +- "dad7e608e380480434ea641cc82c82cbc92801469c8db0204f13489a", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368049", +- "6e31ee1dc137f81b056752e4deab1443a481033e9b4c93a3044f4f7a", +- "df82220fc7a4021549165325725f94c3410ddb56c54e161fc9ef62ee", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368050", +- "ef53b6294aca431f0f3c22dc82eb9050324f1d88d377e716448e507c", +- "df4aefffbf6d1699c930481cd102127c9a3d992048ab05929b6e5927", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368051", +- "aea9e17a306517eb89152aa7096d2c381ec813c51aa880e7bee2c0fd", +- "c644cf154cc81f5ade49345e541b4d4b5c1adb3eb5c01c14ee949aa2", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368052", +- "2fdcccfee720a77ef6cb3bfbb447f9383117e3daa4a07e36ed15f78d", +- "c8e8cd1b0be40b0877cfca1958603122f1e6914f84b7e8e968ae8b9e", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368053", +- "858e6f9cc6c12c31f5df124aa77767b05c8bc021bd683d2b55571550", +- "fb9232c15a3bc7673a3a03b0253824c53d0fd1411b1cabe2e187fb87", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368054", +- "db2f6be630e246a5cf7d99b85194b123d487e2d466b94b24a03c3e28", +- "f0c5cff7ab680d09ee11dae84e9c1072ac48ea2e744b1b7f72fd469e", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368055", +- "1f2483f82572251fca975fea40db821df8ad82a3c002ee6c57112408", +- 
"76050f3348af2664aac3a8b05281304ebc7a7914c6ad50a4b4eac383", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368056", +- "31c49ae75bce7807cdff22055d94ee9021fedbb5ab51c57526f011aa", +- "d817400e8ba9ca13a45f360e3d121eaaeb39af82d6001c8186f5f866", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368057", +- "ae99feebb5d26945b54892092a8aee02912930fa41cd114e40447301", +- "fb7da7f5f13a43b81774373c879cd32d6934c05fa758eeb14fcfab38", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368058", +- "df1b1d66a551d0d31eff822558b9d2cc75c2180279fe0d08fd896d04", +- "5c080fc3522f41bbb3f55a97cfecf21f882ce8cbb1e50ca6e67e56dc", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368059", +- "706a46dc76dcb76798e60e6d89474788d16dc18032d268fd1a704fa6", +- "e3d4895843da188fd58fb0567976d7b50359d6b78530c8f62d1b1746", +- }, +- { +- "26959946667150639794667015087019625940457807714424391721682722368060", +- "b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", +- "42c89c774a08dc04b3dd201932bc8a5ea5f8b89bbb2a7e667aff81cd", +- }, +-} +- +-func TestBaseMult(t *testing.T) { +- p224 := P224() +- for i, e := range p224BaseMultTests { +- k, ok := new(big.Int).SetString(e.k, 10) +- if !ok { +- t.Errorf("%d: bad value for k: %s", i, e.k) +- } +- x, y := p224.ScalarBaseMult(k.Bytes()) +- if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { +- t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) +- } +- if testing.Short() && i > 5 { +- break +- } +- } +-} +- +-func TestGenericBaseMult(t *testing.T) { +- // We use the P224 CurveParams directly in order to test the generic implementation. +- p224 := P224().Params() +- for i, e := range p224BaseMultTests { +- k, ok := new(big.Int).SetString(e.k, 10) +- if !ok { +- t.Errorf("%d: bad value for k: %s", i, e.k) +- } +- x, y := p224.ScalarBaseMult(k.Bytes()) +- if fmt.Sprintf("%x", x) != e.x || fmt.Sprintf("%x", y) != e.y { +- t.Errorf("%d: bad output for k=%s: got (%x, %x), want (%s, %s)", i, e.k, x, y, e.x, e.y) +- } +- if testing.Short() && i > 5 { +- break +- } +- } +-} +- + func TestInfinity(t *testing.T) { + tests := []struct { + name string + curve Curve + }{ +- {"p224", P224()}, + {"p256", P256()}, + } + +@@ -359,43 +44,3 @@ func TestInfinity(t *testing.T) { + } + } + } +- +-func BenchmarkBaseMult(b *testing.B) { +- b.ResetTimer() +- p224 := P224() +- e := p224BaseMultTests[25] +- k, _ := new(big.Int).SetString(e.k, 10) +- b.StartTimer() +- for i := 0; i < b.N; i++ { +- p224.ScalarBaseMult(k.Bytes()) +- } +-} +- +-func TestMarshal(t *testing.T) { +- p224 := P224() +- _, x, y, err := GenerateKey(p224, rand.Reader) +- if err != nil { +- t.Error(err) +- return +- } +- serialized := Marshal(p224, x, y) +- xx, yy := Unmarshal(p224, serialized) +- if xx == nil { +- t.Error("failed to unmarshal") +- return +- } +- if xx.Cmp(x) != 0 || yy.Cmp(y) != 0 { +- t.Error("unmarshal returned different values") +- return +- } +-} +- +-func TestP224Overflow(t *testing.T) { +- // This tests for a specific bug in the P224 implementation. 
+- p224 := P224() +- pointData, _ := hex.DecodeString("049B535B45FB0A2072398A6831834624C7E32CCFD5A4B933BCEAF77F1DD945E08BBE5178F5EDF5E733388F196D2A631D2E075BB16CBFEEA15B") +- x, y := Unmarshal(p224, pointData) +- if !p224.IsOnCurve(x, y) { +- t.Error("P224 failed to validate a correct point") +- } +-} +--- libgo/go/crypto/ecdsa/ecdsa_test.go.jj 2012-12-13 11:32:02.589039782 +0100 ++++ libgo/go/crypto/ecdsa/ecdsa_test.go 2014-02-18 17:28:22.909692022 +0100 +@@ -33,7 +33,6 @@ func testKeyGeneration(t *testing.T, c e + } + + func TestKeyGeneration(t *testing.T) { +- testKeyGeneration(t, elliptic.P224(), "p224") + if testing.Short() { + return + } +@@ -63,7 +62,6 @@ func testSignAndVerify(t *testing.T, c e + } + + func TestSignAndVerify(t *testing.T) { +- testSignAndVerify(t, elliptic.P224(), "p224") + if testing.Short() { + return + } +@@ -129,8 +127,6 @@ func TestVectors(t *testing.T) { + parts := strings.SplitN(line, ",", 2) + + switch parts[0] { +- case "P-224": +- pub.Curve = elliptic.P224() + case "P-256": + pub.Curve = elliptic.P256() + case "P-384": +--- libgo/go/crypto/x509/x509.go.jj 2013-08-14 13:55:08.939843607 +0200 ++++ libgo/go/crypto/x509/x509.go 2014-02-18 17:28:22.943691764 +0100 +@@ -283,9 +283,6 @@ func getPublicKeyAlgorithmFromOID(oid as + + // RFC 5480, 2.1.1.1. Named Curve + // +-// secp224r1 OBJECT IDENTIFIER ::= { +-// iso(1) identified-organization(3) certicom(132) curve(0) 33 } +-// + // secp256r1 OBJECT IDENTIFIER ::= { + // iso(1) member-body(2) us(840) ansi-X9-62(10045) curves(3) + // prime(1) 7 } +@@ -298,7 +295,6 @@ func getPublicKeyAlgorithmFromOID(oid as + // + // NB: secp256r1 is equivalent to prime256v1 + var ( +- oidNamedCurveP224 = asn1.ObjectIdentifier{1, 3, 132, 0, 33} + oidNamedCurveP256 = asn1.ObjectIdentifier{1, 2, 840, 10045, 3, 1, 7} + oidNamedCurveP384 = asn1.ObjectIdentifier{1, 3, 132, 0, 34} + oidNamedCurveP521 = asn1.ObjectIdentifier{1, 3, 132, 0, 35} +@@ -306,8 +302,6 @@ var ( + + func namedCurveFromOID(oid asn1.ObjectIdentifier) elliptic.Curve { + switch { +- case oid.Equal(oidNamedCurveP224): +- return elliptic.P224() + case oid.Equal(oidNamedCurveP256): + return elliptic.P256() + case oid.Equal(oidNamedCurveP384): +@@ -320,8 +314,6 @@ func namedCurveFromOID(oid asn1.ObjectId + + func oidFromNamedCurve(curve elliptic.Curve) (asn1.ObjectIdentifier, bool) { + switch curve { +- case elliptic.P224(): +- return oidNamedCurveP224, true + case elliptic.P256(): + return oidNamedCurveP256, true + case elliptic.P384(): +@@ -1212,7 +1204,7 @@ func CreateCertificate(rand io.Reader, t + hashFunc = crypto.SHA1 + case *ecdsa.PrivateKey: + switch priv.Curve { +- case elliptic.P224(), elliptic.P256(): ++ case elliptic.P256(): + hashFunc = crypto.SHA256 + signatureAlgorithm.Algorithm = oidSignatureECDSAWithSHA256 + case elliptic.P384(): +--- libgo/go/crypto/elliptic/p224.go.jj 2012-12-13 11:32:02.641039533 +0100 ++++ libgo/go/crypto/elliptic/p224.go 2014-02-15 11:40:56.191557928 +0100 +@@ -1,765 +0,0 @@ +-// Copyright 2012 The Go Authors. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package elliptic +- +-// This is a constant-time, 32-bit implementation of P224. See FIPS 186-3, +-// section D.2.2. +-// +-// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background. 
+- +-import ( +- "math/big" +-) +- +-var p224 p224Curve +- +-type p224Curve struct { +- *CurveParams +- gx, gy, b p224FieldElement +-} +- +-func initP224() { +- // See FIPS 186-3, section D.2.2 +- p224.CurveParams = new(CurveParams) +- p224.P, _ = new(big.Int).SetString("26959946667150639794667015087019630673557916260026308143510066298881", 10) +- p224.N, _ = new(big.Int).SetString("26959946667150639794667015087019625940457807714424391721682722368061", 10) +- p224.B, _ = new(big.Int).SetString("b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", 16) +- p224.Gx, _ = new(big.Int).SetString("b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", 16) +- p224.Gy, _ = new(big.Int).SetString("bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", 16) +- p224.BitSize = 224 +- +- p224FromBig(&p224.gx, p224.Gx) +- p224FromBig(&p224.gy, p224.Gy) +- p224FromBig(&p224.b, p224.B) +-} +- +-// P224 returns a Curve which implements P-224 (see FIPS 186-3, section D.2.2) +-func P224() Curve { +- initonce.Do(initAll) +- return p224 +-} +- +-func (curve p224Curve) Params() *CurveParams { +- return curve.CurveParams +-} +- +-func (curve p224Curve) IsOnCurve(bigX, bigY *big.Int) bool { +- var x, y p224FieldElement +- p224FromBig(&x, bigX) +- p224FromBig(&y, bigY) +- +- // y² = x³ - 3x + b +- var tmp p224LargeFieldElement +- var x3 p224FieldElement +- p224Square(&x3, &x, &tmp) +- p224Mul(&x3, &x3, &x, &tmp) +- +- for i := 0; i < 8; i++ { +- x[i] *= 3 +- } +- p224Sub(&x3, &x3, &x) +- p224Reduce(&x3) +- p224Add(&x3, &x3, &curve.b) +- p224Contract(&x3, &x3) +- +- p224Square(&y, &y, &tmp) +- p224Contract(&y, &y) +- +- for i := 0; i < 8; i++ { +- if y[i] != x3[i] { +- return false +- } +- } +- return true +-} +- +-func (p224Curve) Add(bigX1, bigY1, bigX2, bigY2 *big.Int) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2, x3, y3, z3 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- if bigX1.Sign() != 0 || bigY1.Sign() != 0 { +- z1[0] = 1 +- } +- p224FromBig(&x2, bigX2) +- p224FromBig(&y2, bigY2) +- if bigX2.Sign() != 0 || bigY2.Sign() != 0 { +- z2[0] = 1 +- } +- +- p224AddJacobian(&x3, &y3, &z3, &x1, &y1, &z1, &x2, &y2, &z2) +- return p224ToAffine(&x3, &y3, &z3) +-} +- +-func (p224Curve) Double(bigX1, bigY1 *big.Int) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- z1[0] = 1 +- +- p224DoubleJacobian(&x2, &y2, &z2, &x1, &y1, &z1) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-func (p224Curve) ScalarMult(bigX1, bigY1 *big.Int, scalar []byte) (x, y *big.Int) { +- var x1, y1, z1, x2, y2, z2 p224FieldElement +- +- p224FromBig(&x1, bigX1) +- p224FromBig(&y1, bigY1) +- z1[0] = 1 +- +- p224ScalarMult(&x2, &y2, &z2, &x1, &y1, &z1, scalar) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-func (curve p224Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { +- var z1, x2, y2, z2 p224FieldElement +- +- z1[0] = 1 +- p224ScalarMult(&x2, &y2, &z2, &curve.gx, &curve.gy, &z1, scalar) +- return p224ToAffine(&x2, &y2, &z2) +-} +- +-// Field element functions. +-// +-// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1. +-// +-// Field elements are represented by a FieldElement, which is a typedef to an +-// array of 8 uint32's. The value of a FieldElement, a, is: +-// a[0] + 2**28·a[1] + 2**56·a[1] + ... + 2**196·a[7] +-// +-// Using 28-bit limbs means that there's only 4 bits of headroom, which is less +-// than we would really like. 
But it has the useful feature that we hit 2**224 +-// exactly, making the reflections during a reduce much nicer. +-type p224FieldElement [8]uint32 +- +-// p224P is the order of the field, represented as a p224FieldElement. +-var p224P = [8]uint32{1, 0, 0, 0xffff000, 0xfffffff, 0xfffffff, 0xfffffff, 0xfffffff} +- +-// p224IsZero returns 1 if a == 0 mod p and 0 otherwise. +-// +-// a[i] < 2**29 +-func p224IsZero(a *p224FieldElement) uint32 { +- // Since a p224FieldElement contains 224 bits there are two possible +- // representations of 0: 0 and p. +- var minimal p224FieldElement +- p224Contract(&minimal, a) +- +- var isZero, isP uint32 +- for i, v := range minimal { +- isZero |= v +- isP |= v - p224P[i] +- } +- +- // If either isZero or isP is 0, then we should return 1. +- isZero |= isZero >> 16 +- isZero |= isZero >> 8 +- isZero |= isZero >> 4 +- isZero |= isZero >> 2 +- isZero |= isZero >> 1 +- +- isP |= isP >> 16 +- isP |= isP >> 8 +- isP |= isP >> 4 +- isP |= isP >> 2 +- isP |= isP >> 1 +- +- // For isZero and isP, the LSB is 0 iff all the bits are zero. +- result := isZero & isP +- result = (^result) & 1 +- +- return result +-} +- +-// p224Add computes *out = a+b +-// +-// a[i] + b[i] < 2**32 +-func p224Add(out, a, b *p224FieldElement) { +- for i := 0; i < 8; i++ { +- out[i] = a[i] + b[i] +- } +-} +- +-const two31p3 = 1<<31 + 1<<3 +-const two31m3 = 1<<31 - 1<<3 +-const two31m15m3 = 1<<31 - 1<<15 - 1<<3 +- +-// p224ZeroModP31 is 0 mod p where bit 31 is set in all limbs so that we can +-// subtract smaller amounts without underflow. See the section "Subtraction" in +-// [1] for reasoning. +-var p224ZeroModP31 = []uint32{two31p3, two31m3, two31m3, two31m15m3, two31m3, two31m3, two31m3, two31m3} +- +-// p224Sub computes *out = a-b +-// +-// a[i], b[i] < 2**30 +-// out[i] < 2**32 +-func p224Sub(out, a, b *p224FieldElement) { +- for i := 0; i < 8; i++ { +- out[i] = a[i] + p224ZeroModP31[i] - b[i] +- } +-} +- +-// LargeFieldElement also represents an element of the field. The limbs are +-// still spaced 28-bits apart and in little-endian order. So the limbs are at +-// 0, 28, 56, ..., 392 bits, each 64-bits wide. +-type p224LargeFieldElement [15]uint64 +- +-const two63p35 = 1<<63 + 1<<35 +-const two63m35 = 1<<63 - 1<<35 +-const two63m35m19 = 1<<63 - 1<<35 - 1<<19 +- +-// p224ZeroModP63 is 0 mod p where bit 63 is set in all limbs. See the section +-// "Subtraction" in [1] for why. +-var p224ZeroModP63 = [8]uint64{two63p35, two63m35, two63m35, two63m35, two63m35m19, two63m35, two63m35, two63m35} +- +-const bottom12Bits = 0xfff +-const bottom28Bits = 0xfffffff +- +-// p224Mul computes *out = a*b +-// +-// a[i] < 2**29, b[i] < 2**30 (or vice versa) +-// out[i] < 2**29 +-func p224Mul(out, a, b *p224FieldElement, tmp *p224LargeFieldElement) { +- for i := 0; i < 15; i++ { +- tmp[i] = 0 +- } +- +- for i := 0; i < 8; i++ { +- for j := 0; j < 8; j++ { +- tmp[i+j] += uint64(a[i]) * uint64(b[j]) +- } +- } +- +- p224ReduceLarge(out, tmp) +-} +- +-// Square computes *out = a*a +-// +-// a[i] < 2**29 +-// out[i] < 2**29 +-func p224Square(out, a *p224FieldElement, tmp *p224LargeFieldElement) { +- for i := 0; i < 15; i++ { +- tmp[i] = 0 +- } +- +- for i := 0; i < 8; i++ { +- for j := 0; j <= i; j++ { +- r := uint64(a[i]) * uint64(a[j]) +- if i == j { +- tmp[i+j] += r +- } else { +- tmp[i+j] += r << 1 +- } +- } +- } +- +- p224ReduceLarge(out, tmp) +-} +- +-// ReduceLarge converts a p224LargeFieldElement to a p224FieldElement. 
+-// +-// in[i] < 2**62 +-func p224ReduceLarge(out *p224FieldElement, in *p224LargeFieldElement) { +- for i := 0; i < 8; i++ { +- in[i] += p224ZeroModP63[i] +- } +- +- // Eliminate the coefficients at 2**224 and greater. +- for i := 14; i >= 8; i-- { +- in[i-8] -= in[i] +- in[i-5] += (in[i] & 0xffff) << 12 +- in[i-4] += in[i] >> 16 +- } +- in[8] = 0 +- // in[0..8] < 2**64 +- +- // As the values become small enough, we start to store them in |out| +- // and use 32-bit operations. +- for i := 1; i < 8; i++ { +- in[i+1] += in[i] >> 28 +- out[i] = uint32(in[i] & bottom28Bits) +- } +- in[0] -= in[8] +- out[3] += uint32(in[8]&0xffff) << 12 +- out[4] += uint32(in[8] >> 16) +- // in[0] < 2**64 +- // out[3] < 2**29 +- // out[4] < 2**29 +- // out[1,2,5..7] < 2**28 +- +- out[0] = uint32(in[0] & bottom28Bits) +- out[1] += uint32((in[0] >> 28) & bottom28Bits) +- out[2] += uint32(in[0] >> 56) +- // out[0] < 2**28 +- // out[1..4] < 2**29 +- // out[5..7] < 2**28 +-} +- +-// Reduce reduces the coefficients of a to smaller bounds. +-// +-// On entry: a[i] < 2**31 + 2**30 +-// On exit: a[i] < 2**29 +-func p224Reduce(a *p224FieldElement) { +- for i := 0; i < 7; i++ { +- a[i+1] += a[i] >> 28 +- a[i] &= bottom28Bits +- } +- top := a[7] >> 28 +- a[7] &= bottom28Bits +- +- // top < 2**4 +- mask := top +- mask |= mask >> 2 +- mask |= mask >> 1 +- mask <<= 31 +- mask = uint32(int32(mask) >> 31) +- // Mask is all ones if top != 0, all zero otherwise +- +- a[0] -= top +- a[3] += top << 12 +- +- // We may have just made a[0] negative but, if we did, then we must +- // have added something to a[3], this it's > 2**12. Therefore we can +- // carry down to a[0]. +- a[3] -= 1 & mask +- a[2] += mask & (1<<28 - 1) +- a[1] += mask & (1<<28 - 1) +- a[0] += mask & (1 << 28) +-} +- +-// p224Invert calculates *out = in**-1 by computing in**(2**224 - 2**96 - 1), +-// i.e. Fermat's little theorem. +-func p224Invert(out, in *p224FieldElement) { +- var f1, f2, f3, f4 p224FieldElement +- var c p224LargeFieldElement +- +- p224Square(&f1, in, &c) // 2 +- p224Mul(&f1, &f1, in, &c) // 2**2 - 1 +- p224Square(&f1, &f1, &c) // 2**3 - 2 +- p224Mul(&f1, &f1, in, &c) // 2**3 - 1 +- p224Square(&f2, &f1, &c) // 2**4 - 2 +- p224Square(&f2, &f2, &c) // 2**5 - 4 +- p224Square(&f2, &f2, &c) // 2**6 - 8 +- p224Mul(&f1, &f1, &f2, &c) // 2**6 - 1 +- p224Square(&f2, &f1, &c) // 2**7 - 2 +- for i := 0; i < 5; i++ { // 2**12 - 2**6 +- p224Square(&f2, &f2, &c) +- } +- p224Mul(&f2, &f2, &f1, &c) // 2**12 - 1 +- p224Square(&f3, &f2, &c) // 2**13 - 2 +- for i := 0; i < 11; i++ { // 2**24 - 2**12 +- p224Square(&f3, &f3, &c) +- } +- p224Mul(&f2, &f3, &f2, &c) // 2**24 - 1 +- p224Square(&f3, &f2, &c) // 2**25 - 2 +- for i := 0; i < 23; i++ { // 2**48 - 2**24 +- p224Square(&f3, &f3, &c) +- } +- p224Mul(&f3, &f3, &f2, &c) // 2**48 - 1 +- p224Square(&f4, &f3, &c) // 2**49 - 2 +- for i := 0; i < 47; i++ { // 2**96 - 2**48 +- p224Square(&f4, &f4, &c) +- } +- p224Mul(&f3, &f3, &f4, &c) // 2**96 - 1 +- p224Square(&f4, &f3, &c) // 2**97 - 2 +- for i := 0; i < 23; i++ { // 2**120 - 2**24 +- p224Square(&f4, &f4, &c) +- } +- p224Mul(&f2, &f4, &f2, &c) // 2**120 - 1 +- for i := 0; i < 6; i++ { // 2**126 - 2**6 +- p224Square(&f2, &f2, &c) +- } +- p224Mul(&f1, &f1, &f2, &c) // 2**126 - 1 +- p224Square(&f1, &f1, &c) // 2**127 - 2 +- p224Mul(&f1, &f1, in, &c) // 2**127 - 1 +- for i := 0; i < 97; i++ { // 2**224 - 2**97 +- p224Square(&f1, &f1, &c) +- } +- p224Mul(out, &f1, &f3, &c) // 2**224 - 2**96 - 1 +-} +- +-// p224Contract converts a FieldElement to its unique, minimal form. 
+-// +-// On entry, in[i] < 2**29 +-// On exit, in[i] < 2**28 +-func p224Contract(out, in *p224FieldElement) { +- copy(out[:], in[:]) +- +- for i := 0; i < 7; i++ { +- out[i+1] += out[i] >> 28 +- out[i] &= bottom28Bits +- } +- top := out[7] >> 28 +- out[7] &= bottom28Bits +- +- out[0] -= top +- out[3] += top << 12 +- +- // We may just have made out[i] negative. So we carry down. If we made +- // out[0] negative then we know that out[3] is sufficiently positive +- // because we just added to it. +- for i := 0; i < 3; i++ { +- mask := uint32(int32(out[i]) >> 31) +- out[i] += (1 << 28) & mask +- out[i+1] -= 1 & mask +- } +- +- // We might have pushed out[3] over 2**28 so we perform another, partial, +- // carry chain. +- for i := 3; i < 7; i++ { +- out[i+1] += out[i] >> 28 +- out[i] &= bottom28Bits +- } +- top = out[7] >> 28 +- out[7] &= bottom28Bits +- +- // Eliminate top while maintaining the same value mod p. +- out[0] -= top +- out[3] += top << 12 +- +- // There are two cases to consider for out[3]: +- // 1) The first time that we eliminated top, we didn't push out[3] over +- // 2**28. In this case, the partial carry chain didn't change any values +- // and top is zero. +- // 2) We did push out[3] over 2**28 the first time that we eliminated top. +- // The first value of top was in [0..16), therefore, prior to eliminating +- // the first top, 0xfff1000 <= out[3] <= 0xfffffff. Therefore, after +- // overflowing and being reduced by the second carry chain, out[3] <= +- // 0xf000. Thus it cannot have overflowed when we eliminated top for the +- // second time. +- +- // Again, we may just have made out[0] negative, so do the same carry down. +- // As before, if we made out[0] negative then we know that out[3] is +- // sufficiently positive. +- for i := 0; i < 3; i++ { +- mask := uint32(int32(out[i]) >> 31) +- out[i] += (1 << 28) & mask +- out[i+1] -= 1 & mask +- } +- +- // Now we see if the value is >= p and, if so, subtract p. +- +- // First we build a mask from the top four limbs, which must all be +- // equal to bottom28Bits if the whole value is >= p. If top4AllOnes +- // ends up with any zero bits in the bottom 28 bits, then this wasn't +- // true. +- top4AllOnes := uint32(0xffffffff) +- for i := 4; i < 8; i++ { +- top4AllOnes &= out[i] +- } +- top4AllOnes |= 0xf0000000 +- // Now we replicate any zero bits to all the bits in top4AllOnes. +- top4AllOnes &= top4AllOnes >> 16 +- top4AllOnes &= top4AllOnes >> 8 +- top4AllOnes &= top4AllOnes >> 4 +- top4AllOnes &= top4AllOnes >> 2 +- top4AllOnes &= top4AllOnes >> 1 +- top4AllOnes = uint32(int32(top4AllOnes<<31) >> 31) +- +- // Now we test whether the bottom three limbs are non-zero. +- bottom3NonZero := out[0] | out[1] | out[2] +- bottom3NonZero |= bottom3NonZero >> 16 +- bottom3NonZero |= bottom3NonZero >> 8 +- bottom3NonZero |= bottom3NonZero >> 4 +- bottom3NonZero |= bottom3NonZero >> 2 +- bottom3NonZero |= bottom3NonZero >> 1 +- bottom3NonZero = uint32(int32(bottom3NonZero<<31) >> 31) +- +- // Everything depends on the value of out[3]. 
+- // If it's > 0xffff000 and top4AllOnes != 0 then the whole value is >= p +- // If it's = 0xffff000 and top4AllOnes != 0 and bottom3NonZero != 0, +- // then the whole value is >= p +- // If it's < 0xffff000, then the whole value is < p +- n := out[3] - 0xffff000 +- out3Equal := n +- out3Equal |= out3Equal >> 16 +- out3Equal |= out3Equal >> 8 +- out3Equal |= out3Equal >> 4 +- out3Equal |= out3Equal >> 2 +- out3Equal |= out3Equal >> 1 +- out3Equal = ^uint32(int32(out3Equal<<31) >> 31) +- +- // If out[3] > 0xffff000 then n's MSB will be zero. +- out3GT := ^uint32(int32(n) >> 31) +- +- mask := top4AllOnes & ((out3Equal & bottom3NonZero) | out3GT) +- out[0] -= 1 & mask +- out[3] -= 0xffff000 & mask +- out[4] -= 0xfffffff & mask +- out[5] -= 0xfffffff & mask +- out[6] -= 0xfffffff & mask +- out[7] -= 0xfffffff & mask +-} +- +-// Group element functions. +-// +-// These functions deal with group elements. The group is an elliptic curve +-// group with a = -3 defined in FIPS 186-3, section D.2.2. +- +-// p224AddJacobian computes *out = a+b where a != b. +-func p224AddJacobian(x3, y3, z3, x1, y1, z1, x2, y2, z2 *p224FieldElement) { +- // See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-p224Add-2007-bl +- var z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v p224FieldElement +- var c p224LargeFieldElement +- +- z1IsZero := p224IsZero(z1) +- z2IsZero := p224IsZero(z2) +- +- // Z1Z1 = Z1² +- p224Square(&z1z1, z1, &c) +- // Z2Z2 = Z2² +- p224Square(&z2z2, z2, &c) +- // U1 = X1*Z2Z2 +- p224Mul(&u1, x1, &z2z2, &c) +- // U2 = X2*Z1Z1 +- p224Mul(&u2, x2, &z1z1, &c) +- // S1 = Y1*Z2*Z2Z2 +- p224Mul(&s1, z2, &z2z2, &c) +- p224Mul(&s1, y1, &s1, &c) +- // S2 = Y2*Z1*Z1Z1 +- p224Mul(&s2, z1, &z1z1, &c) +- p224Mul(&s2, y2, &s2, &c) +- // H = U2-U1 +- p224Sub(&h, &u2, &u1) +- p224Reduce(&h) +- xEqual := p224IsZero(&h) +- // I = (2*H)² +- for j := 0; j < 8; j++ { +- i[j] = h[j] << 1 +- } +- p224Reduce(&i) +- p224Square(&i, &i, &c) +- // J = H*I +- p224Mul(&j, &h, &i, &c) +- // r = 2*(S2-S1) +- p224Sub(&r, &s2, &s1) +- p224Reduce(&r) +- yEqual := p224IsZero(&r) +- if xEqual == 1 && yEqual == 1 && z1IsZero == 0 && z2IsZero == 0 { +- p224DoubleJacobian(x3, y3, z3, x1, y1, z1) +- return +- } +- for i := 0; i < 8; i++ { +- r[i] <<= 1 +- } +- p224Reduce(&r) +- // V = U1*I +- p224Mul(&v, &u1, &i, &c) +- // Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H +- p224Add(&z1z1, &z1z1, &z2z2) +- p224Add(&z2z2, z1, z2) +- p224Reduce(&z2z2) +- p224Square(&z2z2, &z2z2, &c) +- p224Sub(z3, &z2z2, &z1z1) +- p224Reduce(z3) +- p224Mul(z3, z3, &h, &c) +- // X3 = r²-J-2*V +- for i := 0; i < 8; i++ { +- z1z1[i] = v[i] << 1 +- } +- p224Add(&z1z1, &j, &z1z1) +- p224Reduce(&z1z1) +- p224Square(x3, &r, &c) +- p224Sub(x3, x3, &z1z1) +- p224Reduce(x3) +- // Y3 = r*(V-X3)-2*S1*J +- for i := 0; i < 8; i++ { +- s1[i] <<= 1 +- } +- p224Mul(&s1, &s1, &j, &c) +- p224Sub(&z1z1, &v, x3) +- p224Reduce(&z1z1) +- p224Mul(&z1z1, &z1z1, &r, &c) +- p224Sub(y3, &z1z1, &s1) +- p224Reduce(y3) +- +- p224CopyConditional(x3, x2, z1IsZero) +- p224CopyConditional(x3, x1, z2IsZero) +- p224CopyConditional(y3, y2, z1IsZero) +- p224CopyConditional(y3, y1, z2IsZero) +- p224CopyConditional(z3, z2, z1IsZero) +- p224CopyConditional(z3, z1, z2IsZero) +-} +- +-// p224DoubleJacobian computes *out = a+a. 
+-func p224DoubleJacobian(x3, y3, z3, x1, y1, z1 *p224FieldElement) { +- var delta, gamma, beta, alpha, t p224FieldElement +- var c p224LargeFieldElement +- +- p224Square(&delta, z1, &c) +- p224Square(&gamma, y1, &c) +- p224Mul(&beta, x1, &gamma, &c) +- +- // alpha = 3*(X1-delta)*(X1+delta) +- p224Add(&t, x1, &delta) +- for i := 0; i < 8; i++ { +- t[i] += t[i] << 1 +- } +- p224Reduce(&t) +- p224Sub(&alpha, x1, &delta) +- p224Reduce(&alpha) +- p224Mul(&alpha, &alpha, &t, &c) +- +- // Z3 = (Y1+Z1)²-gamma-delta +- p224Add(z3, y1, z1) +- p224Reduce(z3) +- p224Square(z3, z3, &c) +- p224Sub(z3, z3, &gamma) +- p224Reduce(z3) +- p224Sub(z3, z3, &delta) +- p224Reduce(z3) +- +- // X3 = alpha²-8*beta +- for i := 0; i < 8; i++ { +- delta[i] = beta[i] << 3 +- } +- p224Reduce(&delta) +- p224Square(x3, &alpha, &c) +- p224Sub(x3, x3, &delta) +- p224Reduce(x3) +- +- // Y3 = alpha*(4*beta-X3)-8*gamma² +- for i := 0; i < 8; i++ { +- beta[i] <<= 2 +- } +- p224Sub(&beta, &beta, x3) +- p224Reduce(&beta) +- p224Square(&gamma, &gamma, &c) +- for i := 0; i < 8; i++ { +- gamma[i] <<= 3 +- } +- p224Reduce(&gamma) +- p224Mul(y3, &alpha, &beta, &c) +- p224Sub(y3, y3, &gamma) +- p224Reduce(y3) +-} +- +-// p224CopyConditional sets *out = *in iff the least-significant-bit of control +-// is true, and it runs in constant time. +-func p224CopyConditional(out, in *p224FieldElement, control uint32) { +- control <<= 31 +- control = uint32(int32(control) >> 31) +- +- for i := 0; i < 8; i++ { +- out[i] ^= (out[i] ^ in[i]) & control +- } +-} +- +-func p224ScalarMult(outX, outY, outZ, inX, inY, inZ *p224FieldElement, scalar []byte) { +- var xx, yy, zz p224FieldElement +- for i := 0; i < 8; i++ { +- outX[i] = 0 +- outY[i] = 0 +- outZ[i] = 0 +- } +- +- for _, byte := range scalar { +- for bitNum := uint(0); bitNum < 8; bitNum++ { +- p224DoubleJacobian(outX, outY, outZ, outX, outY, outZ) +- bit := uint32((byte >> (7 - bitNum)) & 1) +- p224AddJacobian(&xx, &yy, &zz, inX, inY, inZ, outX, outY, outZ) +- p224CopyConditional(outX, &xx, bit) +- p224CopyConditional(outY, &yy, bit) +- p224CopyConditional(outZ, &zz, bit) +- } +- } +-} +- +-// p224ToAffine converts from Jacobian to affine form. +-func p224ToAffine(x, y, z *p224FieldElement) (*big.Int, *big.Int) { +- var zinv, zinvsq, outx, outy p224FieldElement +- var tmp p224LargeFieldElement +- +- if isPointAtInfinity := p224IsZero(z); isPointAtInfinity == 1 { +- return new(big.Int), new(big.Int) +- } +- +- p224Invert(&zinv, z) +- p224Square(&zinvsq, &zinv, &tmp) +- p224Mul(x, x, &zinvsq, &tmp) +- p224Mul(&zinvsq, &zinvsq, &zinv, &tmp) +- p224Mul(y, y, &zinvsq, &tmp) +- +- p224Contract(&outx, x) +- p224Contract(&outy, y) +- return p224ToBig(&outx), p224ToBig(&outy) +-} +- +-// get28BitsFromEnd returns the least-significant 28 bits from buf>>shift, +-// where buf is interpreted as a big-endian number. +-func get28BitsFromEnd(buf []byte, shift uint) (uint32, []byte) { +- var ret uint32 +- +- for i := uint(0); i < 4; i++ { +- var b byte +- if l := len(buf); l > 0 { +- b = buf[l-1] +- // We don't remove the byte if we're about to return and we're not +- // reading all of it. +- if i != 3 || shift == 4 { +- buf = buf[:l-1] +- } +- } +- ret |= uint32(b) << (8 * i) >> shift +- } +- ret &= bottom28Bits +- return ret, buf +-} +- +-// p224FromBig sets *out = *in. 
+-func p224FromBig(out *p224FieldElement, in *big.Int) { +- bytes := in.Bytes() +- out[0], bytes = get28BitsFromEnd(bytes, 0) +- out[1], bytes = get28BitsFromEnd(bytes, 4) +- out[2], bytes = get28BitsFromEnd(bytes, 0) +- out[3], bytes = get28BitsFromEnd(bytes, 4) +- out[4], bytes = get28BitsFromEnd(bytes, 0) +- out[5], bytes = get28BitsFromEnd(bytes, 4) +- out[6], bytes = get28BitsFromEnd(bytes, 0) +- out[7], bytes = get28BitsFromEnd(bytes, 4) +-} +- +-// p224ToBig returns in as a big.Int. +-func p224ToBig(in *p224FieldElement) *big.Int { +- var buf [28]byte +- buf[27] = byte(in[0]) +- buf[26] = byte(in[0] >> 8) +- buf[25] = byte(in[0] >> 16) +- buf[24] = byte(((in[0] >> 24) & 0x0f) | (in[1]<<4)&0xf0) +- +- buf[23] = byte(in[1] >> 4) +- buf[22] = byte(in[1] >> 12) +- buf[21] = byte(in[1] >> 20) +- +- buf[20] = byte(in[2]) +- buf[19] = byte(in[2] >> 8) +- buf[18] = byte(in[2] >> 16) +- buf[17] = byte(((in[2] >> 24) & 0x0f) | (in[3]<<4)&0xf0) +- +- buf[16] = byte(in[3] >> 4) +- buf[15] = byte(in[3] >> 12) +- buf[14] = byte(in[3] >> 20) +- +- buf[13] = byte(in[4]) +- buf[12] = byte(in[4] >> 8) +- buf[11] = byte(in[4] >> 16) +- buf[10] = byte(((in[4] >> 24) & 0x0f) | (in[5]<<4)&0xf0) +- +- buf[9] = byte(in[5] >> 4) +- buf[8] = byte(in[5] >> 12) +- buf[7] = byte(in[5] >> 20) +- +- buf[6] = byte(in[6]) +- buf[5] = byte(in[6] >> 8) +- buf[4] = byte(in[6] >> 16) +- buf[3] = byte(((in[6] >> 24) & 0x0f) | (in[7]<<4)&0xf0) +- +- buf[2] = byte(in[7] >> 4) +- buf[1] = byte(in[7] >> 12) +- buf[0] = byte(in[7] >> 20) +- +- return new(big.Int).SetBytes(buf[:]) +-} +--- libgo/go/crypto/elliptic/p224_test.go.jj 2014-02-18 18:03:31.615598561 +0100 ++++ libgo/go/crypto/elliptic/p224_test.go 2014-02-15 11:40:56.191557928 +0100 +@@ -1,47 +0,0 @@ +-// Copyright 2012 The Go Authors. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. 
+- +-package elliptic +- +-import ( +- "math/big" +- "testing" +-) +- +-var toFromBigTests = []string{ +- "0", +- "1", +- "23", +- "b70e0cb46bb4bf7f321390b94a03c1d356c01122343280d6105c1d21", +- "706a46d476dcb76798e6046d89474788d164c18032d268fd10704fa6", +-} +- +-func p224AlternativeToBig(in *p224FieldElement) *big.Int { +- ret := new(big.Int) +- tmp := new(big.Int) +- +- for i := uint(0); i < 8; i++ { +- tmp.SetInt64(int64(in[i])) +- tmp.Lsh(tmp, 28*i) +- ret.Add(ret, tmp) +- } +- ret.Mod(ret, p224.P) +- return ret +-} +- +-func TestToFromBig(t *testing.T) { +- for i, test := range toFromBigTests { +- n, _ := new(big.Int).SetString(test, 16) +- var x p224FieldElement +- p224FromBig(&x, n) +- m := p224ToBig(&x) +- if n.Cmp(m) != 0 { +- t.Errorf("#%d: %x != %x", i, n, m) +- } +- q := p224AlternativeToBig(&x) +- if n.Cmp(q) != 0 { +- t.Errorf("#%d: %x != %x (alternative)", i, n, m) +- } +- } +-} diff --git a/SOURCES/gcc48-libgomp-20160715.patch b/SOURCES/gcc48-libgomp-20160715.patch new file mode 100644 index 0000000..9b6a61e --- /dev/null +++ b/SOURCES/gcc48-libgomp-20160715.patch @@ -0,0 +1,10653 @@ +--- libgomp/config/linux/wait.h.jj 2013-01-31 20:29:10.091548989 +0100 ++++ libgomp/config/linux/wait.h 2016-07-13 16:57:18.902355979 +0200 +@@ -34,13 +34,13 @@ + + #define FUTEX_WAIT 0 + #define FUTEX_WAKE 1 +-#define FUTEX_PRIVATE_FLAG 128L ++#define FUTEX_PRIVATE_FLAG 128 + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) + #endif + +-extern long int gomp_futex_wait, gomp_futex_wake; ++extern int gomp_futex_wait, gomp_futex_wake; + + #include + +@@ -48,7 +48,9 @@ static inline int do_spin (int *addr, in + { + unsigned long long i, count = gomp_spin_count_var; + +- if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) ++ if (__builtin_expect (__atomic_load_n (&gomp_managed_threads, ++ MEMMODEL_RELAXED) ++ > gomp_available_cpus, 0)) + count = gomp_throttled_spin_count_var; + for (i = 0; i < count; i++) + if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0)) +--- libgomp/config/linux/affinity.c.jj 2014-05-15 10:56:37.499502573 +0200 ++++ libgomp/config/linux/affinity.c 2016-07-13 16:57:18.902355979 +0200 +@@ -352,6 +352,45 @@ gomp_affinity_print_place (void *p) + fprintf (stderr, ":%lu", len); + } + ++int ++omp_get_place_num_procs (int place_num) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return 0; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp); ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) ++ + #else + + #include "../posix/affinity.c" +--- libgomp/config/linux/mutex.c.jj 2013-01-21 16:00:38.220917670 +0100 ++++ libgomp/config/linux/mutex.c 2016-07-13 16:57:18.870356375 +0200 +@@ -28,8 +28,8 @@ + + 
#include "wait.h" + +-long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; +-long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; + + void + gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval) +--- libgomp/config/posix/affinity.c.jj 2014-05-15 10:56:37.987498844 +0200 ++++ libgomp/config/posix/affinity.c 2016-07-15 12:08:28.410015743 +0200 +@@ -113,3 +113,27 @@ gomp_affinity_print_place (void *p) + { + (void) p; + } ++ ++int ++omp_get_place_num_procs (int place_num) ++{ ++ (void) place_num; ++ return 0; ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) +--- libgomp/loop_ull.c.jj 2013-01-21 16:00:46.477871806 +0100 ++++ libgomp/loop_ull.c 2016-07-13 16:57:18.918355780 +0200 +@@ -174,15 +174,15 @@ GOMP_loop_ull_runtime_start (bool up, go + { + case GFS_STATIC: + return gomp_loop_ull_static_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -278,15 +278,15 @@ GOMP_loop_ull_ordered_runtime_start (boo + { + case GFS_STATIC: + return gomp_loop_ull_ordered_static_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_ordered_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -298,6 +298,114 @@ GOMP_loop_ull_ordered_runtime_start (boo + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
*/ ++ ++static bool ++gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_ull_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_guided_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ struct gomp_task_icv *icv = gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. 
If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -457,6 +565,10 @@ extern __typeof(gomp_loop_ull_dynamic_st + __attribute__((alias ("gomp_loop_ull_dynamic_start"))); + extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start + __attribute__((alias ("gomp_loop_ull_guided_start"))); ++extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_dynamic_start"))); ++extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_ull_guided_start"))); + + extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start + __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); +@@ -465,12 +577,23 @@ extern __typeof(gomp_loop_ull_ordered_dy + extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start + __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); + ++extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start ++ __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); ++extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start ++ __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next + __attribute__((alias ("gomp_loop_ull_static_next"))); + extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next + __attribute__((alias ("gomp_loop_ull_dynamic_next"))); + extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next + __attribute__((alias ("gomp_loop_ull_guided_next"))); ++extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next ++ __attribute__((alias ("gomp_loop_ull_dynamic_next"))); ++extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_ull_guided_next"))); + + extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next + __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); +@@ -507,6 +630,25 @@ GOMP_loop_ull_guided_start (bool up, gom + } + + bool ++GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, ++ gomp_ull end, gomp_ull incr, ++ gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, ++ gomp_ull incr, gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool + GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +@@ -534,6 +676,33 @@ GOMP_loop_ull_ordered_guided_start (bool + } + + bool ++GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return 
gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) + { + return gomp_loop_ull_static_next (istart, iend); +@@ -550,6 +719,18 @@ GOMP_loop_ull_guided_next (gomp_ull *ist + { + return gomp_loop_ull_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_next (istart, iend); ++} + + bool + GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) +--- libgomp/team.c.jj 2014-05-15 10:56:32.092524669 +0200 ++++ libgomp/team.c 2016-07-13 17:58:01.907291111 +0200 +@@ -133,6 +133,25 @@ gomp_thread_start (void *xdata) + return NULL; + } + ++static inline struct gomp_team * ++get_last_team (unsigned nthreads) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team == NULL) ++ { ++ struct gomp_thread_pool *pool = thr->thread_pool; ++ if (pool != NULL) ++ { ++ struct gomp_team *last_team = pool->last_team; ++ if (last_team != NULL && last_team->nthreads == nthreads) ++ { ++ pool->last_team = NULL; ++ return last_team; ++ } ++ } ++ } ++ return NULL; ++} + + /* Create a new team data structure. */ + +@@ -140,18 +159,27 @@ struct gomp_team * + gomp_new_team (unsigned nthreads) + { + struct gomp_team *team; +- size_t size; + int i; + +- size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) +- + sizeof (team->implicit_task[0])); +- team = gomp_malloc (size); ++ team = get_last_team (nthreads); ++ if (team == NULL) ++ { ++ size_t extra = sizeof (team->ordered_release[0]) ++ + sizeof (team->implicit_task[0]); ++ team = gomp_malloc (sizeof (*team) + nthreads * extra); ++ ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_init (&team->work_share_list_free_lock); ++#endif ++ gomp_barrier_init (&team->barrier, nthreads); ++ gomp_mutex_init (&team->task_lock); ++ ++ team->nthreads = nthreads; ++ } + + team->work_share_chunk = 8; + #ifdef HAVE_SYNC_BUILTINS + team->single_count = 0; +-#else +- gomp_mutex_init (&team->work_share_list_free_lock); + #endif + team->work_shares_to_free = &team->work_shares[0]; + gomp_init_work_share (&team->work_shares[0], false, nthreads); +@@ -162,15 +190,11 @@ gomp_new_team (unsigned nthreads) + team->work_shares[i].next_free = &team->work_shares[i + 1]; + team->work_shares[i].next_free = NULL; + +- team->nthreads = nthreads; +- gomp_barrier_init (&team->barrier, nthreads); +- + gomp_sem_init (&team->master_release, 0); + team->ordered_release = (void *) &team->implicit_task[nthreads]; + team->ordered_release[0] = &team->master_release; + +- gomp_mutex_init (&team->task_lock); +- team->task_queue = NULL; ++ priority_queue_init (&team->task_queue); + team->task_count = 0; + team->task_queued_count = 0; + team->task_running_count = 0; +@@ -186,8 +210,12 @@ gomp_new_team (unsigned nthreads) + static void + free_team (struct gomp_team *team) + { ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_destroy (&team->work_share_list_free_lock); ++#endif + gomp_barrier_destroy (&team->barrier); + gomp_mutex_destroy (&team->task_lock); ++ priority_queue_free (&team->task_queue); 
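A minimal C sketch of the team-reuse pattern introduced by get_last_team in the team.c hunk above; the struct layout and helper name here are illustrative stand-ins, not the libgomp API. The last finished team is cached in the thread pool and handed back when the next parallel region requests the same thread count, so the barrier and mutex initialisation only runs on a cache miss:

#include <stdlib.h>

struct team { unsigned nthreads; int initialized; /* barriers, mutexes, ... */ };
struct pool { struct team *last_team; };

static struct team *
team_for (struct pool *pool, unsigned nthreads)
{
  struct team *t = pool->last_team;
  if (t != NULL && t->nthreads == nthreads)
    {
      pool->last_team = NULL;        /* take ownership of the cached team */
      return t;                      /* one-time initialisation already done */
    }
  t = calloc (1, sizeof *t);         /* cache miss: allocate a fresh team */
  if (t == NULL)
    abort ();
  t->nthreads = nthreads;
  t->initialized = 1;                /* stand-in for gomp_barrier_init etc. */
  return t;
}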
+ free (team); + } + +@@ -258,6 +286,8 @@ gomp_free_thread (void *arg __attribute_ + free (pool); + thr->thread_pool = NULL; + } ++ if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0)) ++ gomp_team_end (); + if (thr->task != NULL) + { + struct gomp_task *task = thr->task; +@@ -287,7 +317,7 @@ gomp_team_start (void (*fn) (void *), vo + struct gomp_thread **affinity_thr = NULL; + + thr = gomp_thread (); +- nested = thr->ts.team != NULL; ++ nested = thr->ts.level; + if (__builtin_expect (thr->thread_pool == NULL, 0)) + { + thr->thread_pool = gomp_new_thread_pool (); +@@ -894,9 +924,6 @@ gomp_team_end (void) + while (ws != NULL); + } + gomp_sem_destroy (&team->master_release); +-#ifndef HAVE_SYNC_BUILTINS +- gomp_mutex_destroy (&team->work_share_list_free_lock); +-#endif + + if (__builtin_expect (thr->ts.team != NULL, 0) + || __builtin_expect (team->nthreads == 1, 0)) +--- libgomp/target.c.jj 2014-05-15 10:56:38.313498020 +0200 ++++ libgomp/target.c 2016-07-15 16:58:29.249328861 +0200 +@@ -22,14 +22,22 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +-/* This file handles the maintainence of threads in response to team +- creation and termination. */ ++/* This file contains the support of offloading. */ + ++#include "config.h" + #include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include + #include + #include ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif + #include ++#include ++#include + + attribute_hidden int + gomp_get_num_devices (void) +@@ -37,22 +45,87 @@ gomp_get_num_devices (void) + return 0; + } + +-/* Called when encountering a target directive. If DEVICE +- is -1, it means use device-var ICV. If it is -2 (or any other value +- larger than last available hw device, use host fallback. +- FN is address of host code, OPENMP_TARGET contains value of the +- __OPENMP_TARGET__ symbol in the shared library or binary that invokes +- GOMP_target. HOSTADDRS, SIZES and KINDS are arrays +- with MAPNUM entries, with addresses of the host objects, +- sizes of the host objects (resp. for pointer kind pointer bias +- and assumed sizeof (void *) size) and kinds. */ ++/* This function should be called from every offload image while loading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. */ + + void +-GOMP_target (int device, void (*fn) (void *), const void *openmp_target, +- size_t mapnum, void **hostaddrs, size_t *sizes, +- unsigned char *kinds) ++GOMP_offload_register_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_register (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function should be called from every offload image while unloading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. 
*/ ++ ++void ++GOMP_offload_unregister_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_unregister (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function initializes the target device, specified by DEVICEP. DEVICEP ++ must be locked on entry, and remains locked on return. */ ++ ++attribute_hidden void ++gomp_init_device (struct gomp_device_descr *devicep) ++{ ++ devicep->state = GOMP_DEVICE_INITIALIZED; ++} ++ ++attribute_hidden void ++gomp_unload_device (struct gomp_device_descr *devicep) ++{ ++} ++ ++/* Free address mapping tables. MM must be locked on entry, and remains locked ++ on return. */ ++ ++attribute_hidden void ++gomp_free_memmap (struct splay_tree_s *mem_map) ++{ ++ while (mem_map->root) ++ { ++ struct target_mem_desc *tgt = mem_map->root->key.tgt; ++ ++ splay_tree_remove (mem_map, &mem_map->root->key); ++ free (tgt->array); ++ free (tgt); ++ } ++} ++ ++/* Host fallback for GOMP_target{,_ext} routines. */ ++ ++static void ++gomp_target_fallback (void (*fn) (void *), void **hostaddrs) + { +- /* Host fallback. */ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); +@@ -66,10 +139,167 @@ GOMP_target (int device, void (*fn) (voi + *thr = old_thr; + } + ++/* Calculate alignment and size requirements of a private copy of data shared ++ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ ++ ++static inline void ++calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, ++ unsigned short *kinds, size_t *tgt_align, ++ size_t *tgt_size) ++{ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (*tgt_align < align) ++ *tgt_align = align; ++ *tgt_size = (*tgt_size + align - 1) & ~(align - 1); ++ *tgt_size += sizes[i]; ++ } ++} ++ ++/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ ++ ++static inline void ++copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, size_t tgt_align, ++ size_t tgt_size) ++{ ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++} ++ ++/* Called when encountering a target directive. If DEVICE ++ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is ++ GOMP_DEVICE_HOST_FALLBACK (or any value ++ larger than last available hw device), use host fallback. ++ FN is address of host code, UNUSED is part of the current ABI, but ++ we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays ++ with MAPNUM entries, with addresses of the host objects, ++ sizes of the host objects (resp. for pointer kind pointer bias ++ and assumed sizeof (void *) size) and kinds. 
*/ ++ ++void ++GOMP_target (int device, void (*fn) (void *), const void *unused, ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned char *kinds) ++{ ++ return gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, ++ and several arguments have been added: ++ FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. ++ DEPEND is array of dependencies, see GOMP_task for details. ++ ++ ARGS is a pointer to an array consisting of a variable number of both ++ device-independent and device-specific arguments, which can take one two ++ elements where the first specifies for which device it is intended, the type ++ and optionally also the value. If the value is not present in the first ++ one, the whole second element the actual value. The last element of the ++ array is a single NULL. Among the device independent can be for example ++ NUM_TEAMS and THREAD_LIMIT. ++ ++ NUM_TEAMS is positive if GOMP_teams will be called in the body with ++ that value, or 1 if teams construct is not present, or 0, if ++ teams construct does not have num_teams clause and so the choice is ++ implementation defined, and -1 if it can't be determined on the host ++ what value will GOMP_teams have on the device. ++ THREAD_LIMIT similarly is positive if GOMP_teams will be called in the ++ body with that value, or 0, if teams construct does not have thread_limit ++ clause or the teams construct is not present, or -1 if it can't be ++ determined on the host what value will GOMP_teams have on the device. */ ++ ++void ++GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args) ++{ ++ size_t tgt_align = 0, tgt_size = 0; ++ bool fpc_done = false; ++ ++ if (flags & GOMP_TARGET_FLAG_NOWAIT) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team ++ && !thr->task->final_task) ++ { ++ gomp_create_target_task (NULL, fn, mapnum, hostaddrs, ++ sizes, kinds, flags, depend, args, ++ GOMP_TARGET_TASK_BEFORE_MAP); ++ return; ++ } ++ } ++ ++ /* If there are depend clauses, but nowait is not present ++ (or we are in a final task), block the parent task until the ++ dependencies are resolved and then just continue with the rest ++ of the function as if it is a merged task. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ /* If we might need to wait, copy firstprivate now. */ ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ fpc_done = true; ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ ++ if (!fpc_done) ++ { ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ } ++ gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Host fallback for GOMP_target_data{,_ext} routines. 
*/ ++ ++static void ++gomp_target_data_fallback (void) ++{ ++} ++ + void +-GOMP_target_data (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_data (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { ++ return gomp_target_data_fallback (); ++} ++ ++void ++GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds) ++{ ++ return gomp_target_data_fallback (); + } + + void +@@ -78,12 +308,112 @@ GOMP_target_end_data (void) + } + + void +-GOMP_target_update (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_update (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { + } + + void ++GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags | GOMP_TARGET_FLAG_UPDATE, ++ depend, NULL, GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++void ++GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags, depend, NULL, ++ GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. 
*/ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++bool ++gomp_target_task_fn (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ ++ if (ttask->fn != NULL) ++ { ++ ttask->state = GOMP_TARGET_TASK_FALLBACK; ++ gomp_target_fallback (ttask->fn, ttask->hostaddrs); ++ return false; ++ } ++ return false; ++} ++ ++void + GOMP_teams (unsigned int num_teams, unsigned int thread_limit) + { + if (thread_limit) +@@ -94,3 +424,153 @@ GOMP_teams (unsigned int num_teams, unsi + } + (void) num_teams; + } ++ ++void * ++omp_target_alloc (size_t size, int device_num) ++{ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return malloc (size); ++ ++ return NULL; ++} ++ ++void ++omp_target_free (void *device_ptr, int device_num) ++{ ++ if (device_ptr == NULL) ++ return; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ { ++ free (device_ptr); ++ return; ++ } ++} ++ ++int ++omp_target_is_present (void *ptr, int device_num) ++{ ++ if (ptr == NULL) ++ return 1; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return 1; ++ ++ return 0; ++} ++ ++int ++omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, ++ size_t src_offset, int dst_device_num, int src_device_num) ++{ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); ++ return 0; ++} ++ ++#define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2)) ++ ++#define __builtin_mul_overflow(x, y, z) \ ++ ({ bool retval = false; \ ++ size_t xval = (x); \ ++ size_t yval = (y); \ ++ size_t zval = xval * yval; \ ++ if (__builtin_expect ((xval | yval) >= HALF_SIZE_T, 0)) \ ++ { \ ++ if (xval && zval / xval != yval) \ ++ retval = true; \ ++ } \ ++ *(z) = zval; \ ++ retval; }) ++ ++static int ++omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions) ++{ ++ size_t dst_slice = element_size; ++ size_t src_slice = element_size; ++ size_t j, dst_off, src_off, length; ++ int i, ret; ++ ++ ++ if (num_dims == 1) ++ { ++ if (__builtin_mul_overflow (element_size, volume[0], &length) ++ || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) ++ return EINVAL; ++ memcpy ((char *) dst + dst_off, (char *) src + src_off, length); ++ ret = 1; ++ return ret ? 0 : EINVAL; ++ } ++ ++ /* FIXME: it would be nice to have some plugin function to handle ++ num_dims == 2 and num_dims == 3 more efficiently. Larger ones can ++ be handled in the generic recursion below, and for host-host it ++ should be used even for any num_dims >= 2. 
*/ ++ ++ for (i = 1; i < num_dims; i++) ++ if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) ++ || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) ++ return EINVAL; ++ if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) ++ return EINVAL; ++ for (j = 0; j < volume[0]; j++) ++ { ++ ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, ++ (char *) src + src_off, ++ element_size, num_dims - 1, ++ volume + 1, dst_offsets + 1, ++ src_offsets + 1, dst_dimensions + 1, ++ src_dimensions + 1); ++ if (ret) ++ return ret; ++ dst_off += dst_slice; ++ src_off += src_slice; ++ } ++ return 0; ++} ++ ++int ++omp_target_memcpy_rect (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions, ++ int dst_device_num, int src_device_num) ++{ ++ if (!dst && !src) ++ return INT_MAX; ++ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ ++ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, ++ volume, dst_offsets, src_offsets, ++ dst_dimensions, src_dimensions); ++ return ret; ++} ++ ++int ++omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, ++ size_t device_offset, int device_num) ++{ ++ return EINVAL; ++} ++ ++int ++omp_target_disassociate_ptr (void *ptr, int device_num) ++{ ++ return EINVAL; ++} +--- libgomp/fortran.c.jj 2014-05-15 10:56:31.593531223 +0200 ++++ libgomp/fortran.c 2016-07-13 16:57:04.432535397 +0200 +@@ -67,12 +67,20 @@ ialias_redirect (omp_get_active_level) + ialias_redirect (omp_in_final) + ialias_redirect (omp_get_cancellation) + ialias_redirect (omp_get_proc_bind) ++ialias_redirect (omp_get_num_places) ++ialias_redirect (omp_get_place_num_procs) ++ialias_redirect (omp_get_place_proc_ids) ++ialias_redirect (omp_get_place_num) ++ialias_redirect (omp_get_partition_num_places) ++ialias_redirect (omp_get_partition_place_nums) + ialias_redirect (omp_set_default_device) + ialias_redirect (omp_get_default_device) + ialias_redirect (omp_get_num_devices) + ialias_redirect (omp_get_num_teams) + ialias_redirect (omp_get_team_num) + ialias_redirect (omp_is_initial_device) ++ialias_redirect (omp_get_initial_device) ++ialias_redirect (omp_get_max_task_priority) + #endif + + #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING +@@ -342,35 +350,35 @@ omp_get_wtime_ (void) + } + + void +-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier) ++omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size) + { +- omp_set_schedule (*kind, *modifier); ++ omp_set_schedule (*kind, *chunk_size); + } + + void +-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) ++omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size) + { +- omp_set_schedule (*kind, TO_INT (*modifier)); ++ omp_set_schedule (*kind, TO_INT (*chunk_size)); + } + + void +-omp_get_schedule_ (int32_t *kind, int32_t *modifier) ++omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + void +-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier) ++omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ 
int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + int32_t +@@ -451,6 +459,69 @@ omp_get_proc_bind_ (void) + return omp_get_proc_bind (); + } + ++int32_t ++omp_get_num_places_ (void) ++{ ++ return omp_get_num_places (); ++} ++ ++int32_t ++omp_get_place_num_procs_ (const int32_t *place_num) ++{ ++ return omp_get_place_num_procs (*place_num); ++} ++ ++int32_t ++omp_get_place_num_procs_8_ (const int64_t *place_num) ++{ ++ return omp_get_place_num_procs (TO_INT (*place_num)); ++} ++ ++void ++omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids) ++{ ++ omp_get_place_proc_ids (*place_num, (int *) ids); ++} ++ ++void ++omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids) ++{ ++ gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids); ++} ++ ++int32_t ++omp_get_place_num_ (void) ++{ ++ return omp_get_place_num (); ++} ++ ++int32_t ++omp_get_partition_num_places_ (void) ++{ ++ return omp_get_partition_num_places (); ++} ++ ++void ++omp_get_partition_place_nums_ (int32_t *place_nums) ++{ ++ omp_get_partition_place_nums ((int *) place_nums); ++} ++ ++void ++omp_get_partition_place_nums_8_ (int64_t *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = (int64_t) thr->ts.place_partition_off + i; ++} ++ + void + omp_set_default_device_ (const int32_t *device_num) + { +@@ -492,3 +563,15 @@ omp_is_initial_device_ (void) + { + return omp_is_initial_device (); + } ++ ++int32_t ++omp_get_initial_device_ (void) ++{ ++ return omp_get_initial_device (); ++} ++ ++int32_t ++omp_get_max_task_priority_ (void) ++{ ++ return omp_get_max_task_priority (); ++} +--- libgomp/libgomp.map.jj 2014-05-15 10:56:31.927533549 +0200 ++++ libgomp/libgomp.map 2016-07-13 16:57:04.434535373 +0200 +@@ -134,6 +134,36 @@ OMP_4.0 { + omp_is_initial_device_; + } OMP_3.1; + ++OMP_4.5 { ++ global: ++ omp_get_max_task_priority; ++ omp_get_max_task_priority_; ++ omp_get_num_places; ++ omp_get_num_places_; ++ omp_get_place_num_procs; ++ omp_get_place_num_procs_; ++ omp_get_place_num_procs_8_; ++ omp_get_place_proc_ids; ++ omp_get_place_proc_ids_; ++ omp_get_place_proc_ids_8_; ++ omp_get_place_num; ++ omp_get_place_num_; ++ omp_get_partition_num_places; ++ omp_get_partition_num_places_; ++ omp_get_partition_place_nums; ++ omp_get_partition_place_nums_; ++ omp_get_partition_place_nums_8_; ++ omp_get_initial_device; ++ omp_get_initial_device_; ++ omp_target_alloc; ++ omp_target_free; ++ omp_target_is_present; ++ omp_target_memcpy; ++ omp_target_memcpy_rect; ++ omp_target_associate_ptr; ++ omp_target_disassociate_ptr; ++} OMP_4.0; ++ + GOMP_1.0 { + global: + GOMP_atomic_end; +@@ -227,3 +257,158 @@ GOMP_4.0 { + GOMP_target_update; + GOMP_teams; + } GOMP_3.0; ++ ++GOMP_4.0.1 { ++ global: ++ GOMP_offload_register; ++ GOMP_offload_unregister; ++} GOMP_4.0; ++ ++GOMP_4.5 { ++ global: ++ GOMP_target_ext; ++ GOMP_target_data_ext; ++ GOMP_target_update_ext; ++ GOMP_target_enter_exit_data; ++ GOMP_taskloop; ++ GOMP_taskloop_ull; ++ GOMP_offload_register_ver; ++ GOMP_offload_unregister_ver; ++ GOMP_loop_doacross_dynamic_start; ++ GOMP_loop_doacross_guided_start; ++ GOMP_loop_doacross_runtime_start; ++ GOMP_loop_doacross_static_start; ++ GOMP_doacross_post; ++ GOMP_doacross_wait; ++ GOMP_loop_ull_doacross_dynamic_start; ++ GOMP_loop_ull_doacross_guided_start; ++ 
GOMP_loop_ull_doacross_runtime_start; ++ GOMP_loop_ull_doacross_static_start; ++ GOMP_doacross_ull_post; ++ GOMP_doacross_ull_wait; ++ GOMP_loop_nonmonotonic_dynamic_next; ++ GOMP_loop_nonmonotonic_dynamic_start; ++ GOMP_loop_nonmonotonic_guided_next; ++ GOMP_loop_nonmonotonic_guided_start; ++ GOMP_loop_ull_nonmonotonic_dynamic_next; ++ GOMP_loop_ull_nonmonotonic_dynamic_start; ++ GOMP_loop_ull_nonmonotonic_guided_next; ++ GOMP_loop_ull_nonmonotonic_guided_start; ++ GOMP_parallel_loop_nonmonotonic_dynamic; ++ GOMP_parallel_loop_nonmonotonic_guided; ++} GOMP_4.0.1; ++ ++OACC_2.0 { ++ global: ++ acc_get_num_devices; ++ acc_get_num_devices_h_; ++ acc_set_device_type; ++ acc_set_device_type_h_; ++ acc_get_device_type; ++ acc_get_device_type_h_; ++ acc_set_device_num; ++ acc_set_device_num_h_; ++ acc_get_device_num; ++ acc_get_device_num_h_; ++ acc_async_test; ++ acc_async_test_h_; ++ acc_async_test_all; ++ acc_async_test_all_h_; ++ acc_wait; ++ acc_wait_h_; ++ acc_wait_async; ++ acc_wait_async_h_; ++ acc_wait_all; ++ acc_wait_all_h_; ++ acc_wait_all_async; ++ acc_wait_all_async_h_; ++ acc_init; ++ acc_init_h_; ++ acc_shutdown; ++ acc_shutdown_h_; ++ acc_on_device; ++ acc_on_device_h_; ++ acc_malloc; ++ acc_free; ++ acc_copyin; ++ acc_copyin_32_h_; ++ acc_copyin_64_h_; ++ acc_copyin_array_h_; ++ acc_present_or_copyin; ++ acc_present_or_copyin_32_h_; ++ acc_present_or_copyin_64_h_; ++ acc_present_or_copyin_array_h_; ++ acc_create; ++ acc_create_32_h_; ++ acc_create_64_h_; ++ acc_create_array_h_; ++ acc_present_or_create; ++ acc_present_or_create_32_h_; ++ acc_present_or_create_64_h_; ++ acc_present_or_create_array_h_; ++ acc_copyout; ++ acc_copyout_32_h_; ++ acc_copyout_64_h_; ++ acc_copyout_array_h_; ++ acc_delete; ++ acc_delete_32_h_; ++ acc_delete_64_h_; ++ acc_delete_array_h_; ++ acc_update_device; ++ acc_update_device_32_h_; ++ acc_update_device_64_h_; ++ acc_update_device_array_h_; ++ acc_update_self; ++ acc_update_self_32_h_; ++ acc_update_self_64_h_; ++ acc_update_self_array_h_; ++ acc_map_data; ++ acc_unmap_data; ++ acc_deviceptr; ++ acc_hostptr; ++ acc_is_present; ++ acc_is_present_32_h_; ++ acc_is_present_64_h_; ++ acc_is_present_array_h_; ++ acc_memcpy_to_device; ++ acc_memcpy_from_device; ++ acc_get_current_cuda_device; ++ acc_get_current_cuda_context; ++ acc_get_cuda_stream; ++ acc_set_cuda_stream; ++}; ++ ++GOACC_2.0 { ++ global: ++ GOACC_data_end; ++ GOACC_data_start; ++ GOACC_enter_exit_data; ++ GOACC_parallel; ++ GOACC_update; ++ GOACC_wait; ++ GOACC_get_thread_num; ++ GOACC_get_num_threads; ++}; ++ ++GOACC_2.0.1 { ++ global: ++ GOACC_declare; ++ GOACC_parallel_keyed; ++} GOACC_2.0; ++ ++GOMP_PLUGIN_1.0 { ++ global: ++ GOMP_PLUGIN_malloc; ++ GOMP_PLUGIN_malloc_cleared; ++ GOMP_PLUGIN_realloc; ++ GOMP_PLUGIN_debug; ++ GOMP_PLUGIN_error; ++ GOMP_PLUGIN_fatal; ++ GOMP_PLUGIN_async_unmap_vars; ++ GOMP_PLUGIN_acc_thread; ++}; ++ ++GOMP_PLUGIN_1.1 { ++ global: ++ GOMP_PLUGIN_target_task_completion; ++} GOMP_PLUGIN_1.0; +--- libgomp/ordered.c.jj 2013-01-21 16:00:46.137873657 +0100 ++++ libgomp/ordered.c 2016-07-13 16:57:18.918355780 +0200 +@@ -25,6 +25,9 @@ + /* This file handles the ORDERED construct. */ + + #include "libgomp.h" ++#include ++#include ++#include "doacross.h" + + + /* This function is called when first allocating an iteration block. That +@@ -249,3 +252,533 @@ void + GOMP_ordered_end (void) + { + } ++ ++/* DOACROSS initialization. 
*/ ++ ++#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) ++ ++void ++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG__ * __CHAR_BIT__ ++ - __builtin_clzl (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ elt_sz = sizeof (unsigned long) * ncounts; ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ unsigned long q = counts[0] / num_ents; ++ unsigned long t = counts[0] % num_ents; ++ doacross->boundary = t * (q + 1); ++ doacross->q = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
*/ ++ ++void ++GOMP_doacross_post (long *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) counts[i] ++ << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_wait (long first, ...) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size == 0) ++ { ++ if (first < doacross->boundary) ++ ent = first / (doacross->q + 1); ++ else ++ ent = (first - doacross->boundary) / doacross->q ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) va_arg (ap, long) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ unsigned long thisv ++ = (unsigned long) (i ? 
va_arg (ap, long) : first) + 1; ++ unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ __sync_synchronize (); ++} ++ ++typedef unsigned long long gomp_ull; ++ ++void ++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__ ++ - __builtin_clzll (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ elt_sz = sizeof (gomp_ull) * ncounts; ++ else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long)) ++ elt_sz = sizeof (unsigned long) * 2 * ncounts; ++ else ++ abort (); ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size_ull = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->boundary = 0; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ gomp_ull q = counts[0] / num_ents; ++ gomp_ull t = counts[0] % num_ents; ++ doacross->boundary_ull = t * (q + 1); ++ doacross->q_ull = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
*/ ++ ++void ++GOMP_doacross_ull_post (gomp_ull *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened ++ = counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= counts[i] << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ gomp_ull cull = counts[i] + 1UL; ++ unsigned long c = (unsigned long) cull; ++ if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE); ++ c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE); ++ } ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_ull_wait (gomp_ull first, ...) 
++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size_ull == 0) ++ { ++ if (first < doacross->boundary_ull) ++ ent = first / (doacross->q_ull + 1); ++ else ++ ent = (first - doacross->boundary_ull) / doacross->q_ull ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size_ull % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened = first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= va_arg (ap, gomp_ull) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? va_arg (ap, gomp_ull) : first) + 1; ++ gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? va_arg (ap, gomp_ull) : first) + 1; ++ unsigned long t ++ = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ unsigned long cur ++ = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ t = thisv; ++ cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ __sync_synchronize (); ++} +--- libgomp/loop.c.jj 2014-05-15 10:56:36.487505570 +0200 ++++ libgomp/loop.c 2016-07-13 16:57:13.488423109 +0200 +@@ -110,6 +110,11 @@ gomp_loop_static_start (long start, long + return !gomp_iter_static_next (istart, iend); + } + ++/* The current dynamic implementation is always monotonic. The ++ entrypoints without nonmonotonic in them have to be always monotonic, ++ but the nonmonotonic ones could be changed to use work-stealing for ++ improved scalability. 
*/ ++ + static bool + gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -135,6 +140,9 @@ gomp_loop_dynamic_start (long start, lon + return ret; + } + ++/* Similarly as for dynamic, though the question is how can the chunk sizes ++ be decreased without a central locking or atomics. */ ++ + static bool + gomp_loop_guided_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -168,13 +176,16 @@ GOMP_loop_runtime_start (long start, lon + switch (icv->run_sched_var) + { + case GFS_STATIC: +- return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_static_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: +- return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_dynamic_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: +- return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_guided_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -265,15 +276,15 @@ GOMP_loop_ordered_runtime_start (long st + { + case GFS_STATIC: + return gomp_loop_ordered_static_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ordered_dynamic_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ordered_guided_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -285,6 +296,111 @@ GOMP_loop_ordered_runtime_start (long st + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
*/ ++ ++static bool ++gomp_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_guided_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, ++ long *istart, long *iend) ++{ ++ struct gomp_task_icv *icv = gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. 
If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -483,7 +599,7 @@ GOMP_parallel_loop_runtime_start (void ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, 0); ++ icv->run_sched_var, icv->run_sched_chunk_size, 0); + } + + ialias_redirect (GOMP_parallel_end) +@@ -521,6 +637,37 @@ GOMP_parallel_loop_guided (void (*fn) (v + GOMP_parallel_end (); + } + ++#ifdef HAVE_ATTRIBUTE_ALIAS ++extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic ++ __attribute__((alias ("GOMP_parallel_loop_dynamic"))); ++extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided ++ __attribute__((alias ("GOMP_parallel_loop_guided"))); ++#else ++void ++GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_DYNAMIC, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++ ++void ++GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_GUIDED, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++#endif ++ + void + GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, +@@ -528,7 +675,7 @@ GOMP_parallel_loop_runtime (void (*fn) ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, ++ icv->run_sched_var, icv->run_sched_chunk_size, + flags); + fn (data); + GOMP_parallel_end (); +@@ -569,6 +716,10 @@ extern __typeof(gomp_loop_dynamic_start) + __attribute__((alias ("gomp_loop_dynamic_start"))); + extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start + __attribute__((alias ("gomp_loop_guided_start"))); ++extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_dynamic_start"))); ++extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_guided_start"))); + + extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start + __attribute__((alias ("gomp_loop_ordered_static_start"))); +@@ -577,12 +728,23 @@ extern __typeof(gomp_loop_ordered_dynami + extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start + __attribute__((alias ("gomp_loop_ordered_guided_start"))); + ++extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start ++ __attribute__((alias ("gomp_loop_doacross_static_start"))); ++extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start ++ __attribute__((alias ("gomp_loop_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_static_next) GOMP_loop_static_next + __attribute__((alias ("gomp_loop_static_next"))); + extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next + __attribute__((alias ("gomp_loop_dynamic_next"))); + extern __typeof(gomp_loop_guided_next) 
GOMP_loop_guided_next + __attribute__((alias ("gomp_loop_guided_next"))); ++extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next ++ __attribute__((alias ("gomp_loop_dynamic_next"))); ++extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_guided_next"))); + + extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next + __attribute__((alias ("gomp_loop_ordered_static_next"))); +@@ -613,6 +775,21 @@ GOMP_loop_guided_start (long start, long + } + + bool ++GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr, ++ long chunk_size, long *istart, ++ long *iend) ++{ ++ return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool + GOMP_loop_ordered_static_start (long start, long end, long incr, + long chunk_size, long *istart, long *iend) + { +@@ -637,6 +814,30 @@ GOMP_loop_ordered_guided_start (long sta + } + + bool ++GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_static_next (long *istart, long *iend) + { + return gomp_loop_static_next (istart, iend); +@@ -653,6 +854,18 @@ GOMP_loop_guided_next (long *istart, lon + { + return gomp_loop_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend) ++{ ++ return gomp_loop_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend) ++{ ++ return gomp_loop_guided_next (istart, iend); ++} + + bool + GOMP_loop_ordered_static_next (long *istart, long *iend) +--- libgomp/error.c.jj 2013-01-21 16:00:31.834953566 +0100 ++++ libgomp/error.c 2016-07-13 16:57:04.437535335 +0200 +@@ -35,7 +35,26 @@ + #include + + +-static void ++#undef gomp_vdebug ++void ++gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list) ++{ ++ if (gomp_debug_var) ++ vfprintf (stderr, msg, list); ++} ++ ++#undef gomp_debug ++void ++gomp_debug (int kind, const char *msg, ...) ++{ ++ va_list list; ++ ++ va_start (list, msg); ++ gomp_vdebug (kind, msg, list); ++ va_end (list); ++} ++ ++void + gomp_verror (const char *fmt, va_list list) + { + fputs ("\nlibgomp: ", stderr); +@@ -54,13 +73,18 @@ gomp_error (const char *fmt, ...) + } + + void ++gomp_vfatal (const char *fmt, va_list list) ++{ ++ gomp_verror (fmt, list); ++ exit (EXIT_FAILURE); ++} ++ ++void + gomp_fatal (const char *fmt, ...) 
+ { + va_list list; + + va_start (list, fmt); +- gomp_verror (fmt, list); ++ gomp_vfatal (fmt, list); + va_end (list); +- +- exit (EXIT_FAILURE); + } +--- libgomp/Makefile.am.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.am 2016-07-14 16:10:51.968202878 +0200 +@@ -60,7 +60,13 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ + iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ + task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c ++ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ ++ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ ++ oacc-plugin.c oacc-cuda.c priority_queue.c ++ ++if USE_FORTRAN ++libgomp_la_SOURCES += openacc.f90 ++endif + + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h +--- libgomp/Makefile.in.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.in 2016-07-14 16:11:10.981954087 +0200 +@@ -36,6 +36,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ ++@USE_FORTRAN_TRUE@am__append_1 = openacc.f90 + subdir = . + DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) \ +@@ -92,11 +93,15 @@ am__installdirs = "$(DESTDIR)$(toolexecl + "$(DESTDIR)$(toolexeclibdir)" + LTLIBRARIES = $(toolexeclib_LTLIBRARIES) + libgomp_la_LIBADD = ++@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo + am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ + error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ + parallel.lo sections.lo single.lo task.lo team.lo work.lo \ + lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ +- fortran.lo affinity.lo target.lo ++ fortran.lo affinity.lo target.lo splay-tree.lo \ ++ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ ++ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ ++ priority_queue.lo $(am__objects_1) + libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) + DEFAULT_INCLUDES = -I.@am__isrc@ + depcomp = $(SHELL) $(top_srcdir)/../depcomp +@@ -108,6 +113,13 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIB + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) + CCLD = $(CC) ++FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++FCLD = $(FC) ++FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ ++ $(LDFLAGS) -o $@ + SOURCES = $(libgomp_la_SOURCES) + MULTISRCTOP = + MULTIBUILDTOP = +@@ -315,10 +327,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_i + libgomp_la_DEPENDENCIES = $(libgomp_version_dep) + libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ +- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ +- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c +- ++ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ ++ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ ++ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ ++ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ ++ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ ++ priority_queue.c 
$(am__append_1) + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod +@@ -351,7 +365,7 @@ all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + + .SUFFIXES: +-.SUFFIXES: .c .dvi .lo .o .obj .ps ++.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps + am--refresh: + @: + $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +@@ -463,17 +477,27 @@ distclean-compile: + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ +@@ -501,6 +525,15 @@ distclean-compile: + @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + ++.f90.o: ++ $(FCCOMPILE) -c -o $@ $< ++ ++.f90.obj: ++ $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` ++ ++.f90.lo: ++ $(LTFCCOMPILE) -c -o $@ $< ++ + mostlyclean-libtool: + -rm -f *.lo + +--- libgomp/task.c.jj 2014-08-06 16:25:16.575091658 +0200 ++++ libgomp/task.c 2016-07-13 17:47:58.722758497 +0200 +@@ -28,6 +28,7 @@ + #include "libgomp.h" + #include + #include ++#include "gomp-constants.h" + + typedef struct gomp_task_depend_entry *hash_entry_type; + +@@ -63,6 +64,14 @@ void + gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, + struct gomp_task_icv *prev_icv) + { ++ /* It would seem that using memset here would be a win, but it turns ++ out that partially filling gomp_task allows us to keep the ++ overhead of task creation low. 
In the nqueens-1.c test, for a ++ sufficiently large N, we drop the overhead from 5-6% to 1%. ++ ++ Note, the nqueens-1.c test in serial mode is a good test to ++ benchmark the overhead of creating tasks as there are millions of ++ tiny tasks created that all run undeferred. */ + task->parent = parent_task; + task->icv = *prev_icv; + task->kind = GOMP_TASK_IMPLICIT; +@@ -71,7 +80,7 @@ gomp_init_task (struct gomp_task *task, + task->final_task = false; + task->copy_ctors_done = false; + task->parent_depends_on = false; +- task->children = NULL; ++ priority_queue_init (&task->children_queue); + task->taskgroup = NULL; + task->dependers = NULL; + task->depend_hash = NULL; +@@ -90,30 +99,194 @@ gomp_end_task (void) + thr->task = task->parent; + } + ++/* Clear the parent field of every task in LIST. */ ++ + static inline void +-gomp_clear_parent (struct gomp_task *children) ++gomp_clear_parent_in_list (struct priority_list *list) + { +- struct gomp_task *task = children; +- +- if (task) ++ struct priority_node *p = list->tasks; ++ if (p) + do + { +- task->parent = NULL; +- task = task->next_child; ++ priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; ++ p = p->next; + } +- while (task != children); ++ while (p != list->tasks); ++} ++ ++/* Splay tree version of gomp_clear_parent_in_list. ++ ++ Clear the parent field of every task in NODE within SP, and free ++ the node when done. */ ++ ++static void ++gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) ++{ ++ if (!node) ++ return; ++ prio_splay_tree_node left = node->left, right = node->right; ++ gomp_clear_parent_in_list (&node->key.l); ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ /* No need to remove the node from the tree. We're nuking ++ everything, so just free the nodes and our caller can clear the ++ entire splay tree. */ ++ free (node); ++ gomp_clear_parent_in_tree (sp, left); ++ gomp_clear_parent_in_tree (sp, right); ++} ++ ++/* Clear the parent field of every task in Q and remove every task ++ from Q. */ ++ ++static inline void ++gomp_clear_parent (struct priority_queue *q) ++{ ++ if (priority_queue_multi_p (q)) ++ { ++ gomp_clear_parent_in_tree (&q->t, q->t.root); ++ /* All the nodes have been cleared in gomp_clear_parent_in_tree. ++ No need to remove anything. We can just nuke everything. */ ++ q->t.root = NULL; ++ } ++ else ++ gomp_clear_parent_in_list (&q->l); + } + +-static void gomp_task_maybe_wait_for_dependencies (void **depend); ++/* Helper function for GOMP_task and gomp_create_target_task. ++ ++ For a TASK with in/out dependencies, fill in the various dependency ++ queues. PARENT is the parent of said task. DEPEND is as in ++ GOMP_task. */ ++ ++static void ++gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, ++ void **depend) ++{ ++ size_t ndepend = (uintptr_t) depend[0]; ++ size_t nout = (uintptr_t) depend[1]; ++ size_t i; ++ hash_entry_type ent; ++ ++ task->depend_count = ndepend; ++ task->num_dependees = 0; ++ if (parent->depend_hash == NULL) ++ parent->depend_hash = htab_create (2 * ndepend > 12 ? 
2 * ndepend : 12); ++ for (i = 0; i < ndepend; i++) ++ { ++ task->depend[i].addr = depend[2 + i]; ++ task->depend[i].next = NULL; ++ task->depend[i].prev = NULL; ++ task->depend[i].task = task; ++ task->depend[i].is_in = i >= nout; ++ task->depend[i].redundant = false; ++ task->depend[i].redundant_out = false; ++ ++ hash_entry_type *slot = htab_find_slot (&parent->depend_hash, ++ &task->depend[i], INSERT); ++ hash_entry_type out = NULL, last = NULL; ++ if (*slot) ++ { ++ /* If multiple depends on the same task are the same, all but the ++ first one are redundant. As inout/out come first, if any of them ++ is inout/out, it will win, which is the right semantics. */ ++ if ((*slot)->task == task) ++ { ++ task->depend[i].redundant = true; ++ continue; ++ } ++ for (ent = *slot; ent; ent = ent->next) ++ { ++ if (ent->redundant_out) ++ break; ++ ++ last = ent; ++ ++ /* depend(in:...) doesn't depend on earlier depend(in:...). */ ++ if (i >= nout && ent->is_in) ++ continue; ++ ++ if (!ent->is_in) ++ out = ent; ++ ++ struct gomp_task *tsk = ent->task; ++ if (tsk->dependers == NULL) ++ { ++ tsk->dependers ++ = gomp_malloc (sizeof (struct gomp_dependers_vec) ++ + 6 * sizeof (struct gomp_task *)); ++ tsk->dependers->n_elem = 1; ++ tsk->dependers->allocated = 6; ++ tsk->dependers->elem[0] = task; ++ task->num_dependees++; ++ continue; ++ } ++ /* We already have some other dependency on tsk from earlier ++ depend clause. */ ++ else if (tsk->dependers->n_elem ++ && (tsk->dependers->elem[tsk->dependers->n_elem - 1] ++ == task)) ++ continue; ++ else if (tsk->dependers->n_elem == tsk->dependers->allocated) ++ { ++ tsk->dependers->allocated ++ = tsk->dependers->allocated * 2 + 2; ++ tsk->dependers ++ = gomp_realloc (tsk->dependers, ++ sizeof (struct gomp_dependers_vec) ++ + (tsk->dependers->allocated ++ * sizeof (struct gomp_task *))); ++ } ++ tsk->dependers->elem[tsk->dependers->n_elem++] = task; ++ task->num_dependees++; ++ } ++ task->depend[i].next = *slot; ++ (*slot)->prev = &task->depend[i]; ++ } ++ *slot = &task->depend[i]; ++ ++ /* There is no need to store more than one depend({,in}out:) task per ++ address in the hash table chain for the purpose of creation of ++ deferred tasks, because each out depends on all earlier outs, thus it ++ is enough to record just the last depend({,in}out:). For depend(in:), ++ we need to keep all of the previous ones not terminated yet, because ++ a later depend({,in}out:) might need to depend on all of them. So, if ++ the new task's clause is depend({,in}out:), we know there is at most ++ one other depend({,in}out:) clause in the list (out). For ++ non-deferred tasks we want to see all outs, so they are moved to the ++ end of the chain, after first redundant_out entry all following ++ entries should be redundant_out. */ ++ if (!task->depend[i].is_in && out) ++ { ++ if (out != last) ++ { ++ out->next->prev = out->prev; ++ out->prev->next = out->next; ++ out->next = last->next; ++ out->prev = last; ++ last->next = out; ++ if (out->next) ++ out->next->prev = out; ++ } ++ out->redundant_out = true; ++ } ++ } ++} + + /* Called when encountering an explicit task directive. If IF_CLAUSE is + false, then we must not delay in executing the task. If UNTIED is true, +- then the task may be executed by any member of the team. */ ++ then the task may be executed by any member of the team. ++ ++ DEPEND is an array containing: ++ depend[0]: number of depend elements. ++ depend[1]: number of depend elements of type "out". ++ depend[2..N+1]: address of [1..N]th depend element. 
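++   For example, a construct like
++     #pragma omp task depend(out: x) depend(in: y, z)
++   would be started with depend == { (void *) 3, (void *) 1, &x, &y, &z },
++   i.e. three dependence addresses in total, the first of which is an
++   out/inout dependence (out/inout addresses always come first).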
*/ + + void + GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), + long arg_size, long arg_align, bool if_clause, unsigned flags, +- void **depend) ++ void **depend, int priority) + { + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; +@@ -125,8 +298,7 @@ GOMP_task (void (*fn) (void *), void *da + might be running on different thread than FN. */ + if (cpyfn) + if_clause = false; +- if (flags & 1) +- flags &= ~1; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; + #endif + + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ +@@ -135,6 +307,11 @@ GOMP_task (void (*fn) (void *), void *da + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + ++ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) ++ priority = 0; ++ else if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ + if (!if_clause || team == NULL + || (thr->task && thr->task->final_task) + || team->task_count > 64 * team->nthreads) +@@ -147,12 +324,15 @@ GOMP_task (void (*fn) (void *), void *da + depend clauses for non-deferred tasks other than this, because + the parent task is suspended until the child task finishes and thus + it can't start further child tasks. */ +- if ((flags & 8) && thr->task && thr->task->depend_hash) ++ if ((flags & GOMP_TASK_FLAG_DEPEND) ++ && thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + + gomp_init_task (&task, thr->task, gomp_icv (false)); +- task.kind = GOMP_TASK_IFFALSE; +- task.final_task = (thr->task && thr->task->final_task) || (flags & 2); ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ task.priority = priority; + if (thr->task) + { + task.in_tied_task = thr->task->in_tied_task; +@@ -178,10 +358,10 @@ GOMP_task (void (*fn) (void *), void *da + child thread, but seeing a stale non-NULL value is not a + problem. Once past the task_lock acquisition, this thread + will see the real value of task.children. */ +- if (task.children != NULL) ++ if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) + { + gomp_mutex_lock (&team->task_lock); +- gomp_clear_parent (task.children); ++ gomp_clear_parent (&task.children_queue); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); +@@ -195,7 +375,7 @@ GOMP_task (void (*fn) (void *), void *da + bool do_wake; + size_t depend_size = 0; + +- if (flags & 8) ++ if (flags & GOMP_TASK_FLAG_DEPEND) + depend_size = ((uintptr_t) depend[0] + * sizeof (struct gomp_task_depend_entry)); + task = gomp_malloc (sizeof (*task) + depend_size +@@ -203,7 +383,8 @@ GOMP_task (void (*fn) (void *), void *da + arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + gomp_init_task (task, parent, gomp_icv (false)); +- task->kind = GOMP_TASK_IFFALSE; ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + thr->task = task; +@@ -218,7 +399,7 @@ GOMP_task (void (*fn) (void *), void *da + task->kind = GOMP_TASK_WAITING; + task->fn = fn; + task->fn_data = arg; +- task->final_task = (flags & 2) >> 1; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. 
*/ +@@ -235,171 +416,39 @@ GOMP_task (void (*fn) (void *), void *da + taskgroup->num_children++; + if (depend_size) + { +- size_t ndepend = (uintptr_t) depend[0]; +- size_t nout = (uintptr_t) depend[1]; +- size_t i; +- hash_entry_type ent; +- +- task->depend_count = ndepend; +- task->num_dependees = 0; +- if (parent->depend_hash == NULL) +- parent->depend_hash +- = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); +- for (i = 0; i < ndepend; i++) +- { +- task->depend[i].addr = depend[2 + i]; +- task->depend[i].next = NULL; +- task->depend[i].prev = NULL; +- task->depend[i].task = task; +- task->depend[i].is_in = i >= nout; +- task->depend[i].redundant = false; +- task->depend[i].redundant_out = false; +- +- hash_entry_type *slot +- = htab_find_slot (&parent->depend_hash, &task->depend[i], +- INSERT); +- hash_entry_type out = NULL, last = NULL; +- if (*slot) +- { +- /* If multiple depends on the same task are the +- same, all but the first one are redundant. +- As inout/out come first, if any of them is +- inout/out, it will win, which is the right +- semantics. */ +- if ((*slot)->task == task) +- { +- task->depend[i].redundant = true; +- continue; +- } +- for (ent = *slot; ent; ent = ent->next) +- { +- if (ent->redundant_out) +- break; +- +- last = ent; +- +- /* depend(in:...) doesn't depend on earlier +- depend(in:...). */ +- if (i >= nout && ent->is_in) +- continue; +- +- if (!ent->is_in) +- out = ent; +- +- struct gomp_task *tsk = ent->task; +- if (tsk->dependers == NULL) +- { +- tsk->dependers +- = gomp_malloc (sizeof (struct gomp_dependers_vec) +- + 6 * sizeof (struct gomp_task *)); +- tsk->dependers->n_elem = 1; +- tsk->dependers->allocated = 6; +- tsk->dependers->elem[0] = task; +- task->num_dependees++; +- continue; +- } +- /* We already have some other dependency on tsk +- from earlier depend clause. */ +- else if (tsk->dependers->n_elem +- && (tsk->dependers->elem[tsk->dependers->n_elem +- - 1] +- == task)) +- continue; +- else if (tsk->dependers->n_elem +- == tsk->dependers->allocated) +- { +- tsk->dependers->allocated +- = tsk->dependers->allocated * 2 + 2; +- tsk->dependers +- = gomp_realloc (tsk->dependers, +- sizeof (struct gomp_dependers_vec) +- + (tsk->dependers->allocated +- * sizeof (struct gomp_task *))); +- } +- tsk->dependers->elem[tsk->dependers->n_elem++] = task; +- task->num_dependees++; +- } +- task->depend[i].next = *slot; +- (*slot)->prev = &task->depend[i]; +- } +- *slot = &task->depend[i]; +- +- /* There is no need to store more than one depend({,in}out:) +- task per address in the hash table chain for the purpose +- of creation of deferred tasks, because each out +- depends on all earlier outs, thus it is enough to record +- just the last depend({,in}out:). For depend(in:), we need +- to keep all of the previous ones not terminated yet, because +- a later depend({,in}out:) might need to depend on all of +- them. So, if the new task's clause is depend({,in}out:), +- we know there is at most one other depend({,in}out:) clause +- in the list (out). For non-deferred tasks we want to see +- all outs, so they are moved to the end of the chain, +- after first redundant_out entry all following entries +- should be redundant_out. 
*/ +- if (!task->depend[i].is_in && out) +- { +- if (out != last) +- { +- out->next->prev = out->prev; +- out->prev->next = out->next; +- out->next = last->next; +- out->prev = last; +- last->next = out; +- if (out->next) +- out->next->prev = out; +- } +- out->redundant_out = true; +- } +- } ++ gomp_task_handle_depend (task, parent, depend); + if (task->num_dependees) + { ++ /* Tasks that depend on other tasks are not put into the ++ various waiting queues, so we are done for now. Said ++ tasks are instead put into the queues via ++ gomp_task_run_post_handle_dependers() after their ++ dependencies have been satisfied. After which, they ++ can be picked up by the various scheduling ++ points. */ + gomp_mutex_unlock (&team->task_lock); + return; + } + } +- if (parent->children) +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- } +- parent->children = task; ++ ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup) +- { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; +- } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ + ++team->task_count; + ++team->task_queued_count; + gomp_team_barrier_set_task_pending (&team->barrier); +@@ -411,36 +460,529 @@ GOMP_task (void (*fn) (void *), void *da + } + } + +-static inline bool +-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, +- struct gomp_taskgroup *taskgroup, struct gomp_team *team) ++ialias (GOMP_taskgroup_start) ++ialias (GOMP_taskgroup_end) ++ ++#define TYPE long ++#define UTYPE unsigned long ++#define TYPE_is_long 1 ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef TYPE_is_long ++ ++#define TYPE unsigned long long ++#define UTYPE TYPE ++#define GOMP_taskloop GOMP_taskloop_ull ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef GOMP_taskloop ++ ++static void inline ++priority_queue_move_task_first (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) + { ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to move first missing task %p", task); ++#endif ++ struct priority_list *list; ++ if (priority_queue_multi_p (head)) ++ { ++ list = priority_queue_lookup_priority (head, task->priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority 
%d", task->priority); ++#endif ++ } ++ else ++ list = &head->l; ++ priority_list_remove (list, task_to_priority_node (type, task), 0); ++ priority_list_insert (type, list, task, task->priority, ++ PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, ++ task->parent_depends_on); ++} ++ ++/* Actual body of GOMP_PLUGIN_target_task_completion that is executed ++ with team->task_lock held, or is executed in the thread that called ++ gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been ++ run before it acquires team->task_lock. */ ++ ++static void ++gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) ++{ ++ struct gomp_task *parent = task->parent; + if (parent) ++ priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, ++ task); ++ ++ struct gomp_taskgroup *taskgroup = task->taskgroup; ++ if (taskgroup) ++ priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, ++ PRIORITY_INSERT_BEGIN, false, ++ task->parent_depends_on); ++ task->kind = GOMP_TASK_WAITING; ++ if (parent && parent->taskwait) + { +- if (parent->children == child_task) +- parent->children = child_task->next_child; +- if (__builtin_expect (child_task->parent_depends_on, 0) +- && parent->taskwait->last_parent_depends_on == child_task) +- { +- if (child_task->prev_child->kind == GOMP_TASK_WAITING +- && child_task->prev_child->parent_depends_on) +- parent->taskwait->last_parent_depends_on = child_task->prev_child; +- else +- parent->taskwait->last_parent_depends_on = NULL; ++ if (parent->taskwait->in_taskwait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ else if (parent->taskwait->in_depend_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_depend_wait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } ++ } ++ if (taskgroup && taskgroup->in_taskgroup_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } +- if (taskgroup && taskgroup->children == child_task) +- taskgroup->children = child_task->next_taskgroup; +- child_task->prev_queue->next_queue = child_task->next_queue; +- child_task->next_queue->prev_queue = child_task->prev_queue; +- if (team->task_queue == child_task) ++ ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ /* I'm afraid this can't be done after releasing team->task_lock, ++ as gomp_target_task_completion is run from unrelated thread and ++ therefore in between gomp_mutex_unlock and gomp_team_barrier_wake ++ the team could be gone already. */ ++ if (team->nthreads > team->task_running_count) ++ gomp_team_barrier_wake (&team->barrier, 1); ++} ++ ++/* Signal that a target task TTASK has completed the asynchronously ++ running phase and should be requeued as a task to handle the ++ variable unmapping. 
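++   DATA is the struct gomp_target_task pointer describing the task; a
++   device plugin is meant to invoke this callback once the offloaded
++   part has finished, e.g. from its asynchronous event handler.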
*/ ++ ++void ++GOMP_PLUGIN_target_task_completion (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ struct gomp_task *task = ttask->task; ++ struct gomp_team *team = ttask->team; ++ ++ gomp_mutex_lock (&team->task_lock); ++ if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) + { +- if (child_task->next_queue != child_task) +- team->task_queue = child_task->next_queue; ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_mutex_unlock (&team->task_lock); ++ return; ++ } ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_target_task_completion (team, task); ++ gomp_mutex_unlock (&team->task_lock); ++} ++ ++static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); ++ ++/* Called for nowait target tasks. */ ++ ++bool ++gomp_create_target_task (struct gomp_device_descr *devicep, ++ void (*fn) (void *), size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args, ++ enum gomp_target_task_state state) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) ++ return true; ++ ++ struct gomp_target_task *ttask; ++ struct gomp_task *task; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; ++ bool do_wake; ++ size_t depend_size = 0; ++ uintptr_t depend_cnt = 0; ++ size_t tgt_align = 0, tgt_size = 0; ++ ++ if (depend != NULL) ++ { ++ depend_cnt = (uintptr_t) depend[0]; ++ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); ++ } ++ if (fn) ++ { ++ /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are ++ firstprivate on the target task. 
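++   The loop below only computes the required alignment and total size
++   of those values; the actual copies are done further down, once the
++   task structure (with room for them at its tail) has been allocated,
++   and HOSTADDRS[i] is then redirected to point at the copy.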
*/ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (tgt_align < align) ++ tgt_align = align; ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ tgt_size += sizes[i]; ++ } ++ if (tgt_align) ++ tgt_size += tgt_align - 1; + else +- team->task_queue = NULL; ++ tgt_size = 0; + } ++ ++ task = gomp_malloc (sizeof (*task) + depend_size ++ + sizeof (*ttask) ++ + mapnum * (sizeof (void *) + sizeof (size_t) ++ + sizeof (unsigned short)) ++ + tgt_size); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = 0; ++ task->kind = GOMP_TASK_WAITING; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; ++ ttask->devicep = devicep; ++ ttask->fn = fn; ++ ttask->mapnum = mapnum; ++ ttask->args = args; ++ memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ++ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; ++ memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); ++ ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; ++ memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); ++ if (tgt_align) ++ { ++ char *tgt = (char *) &ttask->kinds[mapnum]; ++ size_t i; ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ ttask->hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++ } ++ ttask->flags = flags; ++ ttask->state = state; ++ ttask->task = task; ++ ttask->team = team; ++ task->fn = NULL; ++ task->fn_data = ttask; ++ task->final_task = 0; ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ ++ if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled), 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return true; ++ } ++ if (depend_size) ++ { ++ gomp_task_handle_depend (task, parent, depend); ++ if (task->num_dependees) ++ { ++ if (taskgroup) ++ taskgroup->num_children++; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ } ++ if (state == GOMP_TARGET_TASK_DATA) ++ { ++ gomp_task_run_post_handle_depend_hash (task); ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return false; ++ } ++ if (taskgroup) ++ taskgroup->num_children++; ++ /* For async offloading, if we don't need to wait for dependencies, ++ run the gomp_target_task_fn right away, essentially schedule the ++ mapping part of the task in the current thread. 
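++   The device side then proceeds asynchronously; once it completes, the
++   plugin calls GOMP_PLUGIN_target_task_completion, which requeues the
++   task (see above) so that the unmapping part can be scheduled.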
*/ ++ if (devicep != NULL ++ && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) ++ { ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, 0, PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ task->pnode[PQ_TEAM].next = NULL; ++ task->pnode[PQ_TEAM].prev = NULL; ++ task->kind = GOMP_TASK_TIED; ++ ++team->task_count; ++ gomp_mutex_unlock (&team->task_lock); ++ ++ thr->task = task; ++ gomp_target_task_fn (task->fn_data); ++ thr->task = parent; ++ ++ gomp_mutex_lock (&team->task_lock); ++ task->kind = GOMP_TASK_ASYNC_RUNNING; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ do_wake = team->task_running_count + !parent->in_tied_task ++ < team->nthreads; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, 1); ++ return true; ++} ++ ++/* Given a parent_depends_on task in LIST, move it to the front of its ++ priority so it is run as soon as possible. ++ ++ Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. ++ ++ We rearrange the queue such that all parent_depends_on tasks are ++ first, and last_parent_depends_on points to the last such task we ++ rearranged. For example, given the following tasks in a queue ++ where PD[123] are the parent_depends_on tasks: ++ ++ task->children ++ | ++ V ++ C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 ++ ++ We rearrange such that: ++ ++ task->children ++ | +--- last_parent_depends_on ++ | | ++ V V ++ PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ ++ ++static void inline ++priority_list_upgrade_task (struct priority_list *list, ++ struct priority_node *node) ++{ ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ if (last_parent_depends_on) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ else if (node != list->tasks) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = list->tasks->prev; ++ node->next = list->tasks; ++ list->tasks = node; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ list->last_parent_depends_on = node; ++} ++ ++/* Given a parent_depends_on TASK in its parent's children_queue, move ++ it to the front of its priority so it is run as soon as possible. 
++ ++ PARENT is passed as an optimization. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). */ ++ ++static void inline ++priority_queue_upgrade_task (struct gomp_task *task, ++ struct gomp_task *parent) ++{ ++ struct priority_queue *head = &parent->children_queue; ++ struct priority_node *node = &task->pnode[PQ_CHILDREN]; ++#if _LIBGOMP_CHECKING_ ++ if (!task->parent_depends_on) ++ gomp_fatal ("priority_queue_upgrade_task: task must be a " ++ "parent_depends_on task"); ++ if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) ++ gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ priority_list_upgrade_task (list, node); ++ } ++ else ++ priority_list_upgrade_task (&head->l, node); ++} ++ ++/* Given a CHILD_TASK in LIST that is about to be executed, move it out of ++ the way in LIST so that other tasks can be considered for ++ execution. LIST contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. */ ++ ++static void inline ++priority_list_downgrade_task (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *child_task) ++{ ++ struct priority_node *node = task_to_priority_node (type, child_task); ++ if (list->tasks == node) ++ list->tasks = node->next; ++ else if (node->next != list->tasks) ++ { ++ /* The task in NODE is about to become TIED and TIED tasks ++ cannot come before WAITING tasks. If we're about to ++ leave the queue in such an indeterminate state, rewire ++ things appropriately. However, a TIED task at the end is ++ perfectly fine. */ ++ struct gomp_task *next_task = priority_node_to_task (type, node->next); ++ if (next_task->kind == GOMP_TASK_WAITING) ++ { ++ /* Remove from list. */ ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ /* Rewire at the end. */ ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ list->tasks->prev->next = node; ++ list->tasks->prev = node; ++ } ++ } ++ ++ /* If the current task is the last_parent_depends_on for its ++ priority, adjust last_parent_depends_on appropriately. */ ++ if (__builtin_expect (child_task->parent_depends_on, 0) ++ && list->last_parent_depends_on == node) ++ { ++ struct gomp_task *prev_child = priority_node_to_task (type, node->prev); ++ if (node->prev != node ++ && prev_child->kind == GOMP_TASK_WAITING ++ && prev_child->parent_depends_on) ++ list->last_parent_depends_on = node->prev; ++ else ++ { ++ /* There are no more parent_depends_on entries waiting ++ to run, clear the list. */ ++ list->last_parent_depends_on = NULL; ++ } ++ } ++} ++ ++/* Given a TASK in HEAD that is about to be executed, move it out of ++ the way so that other tasks can be considered for execution. HEAD ++ contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). 
*/ ++ ++static void inline ++priority_queue_downgrade_task (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to downgrade missing task %p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ priority_list_downgrade_task (type, list, task); ++ } ++ else ++ priority_list_downgrade_task (type, &head->l, task); ++} ++ ++/* Setup CHILD_TASK to execute. This is done by setting the task to ++ TIED, and updating all relevant queues so that CHILD_TASK is no ++ longer chosen for scheduling. Also, remove CHILD_TASK from the ++ overall team task queue entirely. ++ ++ Return TRUE if task or its containing taskgroup has been ++ cancelled. */ ++ ++static inline bool ++gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, ++ struct gomp_team *team) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (child_task->parent) ++ priority_queue_verify (PQ_CHILDREN, ++ &child_task->parent->children_queue, true); ++ if (child_task->taskgroup) ++ priority_queue_verify (PQ_TASKGROUP, ++ &child_task->taskgroup->taskgroup_queue, false); ++ priority_queue_verify (PQ_TEAM, &team->task_queue, false); ++#endif ++ ++ /* Task is about to go tied, move it out of the way. */ ++ if (parent) ++ priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, ++ child_task); ++ ++ /* Task is about to go tied, move it out of the way. */ ++ struct gomp_taskgroup *taskgroup = child_task->taskgroup; ++ if (taskgroup) ++ priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ child_task); ++ ++ priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, ++ MEMMODEL_RELAXED); ++ child_task->pnode[PQ_TEAM].next = NULL; ++ child_task->pnode[PQ_TEAM].prev = NULL; + child_task->kind = GOMP_TASK_TIED; ++ + if (--team->task_queued_count == 0) + gomp_team_barrier_clear_task_pending (&team->barrier); + if ((gomp_team_barrier_cancelled (&team->barrier) +@@ -478,6 +1020,14 @@ gomp_task_run_post_handle_depend_hash (s + } + } + ++/* After a CHILD_TASK has been run, adjust the dependency queue for ++ each task that depends on CHILD_TASK, to record the fact that there ++ is one less dependency to worry about. If a task that depended on ++ CHILD_TASK now has no dependencies, place it in the various queues ++ so it gets scheduled to run. ++ ++ TEAM is the team to which CHILD_TASK belongs to. */ ++ + static size_t + gomp_task_run_post_handle_dependers (struct gomp_task *child_task, + struct gomp_team *team) +@@ -487,91 +1037,60 @@ gomp_task_run_post_handle_dependers (str + for (i = 0; i < count; i++) + { + struct gomp_task *task = child_task->dependers->elem[i]; ++ ++ /* CHILD_TASK satisfies a dependency for TASK. Keep track of ++ TASK's remaining dependencies. Once TASK has no other ++ depenencies, put it into the various queues so it will get ++ scheduled for execution. */ + if (--task->num_dependees != 0) + continue; + + struct gomp_taskgroup *taskgroup = task->taskgroup; + if (parent) + { +- if (parent->children) +- { +- /* If parent is in gomp_task_maybe_wait_for_dependencies +- and it doesn't need to wait for this task, put it after +- all ready to run tasks it needs to wait for. 
*/ +- if (parent->taskwait && parent->taskwait->last_parent_depends_on +- && !task->parent_depends_on) +- { +- struct gomp_task *last_parent_depends_on +- = parent->taskwait->last_parent_depends_on; +- task->next_child = last_parent_depends_on->next_child; +- task->prev_child = last_parent_depends_on; +- } +- else +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- parent->children = task; +- } +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- parent->children = task; +- } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/true, ++ task->parent_depends_on); + if (parent->taskwait) + { + if (parent->taskwait->in_taskwait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + parent->taskwait->in_taskwait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } + else if (parent->taskwait->in_depend_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- if (parent->taskwait->last_parent_depends_on == NULL +- && task->parent_depends_on) +- parent->taskwait->last_parent_depends_on = task; + } + } + if (taskgroup) + { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup->in_taskgroup_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); + } + } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, task->priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + ++team->task_count; + ++team->task_queued_count; + ++ret; +@@ -601,12 +1120,18 @@ gomp_task_run_post_handle_depend (struct + return gomp_task_run_post_handle_dependers (child_task, team); + } + ++/* Remove CHILD_TASK from its parent. */ ++ + static inline void + gomp_task_run_post_remove_parent (struct gomp_task *child_task) + { + struct gomp_task *parent = child_task->parent; + if (parent == NULL) + return; ++ ++ /* If this was the last task the parent was depending on, ++ synchronize with gomp_task_maybe_wait_for_dependencies so it can ++ clean up and return. 
*/ + if (__builtin_expect (child_task->parent_depends_on, 0) + && --parent->taskwait->n_depend == 0 + && parent->taskwait->in_depend_wait) +@@ -614,36 +1139,31 @@ gomp_task_run_post_remove_parent (struct + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (parent->children != child_task) +- return; +- if (child_task->next_child != child_task) +- parent->children = child_task->next_child; +- else ++ ++ if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, ++ child_task, MEMMODEL_RELEASE) ++ && parent->taskwait && parent->taskwait->in_taskwait) + { +- /* We access task->children in GOMP_taskwait +- outside of the task lock mutex region, so +- need a release barrier here to ensure memory +- written by child_task->fn above is flushed +- before the NULL is written. */ +- __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); +- if (parent->taskwait && parent->taskwait->in_taskwait) +- { +- parent->taskwait->in_taskwait = false; +- gomp_sem_post (&parent->taskwait->taskwait_sem); +- } ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } + ++/* Remove CHILD_TASK from its taskgroup. */ ++ + static inline void + gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) + { + struct gomp_taskgroup *taskgroup = child_task->taskgroup; + if (taskgroup == NULL) + return; +- child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; +- child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; ++ bool empty = priority_queue_remove (PQ_TASKGROUP, ++ &taskgroup->taskgroup_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_TASKGROUP].next = NULL; ++ child_task->pnode[PQ_TASKGROUP].prev = NULL; + if (taskgroup->num_children > 1) + --taskgroup->num_children; + else +@@ -655,18 +1175,10 @@ gomp_task_run_post_remove_taskgroup (str + before the NULL is written. 
*/ + __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); + } +- if (taskgroup->children != child_task) +- return; +- if (child_task->next_taskgroup != child_task) +- taskgroup->children = child_task->next_taskgroup; +- else ++ if (empty && taskgroup->in_taskgroup_wait) + { +- taskgroup->children = NULL; +- if (taskgroup->in_taskgroup_wait) +- { +- taskgroup->in_taskgroup_wait = false; +- gomp_sem_post (&taskgroup->taskgroup_sem); +- } ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } + } + +@@ -696,11 +1208,15 @@ gomp_barrier_handle_tasks (gomp_barrier_ + while (1) + { + bool cancelled = false; +- if (team->task_queue != NULL) ++ if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) + { +- child_task = team->task_queue; ++ bool ignored; ++ child_task ++ = priority_queue_next_task (PQ_TEAM, &team->task_queue, ++ PQ_IGNORED, NULL, ++ &ignored); + cancelled = gomp_task_run_pre (child_task, child_task->parent, +- child_task->taskgroup, team); ++ team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -729,7 +1245,29 @@ gomp_barrier_handle_tasks (gomp_barrier_ + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ team->task_running_count--; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -741,7 +1279,7 @@ gomp_barrier_handle_tasks (gomp_barrier_ + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -765,7 +1303,9 @@ gomp_barrier_handle_tasks (gomp_barrier_ + } + } + +-/* Called when encountering a taskwait directive. */ ++/* Called when encountering a taskwait directive. ++ ++ Wait for all children of the current task. */ + + void + GOMP_taskwait (void) +@@ -785,15 +1325,16 @@ GOMP_taskwait (void) + child thread task work function are seen before we exit from + GOMP_taskwait. 
*/ + if (task == NULL +- || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) ++ || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) + return; + + memset (&taskwait, 0, sizeof (taskwait)); ++ bool child_q = false; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) + { + bool destroy_taskwait = task->taskwait != NULL; + task->taskwait = NULL; +@@ -807,12 +1348,14 @@ GOMP_taskwait (void) + gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } +- if (task->children->kind == GOMP_TASK_WAITING) ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, &child_q); ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -826,8 +1369,10 @@ GOMP_taskwait (void) + } + else + { +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + if (task->taskwait == NULL) + { + taskwait.in_depend_wait = false; +@@ -851,7 +1396,28 @@ GOMP_taskwait (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -862,17 +1428,19 @@ GOMP_taskwait (void) + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; ++ ++ if (child_q) ++ { ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } +- gomp_clear_parent (child_task->children); ++ ++ gomp_clear_parent (&child_task->children_queue); ++ + gomp_task_run_post_remove_taskgroup (child_task); ++ + to_free = child_task; + child_task = NULL; + team->task_count--; +@@ -887,10 +1455,20 @@ GOMP_taskwait (void) + } + } + +-/* This is like GOMP_taskwait, but we only wait for tasks that the +- upcoming task depends on. */ ++/* An undeferred task is about to run. Wait for all tasks that this ++ undeferred task depends on. 
+ +-static void ++ This is done by first putting all known ready dependencies ++ (dependencies that have their own dependencies met) at the top of ++ the scheduling queues. Then we iterate through these imminently ++ ready tasks (and possibly other high priority tasks), and run them. ++ If we run out of ready dependencies to execute, we either wait for ++ the reamining dependencies to finish, or wait for them to get ++ scheduled so we can run them. ++ ++ DEPEND is as in GOMP_task. */ ++ ++void + gomp_task_maybe_wait_for_dependencies (void **depend) + { + struct gomp_thread *thr = gomp_thread (); +@@ -898,7 +1476,6 @@ gomp_task_maybe_wait_for_dependencies (v + struct gomp_team *team = thr->ts.team; + struct gomp_task_depend_entry elem, *ent = NULL; + struct gomp_taskwait taskwait; +- struct gomp_task *last_parent_depends_on = NULL; + size_t ndepend = (uintptr_t) depend[0]; + size_t nout = (uintptr_t) depend[1]; + size_t i; +@@ -922,32 +1499,11 @@ gomp_task_maybe_wait_for_dependencies (v + { + tsk->parent_depends_on = true; + ++num_awaited; ++ /* If depenency TSK itself has no dependencies and is ++ ready to run, move it up front so that we run it as ++ soon as possible. */ + if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) +- { +- /* If a task we need to wait for is not already +- running and is ready to be scheduled, move it +- to front, so that we run it as soon as possible. */ +- if (last_parent_depends_on) +- { +- tsk->prev_child->next_child = tsk->next_child; +- tsk->next_child->prev_child = tsk->prev_child; +- tsk->prev_child = last_parent_depends_on; +- tsk->next_child = last_parent_depends_on->next_child; +- tsk->prev_child->next_child = tsk; +- tsk->next_child->prev_child = tsk; +- } +- else if (tsk != task->children) +- { +- tsk->prev_child->next_child = tsk->next_child; +- tsk->next_child->prev_child = tsk->prev_child; +- tsk->prev_child = task->children; +- tsk->next_child = task->children->next_child; +- task->children = tsk; +- tsk->prev_child->next_child = tsk; +- tsk->next_child->prev_child = tsk; +- } +- last_parent_depends_on = tsk; +- } ++ priority_queue_upgrade_task (tsk, task); + } + } + } +@@ -959,7 +1515,6 @@ gomp_task_maybe_wait_for_dependencies (v + + memset (&taskwait, 0, sizeof (taskwait)); + taskwait.n_depend = num_awaited; +- taskwait.last_parent_depends_on = last_parent_depends_on; + gomp_sem_init (&taskwait.taskwait_sem, 0); + task->taskwait = &taskwait; + +@@ -978,12 +1533,30 @@ gomp_task_maybe_wait_for_dependencies (v + gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } +- if (task->children->kind == GOMP_TASK_WAITING) ++ ++ /* Theoretically when we have multiple priorities, we should ++ chose between the highest priority item in ++ task->children_queue and team->task_queue here, so we should ++ use priority_queue_next_task(). However, since we are ++ running an undeferred task, perhaps that makes all tasks it ++ depends on undeferred, thus a priority of INF? This would ++ make it unnecessary to take anything into account here, ++ but the dependencies. ++ ++ On the other hand, if we want to use priority_queue_next_task(), ++ care should be taken to only use priority_queue_remove() ++ below if the task was actually removed from the children ++ queue. 
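gomp_task_maybe_wait_for_dependencies is reached from user code along the lines of this hedged example (not part of the patch): an undeferred task, forced here with if(0), that carries depend clauses makes the encountering thread wait for the sibling tasks those clauses name:

#include <stdio.h>

int
main (void)
{
  int x = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task shared(x) depend(out: x)
    x = 42;
    /* if(0) makes the task undeferred; its depend(in: x) clause forces the
       encountering thread to wait for the producer task above first.  */
#pragma omp task shared(x) if(0) depend(in: x)
    printf ("%d\n", x);         /* Prints 42.  */
  }
  return 0;
}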
*/ ++ bool ignored; ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_IGNORED, NULL, &ignored); ++ ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -996,8 +1569,10 @@ gomp_task_maybe_wait_for_dependencies (v + } + } + else +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskwait.in_depend_wait = true; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) +@@ -1014,7 +1589,28 @@ gomp_task_maybe_wait_for_dependencies (v + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1027,16 +1623,13 @@ gomp_task_maybe_wait_for_dependencies (v + = gomp_task_run_post_handle_depend (child_task, team); + if (child_task->parent_depends_on) + --taskwait.n_depend; +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; +- } +- gomp_clear_parent (child_task->children); ++ ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; ++ ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -1069,14 +1662,14 @@ GOMP_taskgroup_start (void) + struct gomp_taskgroup *taskgroup; + + /* If team is NULL, all tasks are executed as +- GOMP_TASK_IFFALSE tasks and thus all children tasks of ++ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of + taskgroup and their descendant tasks will be finished + by the time GOMP_taskgroup_end is called. 
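A minimal user-level taskgroup example (not part of the patch); the construct maps onto GOMP_taskgroup_start and GOMP_taskgroup_end, which now track member tasks in the new taskgroup_queue:

#include <stdio.h>

int
main (void)
{
  int sum = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp taskgroup
    {
      for (int i = 1; i <= 4; i++)
#pragma omp task shared(sum)
#pragma omp atomic
        sum += i;
    }                           /* GOMP_taskgroup_end waits for all four tasks.  */
    printf ("%d\n", sum);       /* Prints 10.  */
  }
  return 0;
}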
*/ + if (team == NULL) + return; + taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = task->taskgroup; +- taskgroup->children = NULL; ++ priority_queue_init (&taskgroup->taskgroup_queue); + taskgroup->in_taskgroup_wait = false; + taskgroup->cancelled = false; + taskgroup->num_children = 0; +@@ -1098,6 +1691,17 @@ GOMP_taskgroup_end (void) + if (team == NULL) + return; + taskgroup = task->taskgroup; ++ if (__builtin_expect (taskgroup == NULL, 0) ++ && thr->ts.level == 0) ++ { ++ /* This can happen if GOMP_taskgroup_start is called when ++ thr->ts.team == NULL, but inside of the taskgroup there ++ is #pragma omp target nowait that creates an implicit ++ team with a single thread. In this case, we want to wait ++ for all outstanding tasks in this team. */ ++ gomp_team_barrier_wait (&team->barrier); ++ return; ++ } + + /* The acquire barrier on load of taskgroup->num_children here + synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. +@@ -1108,19 +1712,25 @@ GOMP_taskgroup_end (void) + if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) + goto finish; + ++ bool unused; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (taskgroup->children == NULL) ++ if (priority_queue_empty_p (&taskgroup->taskgroup_queue, ++ MEMMODEL_RELAXED)) + { + if (taskgroup->num_children) + { +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, ++ MEMMODEL_RELAXED)) + goto do_wait; +- child_task = task->children; +- } +- else ++ child_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, ++ &unused); ++ } ++ else + { + gomp_mutex_unlock (&team->task_lock); + if (to_free) +@@ -1132,12 +1742,13 @@ GOMP_taskgroup_end (void) + } + } + else +- child_task = taskgroup->children; ++ child_task ++ = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ PQ_TEAM, &team->task_queue, &unused); + if (child_task->kind == GOMP_TASK_WAITING) + { + cancelled +- = gomp_task_run_pre (child_task, child_task->parent, taskgroup, +- team); ++ = gomp_task_run_pre (child_task, child_task->parent, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -1153,8 +1764,10 @@ GOMP_taskgroup_end (void) + { + child_task = NULL; + do_wait: +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskgroup->in_taskgroup_wait = true; + } + gomp_mutex_unlock (&team->task_lock); +@@ -1172,7 +1785,28 @@ GOMP_taskgroup_end (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. 
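The state check above is needed because GOMP_PLUGIN_target_task_completion may fire at any point after gomp_target_task_fn returns, so the submitting thread and the completion callback must agree, under task_lock, on which of them requeues the task. A reduced sketch of that handshake with hypothetical names (not libgomp's actual code):

#include <pthread.h>
#include <stdbool.h>

enum job_state { JOB_LAUNCHED, JOB_RUNNING, JOB_FINISHED };

struct async_job
{
  enum job_state state;         /* Starts as JOB_LAUNCHED.  */
  bool requeued;
};

static pthread_mutex_t job_lock = PTHREAD_MUTEX_INITIALIZER;

static void
requeue (struct async_job *job)
{
  job->requeued = true;         /* Stand-in for putting the task back on its queues.  */
}

/* Runs from the device plugin when the asynchronous job completes.  */
void
on_completion (struct async_job *job)
{
  pthread_mutex_lock (&job_lock);
  if (job->state == JOB_RUNNING)
    requeue (job);              /* Submitter already handed ownership over.  */
  else
    job->state = JOB_FINISHED;  /* We beat the submitter; it will requeue.  */
  pthread_mutex_unlock (&job_lock);
}

/* Runs in the thread that launched the job, right after launching it.  */
void
after_launch (struct async_job *job)
{
  pthread_mutex_lock (&job_lock);
  if (job->state == JOB_FINISHED)
    requeue (job);              /* Completion already fired; requeue here.  */
  else
    job->state = JOB_RUNNING;   /* Completion callback will requeue later.  */
  pthread_mutex_unlock (&job_lock);
}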
*/ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1184,7 +1818,7 @@ GOMP_taskgroup_end (void) + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +--- libgomp/libgomp_g.h.jj 2014-05-15 10:56:31.429532978 +0200 ++++ libgomp/libgomp_g.h 2016-07-13 16:57:04.422535521 +0200 +@@ -29,6 +29,7 @@ + #define LIBGOMP_G_H 1 + + #include ++#include + + /* barrier.c */ + +@@ -50,6 +51,10 @@ extern bool GOMP_loop_static_start (long + extern bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_guided_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_runtime_start (long, long, long, long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long, ++ long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long, ++ long *, long *); + + extern bool GOMP_loop_ordered_static_start (long, long, long, long, + long *, long *); +@@ -63,12 +68,23 @@ extern bool GOMP_loop_static_next (long + extern bool GOMP_loop_dynamic_next (long *, long *); + extern bool GOMP_loop_guided_next (long *, long *); + extern bool GOMP_loop_runtime_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *); + + extern bool GOMP_loop_ordered_static_next (long *, long *); + extern bool GOMP_loop_ordered_dynamic_next (long *, long *); + extern bool GOMP_loop_ordered_guided_next (long *, long *); + extern bool GOMP_loop_ordered_runtime_next (long *, long *); + ++extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, ++ long *); ++ + extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, + unsigned, long, long, long, long); + extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *, +@@ -89,6 +105,12 @@ extern void GOMP_parallel_loop_guided (v + extern void GOMP_parallel_loop_runtime (void (*)(void *), void *, + unsigned, long, long, long, + unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); + + extern void GOMP_loop_end (void); + extern void GOMP_loop_end_nowait (void); +@@ -119,6 +141,18 @@ extern bool GOMP_loop_ull_runtime_start + unsigned long long, + unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_start (bool, unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long, ++ unsigned long 
long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, + unsigned long long, +@@ -152,6 +186,10 @@ extern bool GOMP_loop_ull_guided_next (u + unsigned long long *); + extern bool GOMP_loop_ull_runtime_next (unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *, + unsigned long long *); +@@ -162,10 +200,34 @@ extern bool GOMP_loop_ull_ordered_guided + extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *, + unsigned long long *); + ++extern bool GOMP_loop_ull_doacross_static_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_guided_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, ++ unsigned long long *, ++ unsigned long long *, ++ unsigned long long *); ++ + /* ordered.c */ + + extern void GOMP_ordered_start (void); + extern void GOMP_ordered_end (void); ++extern void GOMP_doacross_post (long *); ++extern void GOMP_doacross_wait (long, ...); ++extern void GOMP_doacross_ull_post (unsigned long long *); ++extern void GOMP_doacross_ull_wait (unsigned long long, ...); + + /* parallel.c */ + +@@ -178,7 +240,15 @@ extern bool GOMP_cancellation_point (int + /* task.c */ + + extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), +- long, long, bool, unsigned, void **); ++ long, long, bool, unsigned, void **, int); ++extern void GOMP_taskloop (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, unsigned, ++ unsigned long, int, long, long, long); ++extern void GOMP_taskloop_ull (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, ++ unsigned, unsigned long, int, ++ unsigned long long, unsigned long long, ++ unsigned long long); + extern void GOMP_taskwait (void); + extern void GOMP_taskyield (void); + extern void GOMP_taskgroup_start (void); +@@ -206,11 +276,38 @@ extern void GOMP_single_copy_end (void * + + extern void GOMP_target (int, void (*) (void *), const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **, void **); + extern void GOMP_target_data (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_data_ext (int, size_t, void **, size_t *, ++ unsigned short *); + extern void GOMP_target_end_data (void); + extern void GOMP_target_update (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_update_ext (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **); ++extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, ++ void **); + extern void GOMP_teams (unsigned int, unsigned int); + ++/* oacc-parallel.c */ ++ ++extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, 
++ void **, size_t *, unsigned short *, ...); ++extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, int, int, int, int, int, ...); ++extern void GOACC_data_start (int, size_t, void **, size_t *, ++ unsigned short *); ++extern void GOACC_data_end (void); ++extern void GOACC_enter_exit_data (int, size_t, void **, ++ size_t *, unsigned short *, int, int, ...); ++extern void GOACC_update (int, size_t, void **, size_t *, ++ unsigned short *, int, int, ...); ++extern void GOACC_wait (int, int, ...); ++extern int GOACC_get_num_threads (void); ++extern int GOACC_get_thread_num (void); ++extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); ++ + #endif /* LIBGOMP_G_H */ +--- libgomp/libgomp.h.jj 2014-08-01 15:59:49.145188127 +0200 ++++ libgomp/libgomp.h 2016-07-14 17:40:24.038243456 +0200 +@@ -34,12 +34,35 @@ + #ifndef LIBGOMP_H + #define LIBGOMP_H 1 + ++#ifndef _LIBGOMP_CHECKING_ ++/* Define to 1 to perform internal sanity checks. */ ++#define _LIBGOMP_CHECKING_ 0 ++#endif ++ + #include "config.h" + #include "gstdint.h" ++#include "libgomp-plugin.h" + + #include + #include + #include ++#include ++ ++/* Needed for memset in priority_queue.c. */ ++#if _LIBGOMP_CHECKING_ ++# ifdef STRING_WITH_STRINGS ++# include ++# include ++# else ++# ifdef HAVE_STRING_H ++# include ++# else ++# ifdef HAVE_STRINGS_H ++# include ++# endif ++# endif ++# endif ++#endif + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) +@@ -56,6 +79,44 @@ enum memmodel + MEMMODEL_SEQ_CST = 5 + }; + ++/* alloc.c */ ++ ++extern void *gomp_malloc (size_t) __attribute__((malloc)); ++extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); ++extern void *gomp_realloc (void *, size_t); ++ ++/* Avoid conflicting prototypes of alloca() in system headers by using ++ GCC's builtin alloca(). */ ++#define gomp_alloca(x) __builtin_alloca(x) ++ ++/* error.c */ ++ ++extern void gomp_vdebug (int, const char *, va_list); ++extern void gomp_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++#define gomp_vdebug(KIND, FMT, VALIST) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ ++ } while (0) ++#define gomp_debug(KIND, ...) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_debug) ((KIND), __VA_ARGS__); \ ++ } while (0) ++extern void gomp_verror (const char *, va_list); ++extern void gomp_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void gomp_vfatal (const char *, va_list) ++ __attribute__ ((noreturn)); ++extern void gomp_fatal (const char *, ...) ++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++struct gomp_task; ++struct gomp_taskgroup; ++struct htab; ++ ++#include "priority_queue.h" + #include "sem.h" + #include "mutex.h" + #include "bar.h" +@@ -74,6 +135,44 @@ enum gomp_schedule_type + GFS_AUTO + }; + ++struct gomp_doacross_work_share ++{ ++ union { ++ /* chunk_size copy, as ws->chunk_size is multiplied by incr for ++ GFS_DYNAMIC. */ ++ long chunk_size; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long chunk_size_ull; ++ /* For schedule(static,0) this is the number ++ of iterations assigned to the last thread, i.e. number of ++ iterations / number of threads. */ ++ long q; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long q_ull; ++ }; ++ /* Size of each array entry (padded to cache line size). */ ++ unsigned long elt_sz; ++ /* Number of dimensions in sink vectors. 
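For context, a user-level sketch (not part of the patch) of the doacross loops this work-share data serves; the depend(sink)/depend(source) clauses compile into the GOMP_doacross_wait and GOMP_doacross_post entry points declared earlier:

#define N 1024

void
prefix_sum (int *a)
{
#pragma omp parallel for ordered(1)
  for (int i = 1; i < N; i++)
    {
#pragma omp ordered depend(sink: i - 1)
      a[i] += a[i - 1];         /* Safe: iteration i-1 has already posted.  */
#pragma omp ordered depend(source)
    }
}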
*/ ++ unsigned int ncounts; ++ /* True if the iterations can be flattened. */ ++ bool flattened; ++ /* Actual array (of elt_sz sized units), aligned to cache line size. ++ This is indexed by team_id for GFS_STATIC and outermost iteration ++ / chunk_size for other schedules. */ ++ unsigned char *array; ++ /* These two are only used for schedule(static,0). */ ++ /* This one is number of iterations % number of threads. */ ++ long t; ++ union { ++ /* And this one is cached t * (q + 1). */ ++ long boundary; ++ /* Likewise, but for the ull implementation. */ ++ unsigned long long boundary_ull; ++ }; ++ /* Array of shift counts for each dimension if they can be flattened. */ ++ unsigned int shift_counts[]; ++}; ++ + struct gomp_work_share + { + /* This member records the SCHEDULE clause to be used for this construct. +@@ -105,13 +204,18 @@ struct gomp_work_share + }; + }; + +- /* This is a circular queue that details which threads will be allowed +- into the ordered region and in which order. When a thread allocates +- iterations on which it is going to work, it also registers itself at +- the end of the array. When a thread reaches the ordered region, it +- checks to see if it is the one at the head of the queue. If not, it +- blocks on its RELEASE semaphore. */ +- unsigned *ordered_team_ids; ++ union { ++ /* This is a circular queue that details which threads will be allowed ++ into the ordered region and in which order. When a thread allocates ++ iterations on which it is going to work, it also registers itself at ++ the end of the array. When a thread reaches the ordered region, it ++ checks to see if it is the one at the head of the queue. If not, it ++ blocks on its RELEASE semaphore. */ ++ unsigned *ordered_team_ids; ++ ++ /* This is a pointer to DOACROSS work share data. */ ++ struct gomp_doacross_work_share *doacross; ++ }; + + /* This is the number of threads that have registered themselves in + the circular queue ordered_team_ids. */ +@@ -230,7 +334,7 @@ struct gomp_task_icv + { + unsigned long nthreads_var; + enum gomp_schedule_type run_sched_var; +- int run_sched_modifier; ++ int run_sched_chunk_size; + int default_device_var; + unsigned int thread_limit_var; + bool dyn_var; +@@ -246,6 +350,7 @@ extern gomp_mutex_t gomp_managed_threads + #endif + extern unsigned long gomp_max_active_levels_var; + extern bool gomp_cancel_var; ++extern int gomp_max_task_priority_var; + extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; + extern unsigned long gomp_available_cpus, gomp_managed_threads; + extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; +@@ -253,25 +358,36 @@ extern char *gomp_bind_var_list; + extern unsigned long gomp_bind_var_list_len; + extern void **gomp_places_list; + extern unsigned long gomp_places_list_len; ++extern int gomp_debug_var; ++extern int goacc_device_num; ++extern char *goacc_device_type; + + enum gomp_task_kind + { ++ /* Implicit task. */ + GOMP_TASK_IMPLICIT, +- GOMP_TASK_IFFALSE, ++ /* Undeferred task. */ ++ GOMP_TASK_UNDEFERRED, ++ /* Task created by GOMP_task and waiting to be run. */ + GOMP_TASK_WAITING, +- GOMP_TASK_TIED ++ /* Task currently executing or scheduled and about to execute. */ ++ GOMP_TASK_TIED, ++ /* Used for target tasks that have vars mapped and async run started, ++ but not yet completed. Once that completes, they will be readded ++ into the queues as GOMP_TASK_WAITING in order to perform the var ++ unmapping. 
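gomp_max_task_priority_var above backs the OpenMP 4.5 priority clause and the OMP_MAX_TASK_PRIORITY limit it is clamped against. A hedged user-level example (not part of the patch):

#include <omp.h>
#include <stdio.h>

int
main (void)
{
  /* Returns 0 unless OMP_MAX_TASK_PRIORITY is set in the environment.  */
  printf ("max task priority: %d\n", omp_get_max_task_priority ());
#pragma omp parallel
#pragma omp single
  for (int i = 0; i < 8; i++)
#pragma omp task priority(i)    /* Hint: higher values should run sooner.  */
    printf ("task %d\n", i);
  return 0;
}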
*/ ++ GOMP_TASK_ASYNC_RUNNING + }; + +-struct gomp_task; +-struct gomp_taskgroup; +-struct htab; +- + struct gomp_task_depend_entry + { ++ /* Address of dependency. */ + void *addr; + struct gomp_task_depend_entry *next; + struct gomp_task_depend_entry *prev; ++ /* Task that provides the dependency in ADDR. */ + struct gomp_task *task; ++ /* Depend entry is of type "IN". */ + bool is_in; + bool redundant; + bool redundant_out; +@@ -290,8 +406,8 @@ struct gomp_taskwait + { + bool in_taskwait; + bool in_depend_wait; ++ /* Number of tasks we are waiting for. */ + size_t n_depend; +- struct gomp_task *last_parent_depends_on; + gomp_sem_t taskwait_sem; + }; + +@@ -299,20 +415,31 @@ struct gomp_taskwait + + struct gomp_task + { ++ /* Parent of this task. */ + struct gomp_task *parent; +- struct gomp_task *children; +- struct gomp_task *next_child; +- struct gomp_task *prev_child; +- struct gomp_task *next_queue; +- struct gomp_task *prev_queue; +- struct gomp_task *next_taskgroup; +- struct gomp_task *prev_taskgroup; ++ /* Children of this task. */ ++ struct priority_queue children_queue; ++ /* Taskgroup this task belongs in. */ + struct gomp_taskgroup *taskgroup; ++ /* Tasks that depend on this task. */ + struct gomp_dependers_vec *dependers; + struct htab *depend_hash; + struct gomp_taskwait *taskwait; ++ /* Number of items in DEPEND. */ + size_t depend_count; ++ /* Number of tasks this task depends on. Once this counter reaches ++ 0, we have no unsatisfied dependencies, and this task can be put ++ into the various queues to be scheduled. */ + size_t num_dependees; ++ ++ /* Priority of this task. */ ++ int priority; ++ /* The priority node for this task in each of the different queues. ++ We put this here to avoid allocating space for each priority ++ node. Then we play offsetof() games to convert between pnode[] ++ entries and the gomp_task in which they reside. */ ++ struct priority_node pnode[3]; ++ + struct gomp_task_icv icv; + void (*fn) (void *); + void *fn_data; +@@ -320,20 +447,58 @@ struct gomp_task + bool in_tied_task; + bool final_task; + bool copy_ctors_done; ++ /* Set for undeferred tasks with unsatisfied dependencies which ++ block further execution of their parent until the dependencies ++ are satisfied. */ + bool parent_depends_on; ++ /* Dependencies provided and/or needed for this task. DEPEND_COUNT ++ is the number of items available. */ + struct gomp_task_depend_entry depend[]; + }; + ++/* This structure describes a single #pragma omp taskgroup. */ ++ + struct gomp_taskgroup + { + struct gomp_taskgroup *prev; +- struct gomp_task *children; ++ /* Queue of tasks that belong in this taskgroup. */ ++ struct priority_queue taskgroup_queue; + bool in_taskgroup_wait; + bool cancelled; + gomp_sem_t taskgroup_sem; + size_t num_children; + }; + ++/* Various state of OpenMP async offloading tasks. */ ++enum gomp_target_task_state ++{ ++ GOMP_TARGET_TASK_DATA, ++ GOMP_TARGET_TASK_BEFORE_MAP, ++ GOMP_TARGET_TASK_FALLBACK, ++ GOMP_TARGET_TASK_READY_TO_RUN, ++ GOMP_TARGET_TASK_RUNNING, ++ GOMP_TARGET_TASK_FINISHED ++}; ++ ++/* This structure describes a target task. */ ++ ++struct gomp_target_task ++{ ++ struct gomp_device_descr *devicep; ++ void (*fn) (void *); ++ size_t mapnum; ++ size_t *sizes; ++ unsigned short *kinds; ++ unsigned int flags; ++ enum gomp_target_task_state state; ++ struct target_mem_desc *tgt; ++ struct gomp_task *task; ++ struct gomp_team *team; ++ /* Device-specific target arguments. 
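A target region with the nowait clause becomes one of these target tasks and, when run asynchronously, moves through the GOMP_TARGET_TASK_* states above. A user-level sketch (not part of the patch):

#define N 1000

void
offload (float *x, float *y)
{
#pragma omp target map(to: x[0:N]) map(tofrom: y[0:N]) nowait
  for (int i = 0; i < N; i++)
    y[i] += 2.0f * x[i];

  /* ... unrelated host work can overlap with the device execution ... */

#pragma omp taskwait            /* Wait for the asynchronous target task.  */
}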
*/ ++ void **args; ++ void *hostaddrs[]; ++}; ++ + /* This structure describes a "team" of threads. These are the threads + that are spawned by a PARALLEL constructs, as well as the work sharing + constructs that the team encounters. */ +@@ -396,7 +561,8 @@ struct gomp_team + struct gomp_work_share work_shares[8]; + + gomp_mutex_t task_lock; +- struct gomp_task *task_queue; ++ /* Scheduled tasks. */ ++ struct priority_queue task_queue; + /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ + unsigned int task_count; + /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ +@@ -451,6 +617,9 @@ struct gomp_thread_pool + struct gomp_thread **threads; + unsigned threads_size; + unsigned threads_used; ++ /* The last team is used for non-nested teams to delay their destruction to ++ make sure all the threads in the team move on to the pool's barrier before ++ the team's barrier is destroyed. */ + struct gomp_team *last_team; + /* Number of threads running in this contention group. */ + unsigned long threads_busy; +@@ -519,23 +688,7 @@ extern bool gomp_affinity_same_place (vo + extern bool gomp_affinity_finalize_place_list (bool); + extern bool gomp_affinity_init_level (int, unsigned long, bool); + extern void gomp_affinity_print_place (void *); +- +-/* alloc.c */ +- +-extern void *gomp_malloc (size_t) __attribute__((malloc)); +-extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); +-extern void *gomp_realloc (void *, size_t); +- +-/* Avoid conflicting prototypes of alloca() in system headers by using +- GCC's builtin alloca(). */ +-#define gomp_alloca(x) __builtin_alloca(x) +- +-/* error.c */ +- +-extern void gomp_error (const char *, ...) +- __attribute__((format (printf, 1, 2))); +-extern void gomp_fatal (const char *, ...) +- __attribute__((noreturn, format (printf, 1, 2))); ++extern void gomp_get_place_proc_ids_8 (int, int64_t *); + + /* iter.c */ + +@@ -572,6 +725,9 @@ extern void gomp_ordered_next (void); + extern void gomp_ordered_static_init (void); + extern void gomp_ordered_static_next (void); + extern void gomp_ordered_sync (void); ++extern void gomp_doacross_init (unsigned, long *, long); ++extern void gomp_doacross_ull_init (unsigned, unsigned long long *, ++ unsigned long long); + + /* parallel.c */ + +@@ -588,6 +744,12 @@ extern void gomp_init_task (struct gomp_ + struct gomp_task_icv *); + extern void gomp_end_task (void); + extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); ++extern void gomp_task_maybe_wait_for_dependencies (void **); ++extern bool gomp_create_target_task (struct gomp_device_descr *, ++ void (*) (void *), size_t, void **, ++ size_t *, unsigned short *, unsigned int, ++ void **, void **, ++ enum gomp_target_task_state); + + static void inline + gomp_finish_task (struct gomp_task *task) +@@ -606,7 +768,213 @@ extern void gomp_free_thread (void *); + + /* target.c */ + ++extern void gomp_init_targets_once (void); + extern int gomp_get_num_devices (void); ++extern bool gomp_target_task_fn (void *); ++ ++/* Splay tree definitions. */ ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ ++struct target_var_desc { ++ /* Splay key. */ ++ splay_tree_key key; ++ /* True if data should be copied from device to host at the end. */ ++ bool copy_from; ++ /* True if data always should be copied from device to host at the end. */ ++ bool always_copy_from; ++ /* Relative offset against key host_start. 
*/ ++ uintptr_t offset; ++ /* Actual length. */ ++ uintptr_t length; ++}; ++ ++struct target_mem_desc { ++ /* Reference count. */ ++ uintptr_t refcount; ++ /* All the splay nodes allocated together. */ ++ splay_tree_node array; ++ /* Start of the target region. */ ++ uintptr_t tgt_start; ++ /* End of the targer region. */ ++ uintptr_t tgt_end; ++ /* Handle to free. */ ++ void *to_free; ++ /* Previous target_mem_desc. */ ++ struct target_mem_desc *prev; ++ /* Number of items in following list. */ ++ size_t list_count; ++ ++ /* Corresponding target device descriptor. */ ++ struct gomp_device_descr *device_descr; ++ ++ /* List of target items to remove (or decrease refcount) ++ at the end of region. */ ++ struct target_var_desc list[]; ++}; ++ ++/* Special value for refcount - infinity. */ ++#define REFCOUNT_INFINITY (~(uintptr_t) 0) ++/* Special value for refcount - tgt_offset contains target address of the ++ artificial pointer to "omp declare target link" object. */ ++#define REFCOUNT_LINK (~(uintptr_t) 1) ++ ++struct splay_tree_key_s { ++ /* Address of the host object. */ ++ uintptr_t host_start; ++ /* Address immediately after the host object. */ ++ uintptr_t host_end; ++ /* Descriptor of the target memory. */ ++ struct target_mem_desc *tgt; ++ /* Offset from tgt->tgt_start to the start of the target object. */ ++ uintptr_t tgt_offset; ++ /* Reference count. */ ++ uintptr_t refcount; ++ /* Pointer to the original mapping of "omp declare target link" object. */ ++ splay_tree_key link_key; ++}; ++ ++/* The comparison function. */ ++ ++static inline int ++splay_compare (splay_tree_key x, splay_tree_key y) ++{ ++ if (x->host_start == x->host_end ++ && y->host_start == y->host_end) ++ return 0; ++ if (x->host_end <= y->host_start) ++ return -1; ++ if (x->host_start >= y->host_end) ++ return 1; ++ return 0; ++} ++ ++#include "splay-tree.h" ++ ++typedef struct acc_dispatch_t ++{ ++ /* This is a linked list of data mapped using the ++ acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas. ++ Unlike mapped_data in the goacc_thread struct, unmapping can ++ happen out-of-order with respect to mapping. */ ++ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ ++ struct target_mem_desc *data_environ; ++ ++ /* Execute. */ ++ void (*exec_func) (void (*) (void *), size_t, void **, void **, int, ++ unsigned *, void *); ++ ++ /* Async cleanup callback registration. */ ++ void (*register_async_cleanup_func) (void *, int); ++ ++ /* Asynchronous routines. */ ++ int (*async_test_func) (int); ++ int (*async_test_all_func) (void); ++ void (*async_wait_func) (int); ++ void (*async_wait_async_func) (int, int); ++ void (*async_wait_all_func) (void); ++ void (*async_wait_all_async_func) (int); ++ void (*async_set_async_func) (int); ++ ++ /* Create/destroy TLS data. */ ++ void *(*create_thread_data_func) (int); ++ void (*destroy_thread_data_func) (void *); ++ ++ /* NVIDIA target specific routines. */ ++ struct { ++ void *(*get_current_device_func) (void); ++ void *(*get_current_context_func) (void); ++ void *(*get_stream_func) (int); ++ int (*set_stream_func) (int, void *); ++ } cuda; ++} acc_dispatch_t; ++ ++/* Various state of the accelerator device. */ ++enum gomp_device_state ++{ ++ GOMP_DEVICE_UNINITIALIZED, ++ GOMP_DEVICE_INITIALIZED, ++ GOMP_DEVICE_FINALIZED ++}; ++ ++/* This structure describes accelerator device. 
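The splay_compare rule above treats overlapping [host_start, host_end) ranges as equal, so a lookup with any address inside a mapped object finds the covering mapping. A standalone illustration of just that comparison rule, with hypothetical names:

#include <stdint.h>
#include <stdio.h>

struct range { uintptr_t start, end; };

static int
range_compare (struct range x, struct range y)
{
  if (x.start == x.end && y.start == y.end)
    return 0;                   /* Two empty ranges compare equal.  */
  if (x.end <= y.start)
    return -1;
  if (x.start >= y.end)
    return 1;
  return 0;                     /* Overlap: treated as a match.  */
}

int
main (void)
{
  struct range mapped = { 0x1000, 0x2000 };  /* A mapped host object.  */
  struct range probe  = { 0x1800, 0x1804 };  /* A 4-byte access inside it.  */
  printf ("%d\n", range_compare (probe, mapped));  /* Prints 0: found.  */
  return 0;
}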
++ It contains name of the corresponding libgomp plugin, function handlers for ++ interaction with the device, ID-number of the device, and information about ++ mapped memory. */ ++struct gomp_device_descr ++{ ++ /* Immutable data, which is only set during initialization, and which is not ++ guarded by the lock. */ ++ ++ /* The name of the device. */ ++ const char *name; ++ ++ /* Capabilities of device (supports OpenACC, OpenMP). */ ++ unsigned int capabilities; ++ ++ /* This is the ID number of device among devices of the same type. */ ++ int target_id; ++ ++ /* This is the TYPE of device. */ ++ enum offload_target_type type; ++ ++ /* Function handlers. */ ++ const char *(*get_name_func) (void); ++ unsigned int (*get_caps_func) (void); ++ int (*get_type_func) (void); ++ int (*get_num_devices_func) (void); ++ bool (*init_device_func) (int); ++ bool (*fini_device_func) (int); ++ unsigned (*version_func) (void); ++ int (*load_image_func) (int, unsigned, const void *, struct addr_pair **); ++ bool (*unload_image_func) (int, unsigned, const void *); ++ void *(*alloc_func) (int, size_t); ++ bool (*free_func) (int, void *); ++ bool (*dev2host_func) (int, void *, const void *, size_t); ++ bool (*host2dev_func) (int, void *, const void *, size_t); ++ bool (*dev2dev_func) (int, void *, const void *, size_t); ++ bool (*can_run_func) (void *); ++ void (*run_func) (int, void *, void *, void **); ++ void (*async_run_func) (int, void *, void *, void **, void *); ++ ++ /* Splay tree containing information about mapped memory regions. */ ++ struct splay_tree_s mem_map; ++ ++ /* Mutex for the mutable data. */ ++ gomp_mutex_t lock; ++ ++ /* Current state of the device. OpenACC allows to move from INITIALIZED state ++ back to UNINITIALIZED state. OpenMP allows only to move from INITIALIZED ++ to FINALIZED state (at program shutdown). */ ++ enum gomp_device_state state; ++ ++ /* OpenACC-specific data and functions. */ ++ /* This is mutable because of its mutable data_environ and target_data ++ members. */ ++ acc_dispatch_t openacc; ++}; ++ ++/* Kind of the pragma, for which gomp_map_vars () is called. 
*/ ++enum gomp_map_vars_kind ++{ ++ GOMP_MAP_VARS_OPENACC, ++ GOMP_MAP_VARS_TARGET, ++ GOMP_MAP_VARS_DATA, ++ GOMP_MAP_VARS_ENTER_DATA ++}; ++ ++extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); ++extern void gomp_acc_remove_pointer (void *, bool, int, int); ++ ++extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, ++ size_t, void **, void **, ++ size_t *, void *, bool, ++ enum gomp_map_vars_kind); ++extern void gomp_unmap_vars (struct target_mem_desc *, bool); ++extern void gomp_init_device (struct gomp_device_descr *); ++extern void gomp_free_memmap (struct splay_tree_s *); ++extern void gomp_unload_device (struct gomp_device_descr *); + + /* work.c */ + +@@ -646,8 +1014,28 @@ typedef enum omp_proc_bind_t + omp_proc_bind_spread = 4 + } omp_proc_bind_t; + ++typedef enum omp_lock_hint_t ++{ ++ omp_lock_hint_none = 0, ++ omp_lock_hint_uncontended = 1, ++ omp_lock_hint_contended = 2, ++ omp_lock_hint_nonspeculative = 4, ++ omp_lock_hint_speculative = 8, ++} omp_lock_hint_t; ++ ++extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t) ++ __GOMP_NOTHROW; ++extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t) ++ __GOMP_NOTHROW; ++ + extern int omp_get_cancellation (void) __GOMP_NOTHROW; + extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; ++extern int omp_get_num_places (void) __GOMP_NOTHROW; ++extern int omp_get_place_num_procs (int) __GOMP_NOTHROW; ++extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW; ++extern int omp_get_place_num (void) __GOMP_NOTHROW; ++extern int omp_get_partition_num_places (void) __GOMP_NOTHROW; ++extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; + + extern void omp_set_default_device (int) __GOMP_NOTHROW; + extern int omp_get_default_device (void) __GOMP_NOTHROW; +@@ -656,6 +1044,24 @@ extern int omp_get_num_teams (void) __GO + extern int omp_get_team_num (void) __GOMP_NOTHROW; + + extern int omp_is_initial_device (void) __GOMP_NOTHROW; ++extern int omp_get_initial_device (void) __GOMP_NOTHROW; ++extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; ++ ++extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; ++extern void omp_target_free (void *, int) __GOMP_NOTHROW; ++extern int omp_target_is_present (void *, int) __GOMP_NOTHROW; ++extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, ++ __SIZE_TYPE__, int, int) __GOMP_NOTHROW; ++extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, int, int) ++ __GOMP_NOTHROW; ++extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, ++ __SIZE_TYPE__, int) __GOMP_NOTHROW; ++extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; + + #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \ + || !defined (HAVE_ATTRIBUTE_ALIAS) \ +@@ -728,4 +1134,34 @@ extern int gomp_test_nest_lock_25 (omp_n + # define ialias_call(fn) fn + #endif + ++/* Helper function for priority_node_to_task() and ++ task_to_priority_node(). ++ ++ Return the offset from a task to its priority_node entry. The ++ priority_node entry is has a type of TYPE. */ ++ ++static inline size_t ++priority_queue_offset (enum priority_queue_type type) ++{ ++ return offsetof (struct gomp_task, pnode[(int) type]); ++} ++ ++/* Return the task associated with a priority NODE of type TYPE. 
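priority_queue_offset here, together with the priority_node_to_task and task_to_priority_node conversions that follow, is the usual offsetof container-of pattern. A standalone sketch with hypothetical struct names:

#include <stddef.h>
#include <stdio.h>

struct node { struct node *next, *prev; };

struct item
{
  int payload;
  struct node qnode;            /* Linkage embedded in the containing object.  */
};

/* Map a pointer to the embedded node back to its containing item.  */
static struct item *
node_to_item (struct node *n)
{
  return (struct item *) ((char *) n - offsetof (struct item, qnode));
}

int
main (void)
{
  struct item it = { .payload = 7 };
  struct node *n = &it.qnode;   /* What a queue would hand back.  */
  printf ("%d\n", node_to_item (n)->payload);  /* Prints 7.  */
  return 0;
}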
*/ ++ ++static inline struct gomp_task * ++priority_node_to_task (enum priority_queue_type type, ++ struct priority_node *node) ++{ ++ return (struct gomp_task *) ((char *) node - priority_queue_offset (type)); ++} ++ ++/* Return the priority node of type TYPE for a given TASK. */ ++ ++static inline struct priority_node * ++task_to_priority_node (enum priority_queue_type type, ++ struct gomp_task *task) ++{ ++ return (struct priority_node *) ((char *) task ++ + priority_queue_offset (type)); ++} + #endif /* LIBGOMP_H */ +--- libgomp/env.c.jj 2014-05-15 10:56:32.420522486 +0200 ++++ libgomp/env.c 2016-07-13 16:57:04.437535335 +0200 +@@ -27,6 +27,8 @@ + + #include "libgomp.h" + #include "libgomp_f.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include + #include + #include +@@ -56,7 +58,7 @@ struct gomp_task_icv gomp_global_icv = { + .nthreads_var = 1, + .thread_limit_var = UINT_MAX, + .run_sched_var = GFS_DYNAMIC, +- .run_sched_modifier = 1, ++ .run_sched_chunk_size = 1, + .default_device_var = 0, + .dyn_var = false, + .nest_var = false, +@@ -66,6 +68,7 @@ struct gomp_task_icv gomp_global_icv = { + + unsigned long gomp_max_active_levels_var = INT_MAX; + bool gomp_cancel_var = false; ++int gomp_max_task_priority_var = 0; + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_t gomp_managed_threads_lock; + #endif +@@ -76,6 +79,9 @@ char *gomp_bind_var_list; + unsigned long gomp_bind_var_list_len; + void **gomp_places_list; + unsigned long gomp_places_list_len; ++int gomp_debug_var; ++char *goacc_device_type; ++int goacc_device_num; + + /* Parse the OMP_SCHEDULE environment variable. */ + +@@ -118,7 +124,7 @@ parse_schedule (void) + ++env; + if (*env == '\0') + { +- gomp_global_icv.run_sched_modifier ++ gomp_global_icv.run_sched_chunk_size + = gomp_global_icv.run_sched_var != GFS_STATIC; + return; + } +@@ -144,7 +150,7 @@ parse_schedule (void) + + if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) + value = 1; +- gomp_global_icv.run_sched_modifier = value; ++ gomp_global_icv.run_sched_chunk_size = value; + return; + + unknown: +@@ -1011,6 +1017,16 @@ parse_affinity (bool ignore) + return false; + } + ++static void ++parse_acc_device_type (void) ++{ ++ const char *env = getenv ("ACC_DEVICE_TYPE"); ++ ++ if (env && *env != '\0') ++ goacc_device_type = strdup (env); ++ else ++ goacc_device_type = NULL; ++} + + static void + handle_omp_display_env (unsigned long stacksize, int wait_policy) +@@ -1054,7 +1070,7 @@ handle_omp_display_env (unsigned long st + + fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); + +- fputs (" _OPENMP = '201307'\n", stderr); ++ fputs (" _OPENMP = '201511'\n", stderr); + fprintf (stderr, " OMP_DYNAMIC = '%s'\n", + gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_NESTED = '%s'\n", +@@ -1142,6 +1158,8 @@ handle_omp_display_env (unsigned long st + gomp_cancel_var ? 
"TRUE" : "FALSE"); + fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", + gomp_global_icv.default_device_var); ++ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", ++ gomp_max_task_priority_var); + + if (verbose) + { +@@ -1174,6 +1192,7 @@ initialize_env (void) + parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); + parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); + parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); ++ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); + parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, + true); + if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) +@@ -1181,6 +1200,7 @@ initialize_env (void) + gomp_global_icv.thread_limit_var + = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var; + } ++ parse_int ("GOMP_DEBUG", &gomp_debug_var, true); + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_init (&gomp_managed_threads_lock); + #endif +@@ -1271,6 +1291,15 @@ initialize_env (void) + } + + handle_omp_display_env (stacksize, wait_policy); ++ ++ /* OpenACC. */ ++ ++ if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true)) ++ goacc_device_num = 0; ++ ++ parse_acc_device_type (); ++ ++ goacc_runtime_initialize (); + } + + +@@ -1312,21 +1341,21 @@ omp_get_nested (void) + } + + void +-omp_set_schedule (omp_sched_t kind, int modifier) ++omp_set_schedule (omp_sched_t kind, int chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (true); + switch (kind) + { + case omp_sched_static: +- if (modifier < 1) +- modifier = 0; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 0; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_dynamic: + case omp_sched_guided: +- if (modifier < 1) +- modifier = 1; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 1; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_auto: + break; +@@ -1337,11 +1366,11 @@ omp_set_schedule (omp_sched_t kind, int + } + + void +-omp_get_schedule (omp_sched_t *kind, int *modifier) ++omp_get_schedule (omp_sched_t *kind, int *chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (false); + *kind = icv->run_sched_var; +- *modifier = icv->run_sched_modifier; ++ *chunk_size = icv->run_sched_chunk_size; + } + + int +@@ -1377,6 +1406,12 @@ omp_get_cancellation (void) + return gomp_cancel_var; + } + ++int ++omp_get_max_task_priority (void) ++{ ++ return gomp_max_task_priority_var; ++} ++ + omp_proc_bind_t + omp_get_proc_bind (void) + { +@@ -1425,6 +1460,59 @@ omp_is_initial_device (void) + return 1; + } + ++int ++omp_get_initial_device (void) ++{ ++ return GOMP_DEVICE_HOST_FALLBACK; ++} ++ ++int ++omp_get_num_places (void) ++{ ++ return gomp_places_list_len; ++} ++ ++int ++omp_get_place_num (void) ++{ ++ if (gomp_places_list == NULL) ++ return -1; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return (int) thr->place - 1; ++} ++ ++int ++omp_get_partition_num_places (void) ++{ ++ if (gomp_places_list == NULL) ++ return 0; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return thr->ts.place_partition_len; ++} ++ ++void ++omp_get_partition_place_nums (int *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = 
thr->ts.place_partition_off + i; ++} ++ + ialias (omp_set_dynamic) + ialias (omp_set_nested) + ialias (omp_set_num_threads) +@@ -1444,3 +1532,9 @@ ialias (omp_get_num_devices) + ialias (omp_get_num_teams) + ialias (omp_get_team_num) + ialias (omp_is_initial_device) ++ialias (omp_get_initial_device) ++ialias (omp_get_max_task_priority) ++ialias (omp_get_num_places) ++ialias (omp_get_place_num) ++ialias (omp_get_partition_num_places) ++ialias (omp_get_partition_place_nums) +--- libgomp/openacc.h.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/openacc.h 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,131 @@ ++/* OpenACC Runtime Library User-facing Declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef _OPENACC_H ++#define _OPENACC_H 1 ++ ++/* The OpenACC standard is silent on whether or not including ++ might or must not include other header files. We chose to include ++ some. */ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#if __cplusplus >= 201103 ++# define __GOACC_NOTHROW noexcept ++#elif __cplusplus ++# define __GOACC_NOTHROW throw () ++#else /* Not C++ */ ++# define __GOACC_NOTHROW __attribute__ ((__nothrow__)) ++#endif ++ ++/* Types */ ++typedef enum acc_device_t { ++ /* Keep in sync with include/gomp-constants.h. */ ++ acc_device_none = 0, ++ acc_device_default = 1, ++ acc_device_host = 2, ++ /* acc_device_host_nonshm = 3 removed. */ ++ acc_device_not_host = 4, ++ acc_device_nvidia = 5, ++ _ACC_device_hwm, ++ /* Ensure enumeration is layout compatible with int. */ ++ _ACC_highest = __INT_MAX__, ++ _ACC_neg = -1 ++} acc_device_t; ++ ++typedef enum acc_async_t { ++ /* Keep in sync with include/gomp-constants.h. 
*/ ++ acc_async_noval = -1, ++ acc_async_sync = -2 ++} acc_async_t; ++ ++int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW; ++void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; ++acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; ++void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; ++int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; ++int acc_async_test (int) __GOACC_NOTHROW; ++int acc_async_test_all (void) __GOACC_NOTHROW; ++void acc_wait (int) __GOACC_NOTHROW; ++void acc_wait_async (int, int) __GOACC_NOTHROW; ++void acc_wait_all (void) __GOACC_NOTHROW; ++void acc_wait_all_async (int) __GOACC_NOTHROW; ++void acc_init (acc_device_t) __GOACC_NOTHROW; ++void acc_shutdown (acc_device_t) __GOACC_NOTHROW; ++#ifdef __cplusplus ++int acc_on_device (int __arg) __GOACC_NOTHROW; ++#else ++int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; ++#endif ++void *acc_malloc (size_t) __GOACC_NOTHROW; ++void acc_free (void *) __GOACC_NOTHROW; ++/* Some of these would be more correct with const qualifiers, but ++ the standard specifies otherwise. */ ++void *acc_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_create (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW; ++void acc_copyout (void *, size_t) __GOACC_NOTHROW; ++void acc_delete (void *, size_t) __GOACC_NOTHROW; ++void acc_update_device (void *, size_t) __GOACC_NOTHROW; ++void acc_update_self (void *, size_t) __GOACC_NOTHROW; ++void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_unmap_data (void *) __GOACC_NOTHROW; ++void *acc_deviceptr (void *) __GOACC_NOTHROW; ++void *acc_hostptr (void *) __GOACC_NOTHROW; ++int acc_is_present (void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; ++ ++/* Old names. OpenACC does not specify whether these can or must ++ not be macros, inlines or aliases for the new names. */ ++#define acc_pcreate acc_present_or_create ++#define acc_pcopyin acc_present_or_copyin ++ ++/* CUDA-specific routines. */ ++void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; ++void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; ++void *acc_get_cuda_stream (int) __GOACC_NOTHROW; ++int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; ++ ++#ifdef __cplusplus ++} ++ ++/* Forwarding function with correctly typed arg. */ ++ ++#pragma acc routine seq ++inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW ++{ ++ return acc_on_device ((int) __arg); ++} ++#endif ++ ++#endif /* _OPENACC_H */ +--- libgomp/config/linux/doacross.h.jj 2016-07-13 16:57:18.902355979 +0200 ++++ libgomp/config/linux/doacross.h 2016-07-13 16:57:18.902355979 +0200 +@@ -0,0 +1,57 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
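A user-level sketch (not part of the patch) that exercises a few of the entry points declared in the new openacc.h above; exact behaviour depends on the active device type, so the calls are shown for illustration only:

#include <openacc.h>
#include <stdio.h>

int
main (void)
{
  printf ("host devices: %d\n", acc_get_num_devices (acc_device_host));
  acc_init (acc_device_default);

  float data[256] = { 0 };
  float *dev = acc_copyin (data, sizeof data);  /* Map and copy to the device.  */
  printf ("present: %d\n", acc_is_present (data, sizeof data));
  acc_copyout (data, sizeof data);              /* Copy back and unmap.  */
  (void) dev;

  acc_shutdown (acc_device_default);
  return 0;
}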
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This is a Linux specific implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++#include "wait.h" ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/config/posix/doacross.h.jj 2016-07-13 16:57:18.903355966 +0200 ++++ libgomp/config/posix/doacross.h 2016-07-13 16:57:18.903355966 +0200 +@@ -0,0 +1,62 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This is a generic implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile ("" : : : "memory"); ++} ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/splay-tree.c.jj 2016-07-13 16:57:18.919355768 +0200 ++++ libgomp/splay-tree.c 2016-07-13 16:57:18.919355768 +0200 +@@ -0,0 +1,238 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#include "libgomp.h" ++ ++/* Rotate the edge joining the left child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->right; ++ n->right = p; ++ p->left = tmp; ++ *pp = n; ++} ++ ++/* Rotate the edge joining the right child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->left; ++ n->left = p; ++ p->right = tmp; ++ *pp = n; ++} ++ ++/* Bottom up splay of KEY. */ ++ ++static void ++splay_tree_splay (splay_tree sp, splay_tree_key key) ++{ ++ if (sp->root == NULL) ++ return; ++ ++ do { ++ int cmp1, cmp2; ++ splay_tree_node n, c; ++ ++ n = sp->root; ++ cmp1 = splay_compare (key, &n->key); ++ ++ /* Found. */ ++ if (cmp1 == 0) ++ return; ++ ++ /* Left or right? If no child, then we're done. */ ++ if (cmp1 < 0) ++ c = n->left; ++ else ++ c = n->right; ++ if (!c) ++ return; ++ ++ /* Next one left or right? If found or no child, we're done ++ after one rotation. */ ++ cmp2 = splay_compare (key, &c->key); ++ if (cmp2 == 0 ++ || (cmp2 < 0 && !c->left) ++ || (cmp2 > 0 && !c->right)) ++ { ++ if (cmp1 < 0) ++ rotate_left (&sp->root, n, c); ++ else ++ rotate_right (&sp->root, n, c); ++ return; ++ } ++ ++ /* Now we have the four cases of double-rotation. */ ++ if (cmp1 < 0 && cmp2 < 0) ++ { ++ rotate_left (&n->left, c, c->left); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 > 0) ++ { ++ rotate_right (&n->right, c, c->right); ++ rotate_right (&sp->root, n, n->right); ++ } ++ else if (cmp1 < 0 && cmp2 > 0) ++ { ++ rotate_right (&n->left, c, c->right); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 < 0) ++ { ++ rotate_left (&n->right, c, c->left); ++ rotate_right (&sp->root, n, n->right); ++ } ++ } while (1); ++} ++ ++/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. 
*/ ++ ++attribute_hidden void ++splay_tree_insert (splay_tree sp, splay_tree_node node) ++{ ++ int comparison = 0; ++ ++ splay_tree_splay (sp, &node->key); ++ ++ if (sp->root) ++ comparison = splay_compare (&sp->root->key, &node->key); ++ ++ if (sp->root && comparison == 0) ++ gomp_fatal ("Duplicate node"); ++ else ++ { ++ /* Insert it at the root. */ ++ if (sp->root == NULL) ++ node->left = node->right = NULL; ++ else if (comparison < 0) ++ { ++ node->left = sp->root; ++ node->right = node->left->right; ++ node->left->right = NULL; ++ } ++ else ++ { ++ node->right = sp->root; ++ node->left = node->right->left; ++ node->right->left = NULL; ++ } ++ ++ sp->root = node; ++ } ++} ++ ++/* Remove node with KEY from SP. It is not an error if it did not exist. */ ++ ++attribute_hidden void ++splay_tree_remove (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ { ++ splay_tree_node left, right; ++ ++ left = sp->root->left; ++ right = sp->root->right; ++ ++ /* One of the children is now the root. Doesn't matter much ++ which, so long as we preserve the properties of the tree. */ ++ if (left) ++ { ++ sp->root = left; ++ ++ /* If there was a right child as well, hang it off the ++ right-most leaf of the left child. */ ++ if (right) ++ { ++ while (left->right) ++ left = left->right; ++ left->right = right; ++ } ++ } ++ else ++ sp->root = right; ++ } ++} ++ ++/* Lookup KEY in SP, returning NODE if present, and NULL ++ otherwise. */ ++ ++attribute_hidden splay_tree_key ++splay_tree_lookup (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ return &sp->root->key; ++ else ++ return NULL; ++} ++ ++/* Helper function for splay_tree_foreach. ++ ++ Run FUNC on every node in KEY. */ ++ ++static void ++splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, ++ void *data) ++{ ++ if (!node) ++ return; ++ func (&node->key, data); ++ splay_tree_foreach_internal (node->left, func, data); ++ /* Yeah, whatever. GCC can fix my tail recursion. */ ++ splay_tree_foreach_internal (node->right, func, data); ++} ++ ++/* Run FUNC on each of the nodes in SP. */ ++ ++attribute_hidden void ++splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) ++{ ++ splay_tree_foreach_internal (sp->root, func, data); ++} +--- libgomp/libgomp-plugin.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/libgomp-plugin.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Exported (non-hidden) functions exposing libgomp interface for plugins. */ ++ ++#include ++ ++#include "libgomp.h" ++#include "libgomp-plugin.h" ++ ++void * ++GOMP_PLUGIN_malloc (size_t size) ++{ ++ return gomp_malloc (size); ++} ++ ++void * ++GOMP_PLUGIN_malloc_cleared (size_t size) ++{ ++ return gomp_malloc_cleared (size); ++} ++ ++void * ++GOMP_PLUGIN_realloc (void *ptr, size_t size) ++{ ++ return gomp_realloc (ptr, size); ++} ++ ++void ++GOMP_PLUGIN_debug (int kind, const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vdebug (kind, msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_error (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_verror (msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_fatal (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vfatal (msg, ap); ++ va_end (ap); ++} +--- libgomp/libgomp-plugin.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/libgomp-plugin.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* An interface to various libgomp-internal functions for use by plugins. */ ++ ++#ifndef LIBGOMP_PLUGIN_H ++#define LIBGOMP_PLUGIN_H 1 ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Capabilities of offloading devices. */ ++#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0) ++#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1) ++#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2) ++#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3) ++ ++/* Type of offload target device. Keep in sync with include/gomp-constants.h. */ ++enum offload_target_type ++{ ++ OFFLOAD_TARGET_TYPE_HOST = 2, ++ /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ ++ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, ++ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, ++ OFFLOAD_TARGET_TYPE_HSA = 7 ++}; ++ ++/* Auxiliary struct, used for transferring pairs of addresses from plugin ++ to libgomp. */ ++struct addr_pair ++{ ++ uintptr_t start; ++ uintptr_t end; ++}; ++ ++/* Miscellaneous functions. 
*/ ++extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_realloc (void *, size_t); ++void GOMP_PLUGIN_target_task_completion (void *); ++ ++extern void GOMP_PLUGIN_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++extern void GOMP_PLUGIN_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void GOMP_PLUGIN_fatal (const char *, ...) ++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +--- libgomp/oacc-async.c.jj 2016-07-13 16:57:13.488423109 +0200 ++++ libgomp/oacc-async.c 2016-07-13 16:57:13.488423109 +0200 +@@ -0,0 +1,107 @@ ++/* OpenACC Runtime Library Definitions. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include ++#include "openacc.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++int ++acc_async_test (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_func (async); ++} ++ ++int ++acc_async_test_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_all_func (); ++} ++ ++void ++acc_wait (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_func (async); ++} ++ ++void ++acc_wait_async (int async1, int async2) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_async_func (async1, async2); ++} ++ ++void ++acc_wait_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_func (); ++} ++ ++void ++acc_wait_all_async (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_async_func (async); ++} +--- libgomp/splay-tree.h.jj 2016-07-13 16:57:18.934355582 +0200 ++++ libgomp/splay-tree.h 2016-07-13 16:57:18.934355582 +0200 +@@ -0,0 +1,130 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. ++ ++ Files including this header should before including it add: ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ define splay_tree_key_s structure, and define ++ splay_compare inline function. 
++ ++ Alternatively, they can define splay_tree_prefix macro before ++ including this header and then all the above types, the ++ splay_compare function and the splay_tree_{lookup,insert_remove} ++ function will be prefixed by that prefix. If splay_tree_prefix ++ macro is defined, this header must be included twice: once where ++ you need the header file definitions, and once where you need the ++ .c implementation routines. In the latter case, you must also ++ define the macro splay_tree_c. See the include of splay-tree.h in ++ priority_queue.[hc] for an example. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#ifdef splay_tree_prefix ++# define splay_tree_name_1(prefix, name) prefix ## _ ## name ++# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name) ++# define splay_tree_node_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node_s) ++# define splay_tree_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_s) ++# define splay_tree_key_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key_s) ++# define splay_tree_node \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node) ++# define splay_tree \ ++ splay_tree_name (splay_tree_prefix, splay_tree) ++# define splay_tree_key \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key) ++# define splay_compare \ ++ splay_tree_name (splay_tree_prefix, splay_compare) ++# define splay_tree_lookup \ ++ splay_tree_name (splay_tree_prefix, splay_tree_lookup) ++# define splay_tree_insert \ ++ splay_tree_name (splay_tree_prefix, splay_tree_insert) ++# define splay_tree_remove \ ++ splay_tree_name (splay_tree_prefix, splay_tree_remove) ++# define splay_tree_foreach \ ++ splay_tree_name (splay_tree_prefix, splay_tree_foreach) ++# define splay_tree_callback \ ++ splay_tree_name (splay_tree_prefix, splay_tree_callback) ++#endif ++ ++#ifndef splay_tree_c ++/* Header file definitions and prototypes. */ ++ ++/* The nodes in the splay tree. */ ++struct splay_tree_node_s { ++ struct splay_tree_key_s key; ++ /* The left and right children, respectively. */ ++ splay_tree_node left; ++ splay_tree_node right; ++}; ++ ++/* The splay tree. 
*/ ++struct splay_tree_s { ++ splay_tree_node root; ++}; ++ ++typedef void (*splay_tree_callback) (splay_tree_key, void *); ++ ++extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); ++extern void splay_tree_insert (splay_tree, splay_tree_node); ++extern void splay_tree_remove (splay_tree, splay_tree_key); ++extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); ++#else /* splay_tree_c */ ++# ifdef splay_tree_prefix ++# include "splay-tree.c" ++# undef splay_tree_name_1 ++# undef splay_tree_name ++# undef splay_tree_node_s ++# undef splay_tree_s ++# undef splay_tree_key_s ++# undef splay_tree_node ++# undef splay_tree ++# undef splay_tree_key ++# undef splay_compare ++# undef splay_tree_lookup ++# undef splay_tree_insert ++# undef splay_tree_remove ++# undef splay_tree_foreach ++# undef splay_tree_callback ++# undef splay_tree_c ++# endif ++#endif /* #ifndef splay_tree_c */ ++ ++#ifdef splay_tree_prefix ++# undef splay_tree_prefix ++#endif +--- libgomp/oacc-plugin.c.jj 2016-07-13 16:57:13.481423196 +0200 ++++ libgomp/oacc-plugin.c 2016-07-14 15:40:21.653151873 +0200 +@@ -0,0 +1,44 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Initialize and register OpenACC dispatch table from libgomp plugin. */ ++ ++#include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++ ++void ++GOMP_PLUGIN_async_unmap_vars (void *ptr, int async) ++{ ++} ++ ++/* Return the target-specific part of the TLS data for the current thread. */ ++ ++void * ++GOMP_PLUGIN_acc_thread (void) ++{ ++ return NULL; ++} +--- libgomp/oacc-init.c.jj 2016-07-13 16:57:04.423535509 +0200 ++++ libgomp/oacc-init.c 2016-07-14 19:06:41.679575688 +0200 +@@ -0,0 +1,640 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "openacc.h" ++#include ++#include ++#include ++#include ++#include ++ ++/* This lock is used to protect access to cached_base_dev, dispatchers and ++ the (abstract) initialisation state of attached offloading devices. */ ++ ++static gomp_mutex_t acc_device_lock; ++ ++/* A cached version of the dispatcher for the global "current" accelerator type, ++ e.g. used as the default when creating new host threads. This is the ++ device-type equivalent of goacc_device_num (which specifies which device to ++ use out of potentially several of the same type). If there are several ++ devices of a given type, this points at the first one. */ ++ ++static struct gomp_device_descr *cached_base_dev = NULL; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++__thread struct goacc_thread *goacc_tls_data; ++#else ++pthread_key_t goacc_tls_key; ++#endif ++static pthread_key_t goacc_cleanup_key; ++ ++static struct goacc_thread *goacc_threads; ++static gomp_mutex_t goacc_thread_lock; ++ ++/* An array of dispatchers for device types, indexed by the type. This array ++ only references "base" devices, and other instances of the same type are ++ found by simply indexing from each such device (which are stored linearly, ++ grouped by device in target.c:devices). */ ++static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; ++ ++attribute_hidden void ++goacc_register (struct gomp_device_descr *disp) ++{ ++ /* Only register the 0th device here. */ ++ if (disp->target_id != 0) ++ return; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ assert (acc_device_type (disp->type) != acc_device_none ++ && acc_device_type (disp->type) != acc_device_default ++ && acc_device_type (disp->type) != acc_device_not_host); ++ assert (!dispatchers[disp->type]); ++ dispatchers[disp->type] = disp; ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++static const char * ++name_of_acc_device_t (enum acc_device_t type) ++{ ++ switch (type) ++ { ++ case acc_device_none: return "none"; ++ case acc_device_default: return "default"; ++ case acc_device_host: return "host"; ++ case acc_device_not_host: return "not_host"; ++ case acc_device_nvidia: return "nvidia"; ++ default: gomp_fatal ("unknown device type %u", (unsigned) type); ++ } ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR ++ is true, this function raises an error if there are no devices of type D, ++ otherwise it returns NULL in that case. */ ++ ++static struct gomp_device_descr * ++resolve_device (acc_device_t d, bool fail_is_error) ++{ ++ acc_device_t d_arg = d; ++ ++ switch (d) ++ { ++ case acc_device_default: ++ { ++ if (goacc_device_type) ++ { ++ /* Lookup the named device. */ ++ if (!strcasecmp (goacc_device_type, "host")) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", goacc_device_type); ++ } ++ else ++ return NULL; ++ } ++ ++ /* No default device specified, so start scanning for any non-host ++ device that is available. 
*/ ++ d = acc_device_not_host; ++ } ++ /* FALLTHROUGH */ ++ ++ case acc_device_not_host: ++ if (d_arg == acc_device_default) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("no device found"); ++ } ++ else ++ return NULL; ++ break; ++ ++ case acc_device_host: ++ break; ++ ++ default: ++ if (d > _ACC_device_hwm) ++ { ++ if (fail_is_error) ++ goto unsupported_device; ++ else ++ return NULL; ++ } ++ break; ++ } ++ found: ++ ++ assert (d != acc_device_none ++ && d != acc_device_default ++ && d != acc_device_not_host); ++ ++ if (dispatchers[d] == NULL && fail_is_error) ++ { ++ unsupported_device: ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); ++ } ++ ++ return dispatchers[d]; ++} ++ ++/* Emit a suitable error if no device of a particular type is available, or ++ the given device number is out-of-range. */ ++static void ++acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) ++{ ++ if (ndevs == 0) ++ gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); ++ else ++ gomp_fatal ("device %u out of range", ord); ++} ++ ++/* This is called when plugins have been initialized, and serves to call ++ (indirectly) the target's device_init hook. Calling multiple times without ++ an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be ++ held before calling this function. */ ++ ++static struct gomp_device_descr * ++acc_init_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int ndevs; ++ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ if (ndevs <= 0 || goacc_device_num >= ndevs) ++ acc_dev_num_out_of_range (d, goacc_device_num, ndevs); ++ ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ gomp_mutex_unlock (&acc_dev->lock); ++ gomp_fatal ("device already active"); ++ } ++ ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ return base_dev; ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. */ ++ ++static void ++acc_shutdown_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev; ++ struct goacc_thread *walk; ++ int ndevs, i; ++ bool devices_active = false; ++ ++ /* Get the base device for this device type. */ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ /* Free target-specific TLS data and close all devices. */ ++ for (walk = goacc_threads; walk != NULL; walk = walk->next) ++ { ++ if (walk->target_tls) ++ base_dev->openacc.destroy_thread_data_func (walk->target_tls); ++ ++ walk->target_tls = NULL; ++ ++ /* Similarly, if this happens then user code has done something weird. */ ++ if (walk->saved_bound_dev) ++ { ++ gomp_mutex_unlock (&goacc_thread_lock); ++ gomp_fatal ("shutdown during host fallback"); ++ } ++ ++ if (walk->dev) ++ { ++ gomp_mutex_lock (&walk->dev->lock); ++ gomp_free_memmap (&walk->dev->mem_map); ++ gomp_mutex_unlock (&walk->dev->lock); ++ ++ walk->dev = NULL; ++ walk->base_dev = NULL; ++ } ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ /* Close all the devices of this type that have been opened. 
*/ ++ bool ret = true; ++ for (i = 0; i < ndevs; i++) ++ { ++ struct gomp_device_descr *acc_dev = &base_dev[i]; ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ devices_active = true; ++ ret &= acc_dev->fini_device_func (acc_dev->target_id); ++ acc_dev->state = GOMP_DEVICE_UNINITIALIZED; ++ } ++ gomp_mutex_unlock (&acc_dev->lock); ++ } ++ ++ if (!ret) ++ gomp_fatal ("device finalization failed"); ++ ++ if (!devices_active) ++ gomp_fatal ("no device initialized"); ++} ++ ++static struct goacc_thread * ++goacc_new_thread (void) ++{ ++ struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread)); ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++ goacc_tls_data = thr; ++#else ++ pthread_setspecific (goacc_tls_key, thr); ++#endif ++ ++ pthread_setspecific (goacc_cleanup_key, thr); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ thr->next = goacc_threads; ++ goacc_threads = thr; ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ return thr; ++} ++ ++static void ++goacc_destroy_thread (void *data) ++{ ++ struct goacc_thread *thr = data, *walk, *prev; ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ if (thr) ++ { ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ if (acc_dev && thr->target_tls) ++ { ++ acc_dev->openacc.destroy_thread_data_func (thr->target_tls); ++ thr->target_tls = NULL; ++ } ++ ++ assert (!thr->mapped_data); ++ ++ /* Remove from thread list. */ ++ for (prev = NULL, walk = goacc_threads; walk; ++ prev = walk, walk = walk->next) ++ if (walk == thr) ++ { ++ if (prev == NULL) ++ goacc_threads = walk->next; ++ else ++ prev->next = walk->next; ++ ++ free (thr); ++ ++ break; ++ } ++ ++ assert (walk); ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++} ++ ++/* Use the ORD'th device instance for the current host thread (or -1 for the ++ current global default). The device (and the runtime) must be initialised ++ before calling this function. */ ++ ++void ++goacc_attach_host_thread_to_device (int ord) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; ++ int num_devices; ++ ++ if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) ++ return; ++ ++ if (ord < 0) ++ ord = goacc_device_num; ++ ++ /* Decide which type of device to use. If the current thread has a device ++ type already (e.g. set by acc_set_device_type), use that, else use the ++ global default. */ ++ if (thr && thr->base_dev) ++ base_dev = thr->base_dev; ++ else ++ { ++ assert (cached_base_dev); ++ base_dev = cached_base_dev; ++ } ++ ++ num_devices = base_dev->get_num_devices_func (); ++ if (num_devices <= 0 || ord >= num_devices) ++ acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, ++ num_devices); ++ ++ if (!thr) ++ thr = goacc_new_thread (); ++ ++ thr->base_dev = base_dev; ++ thr->dev = acc_dev = &base_dev[ord]; ++ thr->saved_bound_dev = NULL; ++ ++ thr->target_tls ++ = acc_dev->openacc.create_thread_data_func (ord); ++ ++ acc_dev->openacc.async_set_async_func (acc_async_sync); ++} ++ ++/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of ++ init/shutdown is per-process or per-thread. We choose per-process. 
*/ ++ ++void ++acc_init (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = acc_init_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_init) ++ ++void ++acc_shutdown (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ acc_shutdown_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++ialias (acc_shutdown) ++ ++int ++acc_get_num_devices (acc_device_t d) ++{ ++ int n = 0; ++ struct gomp_device_descr *acc_dev; ++ ++ if (d == acc_device_none) ++ return 0; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ acc_dev = resolve_device (d, false); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (!acc_dev) ++ return 0; ++ ++ n = acc_dev->get_num_devices_func (); ++ if (n < 0) ++ n = 0; ++ ++ return n; ++} ++ ++ialias (acc_get_num_devices) ++ ++/* Set the device type for the current thread only (using the current global ++ default device number), initialising that device if necessary. Also set the ++ default device type for new threads to D. */ ++ ++void ++acc_set_device_type (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ /* We're changing device type: invalidate the current thread's dev and ++ base_dev pointers. */ ++ if (thr && thr->base_dev != base_dev) ++ { ++ thr->base_dev = thr->dev = NULL; ++ } ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_set_device_type) ++ ++acc_device_t ++acc_get_device_type (void) ++{ ++ acc_device_t res = acc_device_none; ++ struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->base_dev) ++ res = acc_device_type (thr->base_dev->type); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (acc_device_default, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ res = acc_device_type (dev->type); ++ } ++ ++ assert (res != acc_device_default ++ && res != acc_device_not_host); ++ ++ return res; ++} ++ ++ialias (acc_get_device_type) ++ ++int ++acc_get_device_num (acc_device_t d) ++{ ++ const struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (d >= _ACC_device_hwm) ++ gomp_fatal ("unknown device type %u", (unsigned) d); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (d, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (thr && thr->base_dev == dev && thr->dev) ++ return thr->dev->target_id; ++ ++ return goacc_device_num; ++} ++ ++ialias (acc_get_device_num) ++ ++void ++acc_set_device_num (int ord, acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int num_devices; ++ ++ if (ord < 0) ++ ord = goacc_device_num; ++ ++ if ((int) d == 0) ++ /* Set whatever device is being used by the current host thread to use ++ device instance ORD. It's unclear if this is supposed to affect other ++ host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). 
*/ ++ goacc_attach_host_thread_to_device (ord); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ ++ num_devices = base_dev->get_num_devices_func (); ++ ++ if (num_devices <= 0 || ord >= num_devices) ++ acc_dev_num_out_of_range (d, ord, num_devices); ++ ++ acc_dev = &base_dev[ord]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (ord); ++ } ++ ++ goacc_device_num = ord; ++} ++ ++ialias (acc_set_device_num) ++ ++int ++acc_on_device (acc_device_t dev) ++{ ++ return dev == acc_device_host || dev == acc_device_none; ++} ++ ++ialias (acc_on_device) ++ ++attribute_hidden void ++goacc_runtime_initialize (void) ++{ ++ gomp_mutex_init (&acc_device_lock); ++ ++#if !(defined HAVE_TLS || defined USE_EMUTLS) ++ pthread_key_create (&goacc_tls_key, NULL); ++#endif ++ ++ pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); ++ ++ cached_base_dev = NULL; ++ ++ goacc_threads = NULL; ++ gomp_mutex_init (&goacc_thread_lock); ++ ++ /* Initialize and register the 'host' device type. */ ++ goacc_host_init (); ++} ++ ++/* Compiler helper functions */ ++ ++attribute_hidden void ++goacc_save_and_set_bind (acc_device_t d) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ assert (!thr->saved_bound_dev); ++ ++ thr->saved_bound_dev = thr->dev; ++ thr->dev = dispatchers[d]; ++} ++ ++attribute_hidden void ++goacc_restore_bind (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ thr->dev = thr->saved_bound_dev; ++ thr->saved_bound_dev = NULL; ++} ++ ++/* This is called from any OpenACC support function that may need to implicitly ++ initialize the libgomp runtime, either globally or from a new host thread. ++ On exit "goacc_thread" will return a valid & populated thread block. */ ++ ++attribute_hidden void ++goacc_lazy_initialize (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev) ++ return; ++ ++ if (!cached_base_dev) ++ acc_init (acc_device_default); ++ else ++ goacc_attach_host_thread_to_device (-1); ++} +--- libgomp/oacc-int.h.jj 2016-07-13 16:57:04.400535794 +0200 ++++ libgomp/oacc-int.h 2016-07-13 16:57:04.400535794 +0200 +@@ -0,0 +1,106 @@ ++/* OpenACC Runtime - internal declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++/* This file contains data types and function declarations that are not ++ part of the official OpenACC user interface. There are declarations ++ in here that are part of the GNU OpenACC ABI, in that the compiler is ++ required to know about them and use them. ++ ++ The convention is that the all caps prefix "GOACC" is used group items ++ that are part of the external ABI, and the lower case prefix "goacc" ++ is used group items that are completely private to the library. */ ++ ++#ifndef OACC_INT_H ++#define OACC_INT_H 1 ++ ++#include "openacc.h" ++#include "config.h" ++#include ++#include ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline enum acc_device_t ++acc_device_type (enum offload_target_type type) ++{ ++ return (enum acc_device_t) type; ++} ++ ++struct goacc_thread ++{ ++ /* The base device for the current thread. */ ++ struct gomp_device_descr *base_dev; ++ ++ /* The device for the current thread. */ ++ struct gomp_device_descr *dev; ++ ++ struct gomp_device_descr *saved_bound_dev; ++ ++ /* This is a linked list of data mapped by the "acc data" pragma, following ++ strictly push/pop semantics according to lexical scope. */ ++ struct target_mem_desc *mapped_data; ++ ++ /* These structures form a list: this is the next thread in that list. */ ++ struct goacc_thread *next; ++ ++ /* Target-specific data (used by plugin). */ ++ void *target_tls; ++}; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++extern __thread struct goacc_thread *goacc_tls_data; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return goacc_tls_data; ++} ++#else ++extern pthread_key_t goacc_tls_key; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return pthread_getspecific (goacc_tls_key); ++} ++#endif ++ ++void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW; ++void goacc_attach_host_thread_to_device (int); ++void goacc_runtime_initialize (void); ++void goacc_save_and_set_bind (acc_device_t); ++void goacc_restore_bind (void); ++void goacc_lazy_initialize (void); ++void goacc_host_init (void); ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif +--- libgomp/oacc-host.c.jj 2016-07-13 16:57:13.489423096 +0200 ++++ libgomp/oacc-host.c 2016-07-13 16:57:13.489423096 +0200 +@@ -0,0 +1,266 @@ ++/* OpenACC Runtime Library: acc_device_host. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" ++ ++#include ++#include ++#include ++ ++static struct gomp_device_descr host_dispatch; ++ ++static const char * ++host_get_name (void) ++{ ++ return host_dispatch.name; ++} ++ ++static unsigned int ++host_get_caps (void) ++{ ++ return host_dispatch.capabilities; ++} ++ ++static int ++host_get_type (void) ++{ ++ return host_dispatch.type; ++} ++ ++static int ++host_get_num_devices (void) ++{ ++ return 1; ++} ++ ++static bool ++host_init_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_fini_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static unsigned ++host_version (void) ++{ ++ return GOMP_VERSION; ++} ++ ++static int ++host_load_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused)), ++ struct addr_pair **r __attribute__ ((unused))) ++{ ++ return 0; ++} ++ ++static bool ++host_unload_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static void * ++host_alloc (int n __attribute__ ((unused)), size_t s) ++{ ++ return gomp_malloc (s); ++} ++ ++static bool ++host_free (int n __attribute__ ((unused)), void *p) ++{ ++ free (p); ++ return true; ++} ++ ++static bool ++host_dev2host (int n __attribute__ ((unused)), ++ void *h __attribute__ ((unused)), ++ const void *d __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_host2dev (int n __attribute__ ((unused)), ++ void *d __attribute__ ((unused)), ++ const void *h __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static void ++host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, ++ void **args __attribute__((unused))) ++{ ++ void (*fn)(void *) = (void (*)(void *)) fn_ptr; ++ ++ fn (vars); ++} ++ ++static void ++host_openacc_exec (void (*fn) (void *), ++ size_t mapnum __attribute__ ((unused)), ++ void **hostaddrs, ++ void **devaddrs __attribute__ ((unused)), ++ int async __attribute__ ((unused)), ++ unsigned *dims __attribute ((unused)), ++ void *targ_mem_desc __attribute__ ((unused))) ++{ ++ fn (hostaddrs); ++} ++ ++static void ++host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)), ++ int async __attribute__ ((unused))) ++{ ++} ++ ++static int ++host_openacc_async_test (int async __attribute__ ((unused))) ++{ ++ return 1; ++} ++ ++static int ++host_openacc_async_test_all (void) ++{ ++ return 1; ++} ++ ++static void ++host_openacc_async_wait (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_async (int async1 __attribute__ ((unused)), ++ int async2 __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_all (void) ++{ ++} ++ ++static void ++host_openacc_async_wait_all_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_set_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void * ++host_openacc_create_thread_data (int ord __attribute__ ((unused))) ++{ ++ return NULL; ++} ++ ++static void ++host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused))) ++{ ++} ++ ++static struct gomp_device_descr host_dispatch = ++ { ++ .name = "host", ++ .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM ++ | GOMP_OFFLOAD_CAP_NATIVE_EXEC ++ | GOMP_OFFLOAD_CAP_OPENACC_200), ++ .target_id = 0, ++ .type = 
OFFLOAD_TARGET_TYPE_HOST, ++ ++ .get_name_func = host_get_name, ++ .get_caps_func = host_get_caps, ++ .get_type_func = host_get_type, ++ .get_num_devices_func = host_get_num_devices, ++ .init_device_func = host_init_device, ++ .fini_device_func = host_fini_device, ++ .version_func = host_version, ++ .load_image_func = host_load_image, ++ .unload_image_func = host_unload_image, ++ .alloc_func = host_alloc, ++ .free_func = host_free, ++ .dev2host_func = host_dev2host, ++ .host2dev_func = host_host2dev, ++ .run_func = host_run, ++ ++ .mem_map = { NULL }, ++ /* .lock initilized in goacc_host_init. */ ++ .state = GOMP_DEVICE_UNINITIALIZED, ++ ++ .openacc = { ++ .data_environ = NULL, ++ ++ .exec_func = host_openacc_exec, ++ ++ .register_async_cleanup_func = host_openacc_register_async_cleanup, ++ ++ .async_test_func = host_openacc_async_test, ++ .async_test_all_func = host_openacc_async_test_all, ++ .async_wait_func = host_openacc_async_wait, ++ .async_wait_async_func = host_openacc_async_wait_async, ++ .async_wait_all_func = host_openacc_async_wait_all, ++ .async_wait_all_async_func = host_openacc_async_wait_all_async, ++ .async_set_async_func = host_openacc_async_set_async, ++ ++ .create_thread_data_func = host_openacc_create_thread_data, ++ .destroy_thread_data_func = host_openacc_destroy_thread_data, ++ ++ .cuda = { ++ .get_current_device_func = NULL, ++ .get_current_context_func = NULL, ++ .get_stream_func = NULL, ++ .set_stream_func = NULL, ++ } ++ } ++ }; ++ ++/* Initialize and register this device type. */ ++void ++goacc_host_init (void) ++{ ++ gomp_mutex_init (&host_dispatch.lock); ++ goacc_register (&host_dispatch); ++} +--- libgomp/oacc-parallel.c.jj 2016-07-13 16:57:04.399535807 +0200 ++++ libgomp/oacc-parallel.c 2016-07-14 18:53:06.694996381 +0200 +@@ -0,0 +1,241 @@ ++/* Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles OpenACC constructs. */ ++ ++#include "openacc.h" ++#include "libgomp.h" ++#include "libgomp_g.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif ++#include ++#include ++#include ++ ++static void goacc_wait (int async, int num_waits, va_list *ap); ++ ++ ++/* Launch a possibly offloaded function on DEVICE. FN is the host fn ++ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory ++ blocks to be copied to/from the device. Varadic arguments are ++ keyed optional parameters terminated with a zero. 
*/ ++ ++void ++GOACC_parallel_keyed (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ...) ++{ ++ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; ++ struct goacc_thread *thr; ++ struct gomp_device_descr *acc_dev; ++ ++#ifdef HAVE_INTTYPES_H ++ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", ++ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); ++#else ++ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", ++ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); ++#endif ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ acc_dev = thr->dev; ++ ++ /* Host fallback if "if" clause is false or if the current device is set to ++ the host. */ ++ if (host_fallback) ++ { ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++ return; ++ } ++ else if (acc_device_type (acc_dev->type) == acc_device_host) ++ { ++ fn (hostaddrs); ++ return; ++ } ++ ++ /* acc_device_host is the only supported device type. */ ++} ++ ++/* Legacy entry point, only provide host execution. */ ++ ++void ++GOACC_parallel (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ++ int num_gangs, int num_workers, int vector_length, ++ int async, int num_waits, ...) ++{ ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++} ++ ++void ++GOACC_data_start (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_data_end (void) ++{ ++ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); ++ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); ++} ++ ++void ++GOACC_enter_exit_data (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) ++{ ++ goacc_lazy_initialize (); ++} ++ ++static void ++goacc_wait (int async, int num_waits, va_list *ap) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ while (num_waits--) ++ { ++ int qid = va_arg (*ap, int); ++ ++ if (acc_async_test (qid)) ++ continue; ++ ++ if (async == acc_async_sync) ++ acc_wait (qid); ++ else if (qid == async) ++ ;/* If we're waiting on the same asynchronous queue as we're ++ launching on, the queue itself will order work as ++ required, so there's no need to wait explicitly. */ ++ else ++ acc_dev->openacc.async_wait_async_func (qid, async); ++ } ++} ++ ++void ++GOACC_update (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_wait (int async, int num_waits, ...) 
++{ ++ if (num_waits) ++ { ++ va_list ap; ++ ++ va_start (ap, num_waits); ++ goacc_wait (async, num_waits, &ap); ++ va_end (ap); ++ } ++ else if (async == acc_async_sync) ++ acc_wait_all (); ++ else if (async == acc_async_noval) ++ goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); ++} ++ ++int ++GOACC_get_num_threads (void) ++{ ++ return 1; ++} ++ ++int ++GOACC_get_thread_num (void) ++{ ++ return 0; ++} ++ ++void ++GOACC_declare (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ int i; ++ ++ for (i = 0; i < mapnum; i++) ++ { ++ unsigned char kind = kinds[i] & 0xff; ++ ++ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) ++ continue; ++ ++ switch (kind) ++ { ++ case GOMP_MAP_FORCE_ALLOC: ++ case GOMP_MAP_FORCE_FROM: ++ case GOMP_MAP_FORCE_TO: ++ case GOMP_MAP_POINTER: ++ case GOMP_MAP_DELETE: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_DEVICEPTR: ++ break; ++ ++ case GOMP_MAP_ALLOC: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_TO: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ ++ break; ++ ++ case GOMP_MAP_FROM: ++ kinds[i] = GOMP_MAP_FORCE_FROM; ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_PRESENT: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], ++ (unsigned long) sizes[i]); ++ break; ++ ++ default: ++ assert (0); ++ break; ++ } ++ } ++} +--- libgomp/oacc-cuda.c.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/oacc-cuda.c 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,86 @@ ++/* OpenACC Runtime Library: CUDA support glue. ++ ++ Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++void * ++acc_get_current_cuda_device (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func) ++ return thr->dev->openacc.cuda.get_current_device_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_current_cuda_context (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func) ++ return thr->dev->openacc.cuda.get_current_context_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_cuda_stream (int async) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (async < 0) ++ return NULL; ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func) ++ return thr->dev->openacc.cuda.get_stream_func (async); ++ ++ return NULL; ++} ++ ++int ++acc_set_cuda_stream (int async, void *stream) ++{ ++ struct goacc_thread *thr; ++ ++ if (async < 0 || stream == NULL) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func) ++ return thr->dev->openacc.cuda.set_stream_func (async, stream); ++ ++ return -1; ++} +--- libgomp/openacc_lib.h.jj 2016-07-13 16:57:13.486423134 +0200 ++++ libgomp/openacc_lib.h 2016-07-13 16:57:13.486423134 +0200 +@@ -0,0 +1,382 @@ ++! OpenACC Runtime Library Definitions. -*- mode: fortran -*- ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! . ++ ++! NOTE: Due to the use of dimension (..), the code only works when compiled ++! with -std=f2008ts/gnu/legacy but not with other standard settings. ++! Alternatively, the user can use the module version, which permits ++! compilation with -std=f95. ++ ++ integer, parameter :: acc_device_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 ++! removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ integer, parameter :: acc_handle_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. 
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ function acc_get_num_devices_h (d) ++ import acc_device_kind ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_set_device_type ++ subroutine acc_set_device_type_h (d) ++ import acc_device_kind ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_type ++ function acc_get_device_type_h () ++ import acc_device_kind ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ end interface ++ ++ interface acc_set_device_num ++ subroutine acc_set_device_num_h (n, d) ++ import acc_device_kind ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_num ++ function acc_get_device_num_h (d) ++ import acc_device_kind ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_async_test ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ end interface ++ ++ interface acc_async_test_all ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ end interface ++ ++ interface acc_wait ++ subroutine acc_wait_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_wait_async ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ end interface ++ ++ interface acc_wait_all ++ subroutine acc_wait_all_h () ++ end subroutine ++ end interface ++ ++ interface acc_wait_all_async ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_init ++ subroutine acc_init_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ interface acc_shutdown ++ subroutine acc_shutdown_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ interface acc_on_device ++ function acc_on_device_h (devicetype) ++ import acc_device_kind ++ logical acc_on_device_h ++ integer (acc_device_kind) devicetype ++ end function ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! 
acc_free: Only available in C/C++ ++ ++ interface acc_copyin ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_copyin ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcopyin ++ subroutine acc_pcopyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcopyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_pcopyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_create ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_create ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcreate ++ subroutine acc_pcreate_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_copyout ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine 
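The 32-bit, 64-bit and assumed-rank specifics in these generic interfaces all bind to the same C entry points. A short host-side C sketch of the corresponding calls, illustrative only and not part of the patch (the shared-memory fallback behaviour is in oacc-mem.c later in this patch):

/* Illustrative sketch: explicit data mapping through the C API that the
   Fortran generics above wrap.  */
#include <assert.h>
#include <openacc.h>

int
main (void)
{
  double a[256];

  void *d = acc_copyin (a, sizeof a);      /* map and copy to device */
  assert (acc_is_present (a, sizeof a));
  /* ... device computation on the mapped copy ... */
  acc_copyout (a, sizeof a);               /* copy back and unmap */

  (void) d;
  return 0;
}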
++ ++ subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_delete ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_device ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_self ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ ! acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_ostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ ! acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ +--- libgomp/gomp-constants.h.jj 2016-07-14 16:02:47.212545826 +0200 ++++ libgomp/gomp-constants.h 2016-05-26 21:04:40.000000000 +0200 +@@ -0,0 +1,259 @@ ++/* Communication between GCC and libgomp. ++ ++ Copyright (C) 2014-2015 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef GOMP_CONSTANTS_H ++#define GOMP_CONSTANTS_H 1 ++ ++/* Memory mapping types. */ ++ ++/* One byte. */ ++#define GOMP_MAP_LAST (1 << 8) ++ ++#define GOMP_MAP_FLAG_TO (1 << 0) ++#define GOMP_MAP_FLAG_FROM (1 << 1) ++/* Special map kinds, enumerated starting here. */ ++#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) ++#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) ++#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) ++#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ ++ | GOMP_MAP_FLAG_SPECIAL_0) ++/* Flag to force a specific behavior (or else, trigger a run-time error). */ ++#define GOMP_MAP_FLAG_FORCE (1 << 7) ++ ++enum gomp_map_kind ++ { ++ /* If not already present, allocate. */ ++ GOMP_MAP_ALLOC = 0, ++ /* ..., and copy to device. */ ++ GOMP_MAP_TO = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FROM = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_TOFROM = (GOMP_MAP_TO | GOMP_MAP_FROM), ++ /* The following kind is an internal only map kind, used for pointer based ++ array sections. OMP_CLAUSE_SIZE for these is not the pointer size, ++ which is implicitly POINTER_SIZE_UNITS, but the bias. */ ++ GOMP_MAP_POINTER = (GOMP_MAP_FLAG_SPECIAL_0 | 0), ++ /* Also internal, behaves like GOMP_MAP_TO, but additionally any ++ GOMP_MAP_POINTER records consecutive after it which have addresses ++ falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't ++ mapped already. */ ++ GOMP_MAP_TO_PSET = (GOMP_MAP_FLAG_SPECIAL_0 | 1), ++ /* Must already be present. */ ++ GOMP_MAP_FORCE_PRESENT = (GOMP_MAP_FLAG_SPECIAL_0 | 2), ++ /* Deallocate a mapping, without copying from device. */ ++ GOMP_MAP_DELETE = (GOMP_MAP_FLAG_SPECIAL_0 | 3), ++ /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly ++ POINTER_SIZE_UNITS. */ ++ GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), ++ /* Do not map, copy bits for firstprivate instead. */ ++ /* OpenACC device_resident. */ ++ GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1), ++ /* OpenACC link. */ ++ GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2), ++ /* Allocate. */ ++ GOMP_MAP_FIRSTPRIVATE = (GOMP_MAP_FLAG_SPECIAL | 0), ++ /* Similarly, but store the value in the pointer rather than ++ pointed by the pointer. */ ++ GOMP_MAP_FIRSTPRIVATE_INT = (GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Pointer translate host address into device address and copy that ++ back to host. */ ++ GOMP_MAP_USE_DEVICE_PTR = (GOMP_MAP_FLAG_SPECIAL | 2), ++ /* Allocate a zero length array section. Prefer next non-zero length ++ mapping over previous non-zero length mapping over zero length mapping ++ at the address. If not already mapped, do nothing (and pointer translate ++ to NULL). */ ++ GOMP_MAP_ZERO_LEN_ARRAY_SECTION = (GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Allocate. 
*/ ++ GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), ++ /* ..., and copy to device. */ ++ GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM), ++ /* If not already present, allocate. And unconditionally copy to ++ device. */ ++ GOMP_MAP_ALWAYS_TO = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_TO), ++ /* If not already present, allocate. And unconditionally copy from ++ device. */ ++ GOMP_MAP_ALWAYS_FROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FROM), ++ /* If not already present, allocate. And unconditionally copy to and from ++ device. */ ++ GOMP_MAP_ALWAYS_TOFROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_TOFROM), ++ /* Map a sparse struct; the address is the base of the structure, alignment ++ it's required alignment, and size is the number of adjacent entries ++ that belong to the struct. The adjacent entries should be sorted by ++ increasing address, so it is easy to determine lowest needed address ++ (address of the first adjacent entry) and highest needed address ++ (address of the last adjacent entry plus its size). */ ++ GOMP_MAP_STRUCT = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 0), ++ /* On a location of a pointer/reference that is assumed to be already mapped ++ earlier, store the translated address of the preceeding mapping. ++ No refcount is bumped by this, and the store is done unconditionally. */ ++ GOMP_MAP_ALWAYS_POINTER = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Forced deallocation of zero length array section. */ ++ GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION ++ = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Decrement usage count and deallocate if zero. */ ++ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_DELETE), ++ ++ /* Internal to GCC, not used in libgomp. */ ++ /* Do not map, but pointer assign a pointer instead. */ ++ GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1), ++ /* Do not map, but pointer assign a reference instead. */ ++ GOMP_MAP_FIRSTPRIVATE_REFERENCE = (GOMP_MAP_LAST | 2) ++ }; ++ ++#define GOMP_MAP_COPY_TO_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_TO)) ++ ++#define GOMP_MAP_COPY_FROM_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_FROM)) ++ ++#define GOMP_MAP_POINTER_P(X) \ ++ ((X) == GOMP_MAP_POINTER) ++ ++#define GOMP_MAP_ALWAYS_TO_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_FROM_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_P(X) \ ++ (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM)) ++ ++ ++/* Asynchronous behavior. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ ++ ++#define GOMP_ASYNC_NOVAL -1 ++#define GOMP_ASYNC_SYNC -2 ++ ++ ++/* Device codes. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as ++ libgomp/libgomp-plugin.h. */ ++#define GOMP_DEVICE_NONE 0 ++#define GOMP_DEVICE_DEFAULT 1 ++#define GOMP_DEVICE_HOST 2 ++/* #define GOMP_DEVICE_HOST_NONSHM 3 removed. */ ++#define GOMP_DEVICE_NOT_HOST 4 ++#define GOMP_DEVICE_NVIDIA_PTX 5 ++#define GOMP_DEVICE_INTEL_MIC 6 ++#define GOMP_DEVICE_HSA 7 ++ ++#define GOMP_DEVICE_ICV -1 ++#define GOMP_DEVICE_HOST_FALLBACK -2 ++ ++/* GOMP_task/GOMP_taskloop* flags argument. 
*/ ++#define GOMP_TASK_FLAG_UNTIED (1 << 0) ++#define GOMP_TASK_FLAG_FINAL (1 << 1) ++#define GOMP_TASK_FLAG_MERGEABLE (1 << 2) ++#define GOMP_TASK_FLAG_DEPEND (1 << 3) ++#define GOMP_TASK_FLAG_PRIORITY (1 << 4) ++#define GOMP_TASK_FLAG_UP (1 << 8) ++#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) ++#define GOMP_TASK_FLAG_IF (1 << 10) ++#define GOMP_TASK_FLAG_NOGROUP (1 << 11) ++ ++/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ ++#define GOMP_TARGET_FLAG_NOWAIT (1 << 0) ++#define GOMP_TARGET_FLAG_EXIT_DATA (1 << 1) ++/* Internal to libgomp. */ ++#define GOMP_TARGET_FLAG_UPDATE (1U << 31) ++ ++/* Versions of libgomp and device-specific plugins. GOMP_VERSION ++ should be incremented whenever an ABI-incompatible change is introduced ++ to the plugin interface defined in libgomp/libgomp.h. */ ++#define GOMP_VERSION 1 ++#define GOMP_VERSION_NVIDIA_PTX 1 ++#define GOMP_VERSION_INTEL_MIC 0 ++#define GOMP_VERSION_HSA 0 ++ ++#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) ++#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) ++#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) ++ ++#define GOMP_DIM_GANG 0 ++#define GOMP_DIM_WORKER 1 ++#define GOMP_DIM_VECTOR 2 ++#define GOMP_DIM_MAX 3 ++#define GOMP_DIM_MASK(X) (1u << (X)) ++ ++/* Varadic launch arguments. End of list is marked by a zero. */ ++#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */ ++#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */ ++#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */ ++#define GOMP_LAUNCH_CODE_SHIFT 28 ++#define GOMP_LAUNCH_DEVICE_SHIFT 16 ++#define GOMP_LAUNCH_OP_SHIFT 0 ++#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \ ++ (((CODE) << GOMP_LAUNCH_CODE_SHIFT) \ ++ | ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \ ++ | ((OP) << GOMP_LAUNCH_OP_SHIFT)) ++#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf) ++#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff) ++#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) ++#define GOMP_LAUNCH_OP_MAX 0xffff ++ ++/* Bitmask to apply in order to find out the intended device of a target ++ argument. */ ++#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) ++/* The target argument is significant for all devices. */ ++#define GOMP_TARGET_ARG_DEVICE_ALL 0 ++ ++/* Flag set when the subsequent element in the device-specific argument ++ values. */ ++#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) ++ ++/* Bitmask to apply to a target argument to find out the value identifier. */ ++#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) ++/* Target argument index of NUM_TEAMS. */ ++#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) ++/* Target argument index of THREAD_LIMIT. */ ++#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) ++ ++/* If the value is directly embeded in target argument, it should be a 16-bit ++ at most and shifted by this many bits. */ ++#define GOMP_TARGET_ARG_VALUE_SHIFT 16 ++ ++/* HSA specific data structures. */ ++ ++/* Identifiers of device-specific target arguments. */ ++#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES (1 << 8) ++ ++#endif +--- libgomp/oacc-mem.c.jj 2016-07-13 16:57:04.433535385 +0200 ++++ libgomp/oacc-mem.c 2016-07-14 15:39:44.644631308 +0200 +@@ -0,0 +1,204 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#include ++#include ++#include ++ ++/* OpenACC is silent on how memory exhaustion is indicated. We return ++ NULL. */ ++ ++void * ++acc_malloc (size_t s) ++{ ++ if (!s) ++ return NULL; ++ ++ goacc_lazy_initialize (); ++ return malloc (s); ++} ++ ++/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event ++ the device address is mapped. We choose to check if it mapped, ++ and if it is, to unmap it. */ ++void ++acc_free (void *d) ++{ ++ return free (d); ++} ++ ++void ++acc_memcpy_to_device (void *d, void *h, size_t s) ++{ ++ memmove (d, h, s); ++} ++ ++void ++acc_memcpy_from_device (void *h, void *d, size_t s) ++{ ++ memmove (h, d, s); ++} ++ ++/* Return the device pointer that corresponds to host data H. Or NULL ++ if no mapping. */ ++ ++void * ++acc_deviceptr (void *h) ++{ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++/* Return the host pointer that corresponds to device data D. Or NULL ++ if no mapping. */ ++ ++void * ++acc_hostptr (void *d) ++{ ++ goacc_lazy_initialize (); ++ return d; ++} ++ ++/* Return 1 if host data [H,+S] is present on the device. 
*/ ++ ++int ++acc_is_present (void *h, size_t s) ++{ ++ if (!s || !h) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ return h != NULL; ++} ++ ++/* Create a mapping for host [H,+S] -> device [D,+S] */ ++ ++void ++acc_map_data (void *h, void *d, size_t s) ++{ ++ goacc_lazy_initialize (); ++ ++ if (d != h) ++ gomp_fatal ("cannot map data on shared-memory system"); ++} ++ ++void ++acc_unmap_data (void *h) ++{ ++} ++ ++#define FLAG_PRESENT (1 << 0) ++#define FLAG_CREATE (1 << 1) ++#define FLAG_COPY (1 << 2) ++ ++static void * ++present_create_copy (unsigned f, void *h, size_t s) ++{ ++ if (!h || !s) ++ gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); ++ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++void * ++acc_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE, h, s); ++} ++ ++void * ++acc_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++void * ++acc_present_or_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); ++} ++ ++void * ++acc_present_or_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++#define FLAG_COPYOUT (1 << 0) ++ ++static void ++delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) ++{ ++} ++ ++void ++acc_delete (void *h , size_t s) ++{ ++ delete_copyout (0, h, s, __FUNCTION__); ++} ++ ++void ++acc_copyout (void *h, size_t s) ++{ ++ delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); ++} ++ ++static void ++update_dev_host (int is_dev, void *h, size_t s) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++acc_update_device (void *h, size_t s) ++{ ++ update_dev_host (1, h, s); ++} ++ ++void ++acc_update_self (void *h, size_t s) ++{ ++ update_dev_host (0, h, s); ++} ++ ++void ++gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, ++ void *kinds) ++{ ++} ++ ++void ++gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) ++{ ++} +--- libgomp/oacc-plugin.h.jj 2016-07-13 16:57:13.487423121 +0200 ++++ libgomp/oacc-plugin.h 2016-07-13 16:57:13.487423121 +0200 +@@ -0,0 +1,33 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#ifndef OACC_PLUGIN_H ++#define OACC_PLUGIN_H 1 ++ ++extern void GOMP_PLUGIN_async_unmap_vars (void *, int); ++extern void *GOMP_PLUGIN_acc_thread (void); ++ ++#endif +--- libgomp/taskloop.c.jj 2016-07-13 16:57:18.935355570 +0200 ++++ libgomp/taskloop.c 2016-07-13 16:57:18.935355570 +0200 +@@ -0,0 +1,340 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles the taskloop construct. It is included twice, once ++ for the long and once for unsigned long long variant. */ ++ ++/* Called when encountering an explicit task directive. If IF_CLAUSE is ++ false, then we must not delay in executing the task. If UNTIED is true, ++ then the task may be executed by any member of the team. */ ++ ++void ++GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), ++ long arg_size, long arg_align, unsigned flags, ++ unsigned long num_tasks, int priority, ++ TYPE start, TYPE end, TYPE step) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++#ifdef HAVE_BROKEN_POSIX_SEMAPHORES ++ /* If pthread_mutex_* is used for omp_*lock*, then each task must be ++ tied to one thread all the time. This means UNTIED tasks must be ++ tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN ++ might be running on different thread than FN. */ ++ if (cpyfn) ++ flags &= ~GOMP_TASK_FLAG_IF; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; ++#endif ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ ++ if (team && gomp_team_barrier_cancelled (&team->barrier)) ++ return; ++ ++#ifdef TYPE_is_long ++ TYPE s = step; ++ if (step > 0) ++ { ++ if (start >= end) ++ return; ++ s--; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ s++; ++ } ++ UTYPE n = (end - start + s) / step; ++#else ++ UTYPE n; ++ if (flags & GOMP_TASK_FLAG_UP) ++ { ++ if (start >= end) ++ return; ++ n = (end - start + step - 1) / step; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ n = (start - end - step - 1) / -step; ++ } ++#endif ++ ++ TYPE task_step = step; ++ unsigned long nfirst = n; ++ if (flags & GOMP_TASK_FLAG_GRAINSIZE) ++ { ++ unsigned long grainsize = num_tasks; ++#ifdef TYPE_is_long ++ num_tasks = n / grainsize; ++#else ++ UTYPE ndiv = n / grainsize; ++ num_tasks = ndiv; ++ if (num_tasks != ndiv) ++ num_tasks = ~0UL; ++#endif ++ if (num_tasks <= 1) ++ { ++ num_tasks = 1; ++ task_step = end - start; ++ } ++ else if (num_tasks >= grainsize ++#ifndef TYPE_is_long ++ && num_tasks != ~0UL ++#endif ++ ) ++ { ++ UTYPE mul = num_tasks * grainsize; ++ task_step = (TYPE) grainsize * step; ++ if (mul != n) ++ { ++ task_step += step; ++ nfirst = n - mul - 1; ++ } ++ } ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ else ++ { ++ if (num_tasks == 0) ++ num_tasks = team ? team->nthreads : 1; ++ if (num_tasks >= n) ++ num_tasks = n; ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ ++ if (flags & GOMP_TASK_FLAG_NOGROUP) ++ { ++ if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) ++ return; ++ } ++ else ++ ialias_call (GOMP_taskgroup_start) (); ++ ++ if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ ++ if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL ++ || (thr->task && thr->task->final_task) ++ || team->task_count + num_tasks > 64 * team->nthreads) ++ { ++ unsigned long i; ++ if (__builtin_expect (cpyfn != NULL, 0)) ++ { ++ struct gomp_task task[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); ++ char buf[num_tasks * arg_size + arg_align - 1]; ++ char *arg = (char *) (((uintptr_t) buf + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ char *orig_arg = arg; ++ for (i = 0; i < num_tasks; i++) ++ { ++ gomp_init_task (&task[i], parent, gomp_icv (false)); ++ task[i].priority = priority; ++ task[i].kind = GOMP_TASK_UNDEFERRED; ++ task[i].final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task[i].in_tied_task = thr->task->in_tied_task; ++ task[i].taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task[i]; ++ cpyfn (arg, data); ++ arg += arg_size; ++ } ++ arg = orig_arg; ++ for (i = 0; i < num_tasks; i++) ++ { ++ thr->task = &task[i]; ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (arg); ++ arg += arg_size; ++ if (!priority_queue_empty_p (&task[i].children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task[i].children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++ gomp_end_task (); ++ } ++ } ++ else ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task task; ++ ++ gomp_init_task (&task, thr->task, gomp_icv 
(false)); ++ task.priority = priority; ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task.in_tied_task = thr->task->in_tied_task; ++ task.taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task; ++ ((TYPE *)data)[0] = start; ++ start += task_step; ++ ((TYPE *)data)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (data); ++ if (!priority_queue_empty_p (&task.children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task.children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++ gomp_end_task (); ++ } ++ } ++ else ++ { ++ struct gomp_task *tasks[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; ++ char *arg; ++ int do_wake; ++ unsigned long i; ++ ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task ++ = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); ++ tasks[i] = task; ++ arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ thr->task = task; ++ if (cpyfn) ++ { ++ cpyfn (arg, data); ++ task->copy_ctors_done = true; ++ } ++ else ++ memcpy (arg, data, arg_size); ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ thr->task = parent; ++ task->kind = GOMP_TASK_WAITING; ++ task->fn = fn; ++ task->fn_data = arg; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; ++ } ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. 
*/ ++ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled)) ++ && cpyfn == NULL, 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ for (i = 0; i < num_tasks; i++) ++ { ++ gomp_finish_task (tasks[i]); ++ free (tasks[i]); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++ return; ++ } ++ if (taskgroup) ++ taskgroup->num_children += num_tasks; ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task = tasks[i]; ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, PRIORITY_INSERT_BEGIN, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, ++ PRIORITY_INSERT_END, ++ /*last_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ } ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ if (team->task_running_count + !parent->in_tied_task ++ < team->nthreads) ++ { ++ do_wake = team->nthreads - team->task_running_count ++ - !parent->in_tied_task; ++ if ((unsigned long) do_wake > num_tasks) ++ do_wake = num_tasks; ++ } ++ else ++ do_wake = 0; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, do_wake); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++} +--- libgomp/priority_queue.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/priority_queue.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,485 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Header file for a priority queue of GOMP tasks. */ ++ ++/* ?? Perhaps all the priority_tree_* functions are complex and rare ++ enough to go out-of-line and be moved to priority_queue.c. ?? */ ++ ++#ifndef _PRIORITY_QUEUE_H_ ++#define _PRIORITY_QUEUE_H_ ++ ++/* One task. */ ++ ++struct priority_node ++{ ++ /* Next and previous chains in a circular doubly linked list for ++ tasks within this task's priority. */ ++ struct priority_node *next, *prev; ++}; ++ ++/* All tasks within the same priority. */ ++ ++struct priority_list ++{ ++ /* Priority of the tasks in this set. */ ++ int priority; ++ ++ /* Tasks. 
*/ ++ struct priority_node *tasks; ++ ++ /* This points to the last of the higher priority WAITING tasks. ++ Remember that for the children queue, we have: ++ ++ parent_depends_on WAITING tasks. ++ !parent_depends_on WAITING tasks. ++ TIED tasks. ++ ++ This is a pointer to the last of the parent_depends_on WAITING ++ tasks which are essentially, higher priority items within their ++ priority. */ ++ struct priority_node *last_parent_depends_on; ++}; ++ ++/* Another splay tree instantiation, for priority_list's. */ ++typedef struct prio_splay_tree_node_s *prio_splay_tree_node; ++typedef struct prio_splay_tree_s *prio_splay_tree; ++typedef struct prio_splay_tree_key_s *prio_splay_tree_key; ++struct prio_splay_tree_key_s { ++ /* This structure must only containing a priority_list, as we cast ++ prio_splay_tree_key to priority_list throughout. */ ++ struct priority_list l; ++}; ++#define splay_tree_prefix prio ++#include "splay-tree.h" ++ ++/* The entry point into a priority queue of tasks. ++ ++ There are two alternate implementations with which to store tasks: ++ as a balanced tree of sorts, or as a simple list of tasks. If ++ there are only priority-0 items (ROOT is NULL), we use the simple ++ list, otherwise (ROOT is non-NULL) we use the tree. */ ++ ++struct priority_queue ++{ ++ /* If t.root != NULL, this is a splay tree of priority_lists to hold ++ all tasks. This is only used if multiple priorities are in play, ++ otherwise we use the priority_list `l' below to hold all ++ (priority-0) tasks. */ ++ struct prio_splay_tree_s t; ++ ++ /* If T above is NULL, only priority-0 items exist, so keep them ++ in a simple list. */ ++ struct priority_list l; ++}; ++ ++enum priority_insert_type { ++ /* Insert at the beginning of a priority list. */ ++ PRIORITY_INSERT_BEGIN, ++ /* Insert at the end of a priority list. */ ++ PRIORITY_INSERT_END ++}; ++ ++/* Used to determine in which queue a given priority node belongs in. ++ See pnode field of gomp_task. */ ++ ++enum priority_queue_type ++{ ++ PQ_TEAM, /* Node belongs in gomp_team's task_queue. */ ++ PQ_CHILDREN, /* Node belongs in parent's children_queue. */ ++ PQ_TASKGROUP, /* Node belongs in taskgroup->taskgroup_queue. */ ++ PQ_IGNORED = 999 ++}; ++ ++/* Priority queue implementation prototypes. */ ++ ++extern bool priority_queue_task_in_queue_p (enum priority_queue_type, ++ struct priority_queue *, ++ struct gomp_task *); ++extern void priority_queue_dump (enum priority_queue_type, ++ struct priority_queue *); ++extern void priority_queue_verify (enum priority_queue_type, ++ struct priority_queue *, bool); ++extern void priority_tree_remove (enum priority_queue_type, ++ struct priority_queue *, ++ struct priority_node *); ++extern struct gomp_task *priority_tree_next_task (enum priority_queue_type, ++ struct priority_queue *, ++ enum priority_queue_type, ++ struct priority_queue *, ++ bool *); ++ ++/* Return TRUE if there is more than one priority in HEAD. This is ++ used throughout to to choose between the fast path (priority 0 only ++ items) and a world with multiple priorities. */ ++ ++static inline bool ++priority_queue_multi_p (struct priority_queue *head) ++{ ++ return __builtin_expect (head->t.root != NULL, 0); ++} ++ ++/* Initialize a priority queue. */ ++ ++static inline void ++priority_queue_init (struct priority_queue *head) ++{ ++ head->t.root = NULL; ++ /* To save a few microseconds, we don't initialize head->l.priority ++ to 0 here. It is implied that priority will be 0 if head->t.root ++ == NULL. 
++ ++ priority_tree_insert() will fix this when we encounter multiple ++ priorities. */ ++ head->l.tasks = NULL; ++ head->l.last_parent_depends_on = NULL; ++} ++ ++static inline void ++priority_queue_free (struct priority_queue *head) ++{ ++ /* There's nothing to do, as tasks were freed as they were removed ++ in priority_queue_remove. */ ++} ++ ++/* Forward declarations. */ ++static inline size_t priority_queue_offset (enum priority_queue_type); ++static inline struct gomp_task *priority_node_to_task ++ (enum priority_queue_type, ++ struct priority_node *); ++static inline struct priority_node *task_to_priority_node ++ (enum priority_queue_type, ++ struct gomp_task *); ++ ++/* Return TRUE if priority queue HEAD is empty. ++ ++ MODEL IS MEMMODEL_ACQUIRE if we should use an acquire atomic to ++ read from the root of the queue, otherwise MEMMODEL_RELAXED if we ++ should use a plain load. */ ++ ++static inline _Bool ++priority_queue_empty_p (struct priority_queue *head, enum memmodel model) ++{ ++ /* Note: The acquire barriers on the loads here synchronize with ++ the write of a NULL in gomp_task_run_post_remove_parent. It is ++ not necessary that we synchronize with other non-NULL writes at ++ this point, but we must ensure that all writes to memory by a ++ child thread task work function are seen before we exit from ++ GOMP_taskwait. */ ++ if (priority_queue_multi_p (head)) ++ { ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL; ++ return head->t.root == NULL; ++ } ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL; ++ return head->l.tasks == NULL; ++} ++ ++/* Look for a given PRIORITY in HEAD. Return it if found, otherwise ++ return NULL. This only applies to the tree variant in HEAD. There ++ is no point in searching for priorities in HEAD->L. */ ++ ++static inline struct priority_list * ++priority_queue_lookup_priority (struct priority_queue *head, int priority) ++{ ++ if (head->t.root == NULL) ++ return NULL; ++ struct prio_splay_tree_key_s k; ++ k.l.priority = priority; ++ return (struct priority_list *) ++ prio_splay_tree_lookup (&head->t, &k); ++} ++ ++/* Insert task in DATA, with PRIORITY, in the priority list in LIST. ++ LIST contains items of type TYPE. ++ ++ If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the ++ top of its respective priority. If POS is PRIORITY_INSERT_END, the ++ task is inserted at the end of its priority. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and ++ we must keep track of higher and lower priority WAITING tasks by ++ keeping the queue's last_parent_depends_on field accurate. This ++ only applies to the children queue, and the caller must ensure LIST ++ is a children queue in this case. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON is ++ set to the task's parent_depends_on field. If ++ ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant. ++ ++ Return the new priority_node. 
*/ ++ ++static inline void ++priority_list_insert (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ struct priority_node *node = task_to_priority_node (type, task); ++ if (list->tasks) ++ { ++ /* If we are keeping track of higher/lower priority items, ++ but this is a lower priority WAITING task ++ (parent_depends_on != NULL), put it after all ready to ++ run tasks. See the comment in ++ priority_queue_upgrade_task for a visual on how tasks ++ should be organized. */ ++ if (adjust_parent_depends_on ++ && pos == PRIORITY_INSERT_BEGIN ++ && list->last_parent_depends_on ++ && !task_is_parent_depends_on) ++ { ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev = last_parent_depends_on; ++ } ++ /* Otherwise, put it at the top/bottom of the queue. */ ++ else ++ { ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ if (pos == PRIORITY_INSERT_BEGIN) ++ list->tasks = node; ++ } ++ node->next->prev = node; ++ node->prev->next = node; ++ } ++ else ++ { ++ node->next = node; ++ node->prev = node; ++ list->tasks = node; ++ } ++ if (adjust_parent_depends_on ++ && list->last_parent_depends_on == NULL ++ && task_is_parent_depends_on) ++ list->last_parent_depends_on = node; ++} ++ ++/* Tree version of priority_list_insert. */ ++ ++static inline void ++priority_tree_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ if (__builtin_expect (head->t.root == NULL, 0)) ++ { ++ /* The first time around, transfer any priority 0 items to the ++ tree. */ ++ if (head->l.tasks != NULL) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = 0; ++ k->key.l.tasks = head->l.tasks; ++ k->key.l.last_parent_depends_on = head->l.last_parent_depends_on; ++ prio_splay_tree_insert (&head->t, k); ++ head->l.tasks = NULL; ++ } ++ } ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++ if (!list) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = priority; ++ k->key.l.tasks = NULL; ++ k->key.l.last_parent_depends_on = NULL; ++ prio_splay_tree_insert (&head->t, k); ++ list = &k->key.l; ++ } ++ priority_list_insert (type, list, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* Generic version of priority_*_insert. 
*/ ++ ++static inline void ++priority_queue_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to insert existing task %p", task); ++#endif ++ if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0)) ++ priority_tree_insert (type, head, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++ else ++ priority_list_insert (type, &head->l, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* If multiple priorities are in play, return the highest priority ++ task from within Q1 and Q2, while giving preference to tasks from ++ Q1. If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. ++ ++ If multiple priorities are not in play (only 0 priorities are ++ available), the next task is chosen exclusively from Q1. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ We assume Q1 has at least one item. */ ++ ++static inline struct gomp_task * ++priority_queue_next_task (enum priority_queue_type t1, ++ struct priority_queue *q1, ++ enum priority_queue_type t2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_empty_p (q1, MEMMODEL_RELAXED)) ++ gomp_fatal ("priority_queue_next_task: Q1 is empty"); ++#endif ++ if (priority_queue_multi_p (q1)) ++ { ++ struct gomp_task *t ++ = priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p); ++ /* If T is NULL, there are no WAITING tasks in Q1. In which ++ case, return any old (non-waiting) task which will cause the ++ caller to do the right thing when checking T->KIND == ++ GOMP_TASK_WAITING. */ ++ if (!t) ++ { ++#if _LIBGOMP_CHECKING_ ++ if (*q1_chosen_p == false) ++ gomp_fatal ("priority_queue_next_task inconsistency"); ++#endif ++ return priority_node_to_task (t1, q1->t.root->key.l.tasks); ++ } ++ return t; ++ } ++ else ++ { ++ *q1_chosen_p = true; ++ return priority_node_to_task (t1, q1->l.tasks); ++ } ++} ++ ++/* Remove NODE from LIST. ++ ++ If we are removing the one and only item in the list, and MODEL is ++ MEMMODEL_RELEASE, use an atomic release to clear the list. ++ ++ If the list becomes empty after the remove, return TRUE. */ ++ ++static inline bool ++priority_list_remove (struct priority_list *list, ++ struct priority_node *node, ++ enum memmodel model) ++{ ++ bool empty = false; ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ if (list->tasks == node) ++ { ++ if (node->next != node) ++ list->tasks = node->next; ++ else ++ { ++ /* We access task->children in GOMP_taskwait outside of ++ the task lock mutex region, so need a release barrier ++ here to ensure memory written by child_task->fn above ++ is flushed before the NULL is written. */ ++ if (model == MEMMODEL_RELEASE) ++ __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE); ++ else ++ list->tasks = NULL; ++ empty = true; ++ goto remove_out; ++ } ++ } ++remove_out: ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ return empty; ++} ++ ++/* This is the generic version of priority_list_remove. ++ ++ Remove NODE from priority queue HEAD. 
HEAD contains tasks of type TYPE. ++ ++ If we are removing the one and only item in the priority queue and ++ MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue. ++ ++ If the queue becomes empty after the remove, return TRUE. */ ++ ++static inline bool ++priority_queue_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ enum memmodel model) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to remove missing task %p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ priority_tree_remove (type, head, task_to_priority_node (type, task)); ++ if (head->t.root == NULL) ++ { ++ if (model == MEMMODEL_RELEASE) ++ /* Errr, we store NULL twice, the alternative would be to ++ use an atomic release directly in the splay tree ++ routines. Worth it? */ ++ __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE); ++ return true; ++ } ++ return false; ++ } ++ else ++ return priority_list_remove (&head->l, ++ task_to_priority_node (type, task), model); ++} ++ ++#endif /* _PRIORITY_QUEUE_H_ */ +--- libgomp/priority_queue.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/priority_queue.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,300 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Priority queue implementation of GOMP tasks. */ ++ ++#include "libgomp.h" ++ ++#if _LIBGOMP_CHECKING_ ++#include ++ ++/* Sanity check to verify whether a TASK is in LIST. Return TRUE if ++ found, FALSE otherwise. ++ ++ TYPE is the type of priority queue this task resides in. */ ++ ++static inline bool ++priority_queue_task_in_list_p (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task) ++{ ++ struct priority_node *p = list->tasks; ++ do ++ { ++ if (priority_node_to_task (type, p) == task) ++ return true; ++ p = p->next; ++ } ++ while (p != list->tasks); ++ return false; ++} ++ ++/* Tree version of priority_queue_task_in_list_p. */ ++ ++static inline bool ++priority_queue_task_in_tree_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ if (!list) ++ return false; ++ return priority_queue_task_in_list_p (type, list, task); ++} ++ ++/* Generic version of priority_queue_task_in_list_p that works for ++ trees or lists. 
*/ ++ ++bool ++priority_queue_task_in_queue_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return false; ++ if (priority_queue_multi_p (head)) ++ return priority_queue_task_in_tree_p (type, head, task); ++ else ++ return priority_queue_task_in_list_p (type, &head->l, task); ++} ++ ++/* Sanity check LIST to make sure the tasks therein are in the right ++ order. LIST is a priority list of type TYPE. ++ ++ The expected order is that GOMP_TASK_WAITING tasks come before ++ GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones. ++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue. */ ++ ++static void ++priority_list_verify (enum priority_queue_type type, ++ struct priority_list *list, bool check_deps) ++{ ++ bool seen_tied = false; ++ bool seen_plain_waiting = false; ++ struct priority_node *p = list->tasks; ++ while (1) ++ { ++ struct gomp_task *t = priority_node_to_task (type, p); ++ if (seen_tied && t->kind == GOMP_TASK_WAITING) ++ gomp_fatal ("priority_queue_verify: WAITING task after TIED"); ++ if (t->kind >= GOMP_TASK_TIED) ++ seen_tied = true; ++ else if (check_deps && t->kind == GOMP_TASK_WAITING) ++ { ++ if (t->parent_depends_on) ++ { ++ if (seen_plain_waiting) ++ gomp_fatal ("priority_queue_verify: " ++ "parent_depends_on after !parent_depends_on"); ++ } ++ else ++ seen_plain_waiting = true; ++ } ++ p = p->next; ++ if (p == list->tasks) ++ break; ++ } ++} ++ ++/* Callback type for priority_tree_verify_callback. */ ++struct cbtype ++{ ++ enum priority_queue_type type; ++ bool check_deps; ++}; ++ ++/* Verify every task in NODE. ++ ++ Callback for splay_tree_foreach. */ ++ ++static void ++priority_tree_verify_callback (prio_splay_tree_key key, void *data) ++{ ++ struct cbtype *cb = (struct cbtype *) data; ++ priority_list_verify (cb->type, &key->l, cb->check_deps); ++} ++ ++/* Generic version of priority_list_verify. ++ ++ Sanity check HEAD to make sure the tasks therein are in the right ++ order. The priority_queue holds tasks of type TYPE. ++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue. */ ++ ++void ++priority_queue_verify (enum priority_queue_type type, ++ struct priority_queue *head, bool check_deps) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return; ++ if (priority_queue_multi_p (head)) ++ { ++ struct cbtype cb = { type, check_deps }; ++ prio_splay_tree_foreach (&head->t, ++ priority_tree_verify_callback, &cb); ++ } ++ else ++ priority_list_verify (type, &head->l, check_deps); ++} ++#endif /* _LIBGOMP_CHECKING_ */ ++ ++/* Remove NODE from priority queue HEAD, wherever it may be inside the ++ tree. HEAD contains tasks of type TYPE. */ ++ ++void ++priority_tree_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct priority_node *node) ++{ ++ /* ?? The only reason this function is not inlined is because we ++ need to find the priority within gomp_task (which has not been ++ completely defined in the header file). 
If the lack of inlining ++ is a concern, we could pass the priority number as a ++ parameter, or we could move this to libgomp.h. */ ++ int priority = priority_node_to_task (type, node)->priority; ++ ++ /* ?? We could avoid this lookup by keeping a pointer to the key in ++ the priority_node. */ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority %d", priority); ++#endif ++ /* If NODE was the last in its priority, clean up the priority. */ ++ if (priority_list_remove (list, node, MEMMODEL_RELAXED)) ++ { ++ prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list); ++ list->tasks = NULL; ++#if _LIBGOMP_CHECKING_ ++ memset (list, 0xaf, sizeof (*list)); ++#endif ++ free (list); ++ } ++} ++ ++/* Return the highest priority WAITING task in a splay tree NODE. If ++ there are no WAITING tasks available, return NULL. ++ ++ NODE is a priority list containing tasks of type TYPE. ++ ++ The right most node in a tree contains the highest priority. ++ Recurse down to find such a node. If the task at that max node is ++ not WAITING, bubble back up and look at the remaining tasks ++ in-order. */ ++ ++static struct gomp_task * ++priority_tree_next_task_1 (enum priority_queue_type type, ++ prio_splay_tree_node node) ++{ ++ again: ++ if (!node) ++ return NULL; ++ struct gomp_task *ret = priority_tree_next_task_1 (type, node->right); ++ if (ret) ++ return ret; ++ ret = priority_node_to_task (type, node->key.l.tasks); ++ if (ret->kind == GOMP_TASK_WAITING) ++ return ret; ++ node = node->left; ++ goto again; ++} ++ ++/* Return the highest priority WAITING task from within Q1 and Q2, ++ while giving preference to tasks from Q1. Q1 is a queue containing ++ items of type TYPE1. Q2 is a queue containing items of type TYPE2. ++ ++ Since we are mostly interested in Q1, if there are no WAITING tasks ++ in Q1, we don't bother checking Q2, and just return NULL. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. */ ++ ++struct gomp_task * ++priority_tree_next_task (enum priority_queue_type type1, ++ struct priority_queue *q1, ++ enum priority_queue_type type2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++ struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root); ++ if (!t1 ++ /* Special optimization when only searching through one queue. */ ++ || !q2) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root); ++ if (!t2 || t1->priority > t2->priority) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ if (t2->priority > t1->priority) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ /* If we get here, the priorities are the same, so we must look at ++ parent_depends_on to make our decision. */ ++#if _LIBGOMP_CHECKING_ ++ if (t1 != t2) ++ gomp_fatal ("priority_tree_next_task: t1 != t2"); ++#endif ++ if (t2->parent_depends_on && !t1->parent_depends_on) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ *q1_chosen_p = true; ++ return t1; ++} ++ ++/* Priority splay trees comparison function. */ ++static inline int ++prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y) ++{ ++ if (x->l.priority == y->l.priority) ++ return 0; ++ return x->l.priority < y->l.priority ? 
-1 : 1; ++} ++ ++/* Define another splay tree instantiation, for priority_list's. */ ++#define splay_tree_prefix prio ++#define splay_tree_c ++#include "splay-tree.h" +--- libgomp/openacc.f90.jj 2016-07-13 16:57:04.434535373 +0200 ++++ libgomp/openacc.f90 2016-07-14 19:01:54.901230875 +0200 +@@ -0,0 +1,911 @@ ++! OpenACC Runtime Library Definitions. ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! . ++ ++module openacc_kinds ++ use iso_fortran_env, only: int32 ++ implicit none ++ ++ private :: int32 ++ public :: acc_device_kind ++ ++ integer, parameter :: acc_device_kind = int32 ++ ++ public :: acc_device_none, acc_device_default, acc_device_host ++ public :: acc_device_not_host, acc_device_nvidia ++ ++ ! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ public :: acc_handle_kind ++ ++ integer, parameter :: acc_handle_kind = int32 ++ ++ public :: acc_async_noval, acc_async_sync ++ ++ ! Keep in sync with include/gomp-constants.h. 
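As an aside on the priority-queue code above: priority_tree_next_task boils down to a simple rule, take the highest-priority WAITING task, prefer Q1 over Q2, and on a priority tie let a parent_depends_on task win. Below is a minimal standalone sketch of that rule in plain C, using arrays in place of libgomp's splay trees; struct toy_task and the toy_* helpers are invented for illustration and are not part of libgomp.

/* Toy model of the selection rule in priority_tree_next_task.
   Arrays stand in for the splay trees; in libgomp the highest
   priority is found by walking to the right-most tree node.  */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_task
{
  int priority;
  bool waiting;                 /* GOMP_TASK_WAITING in the real code.  */
  bool parent_depends_on;
};

/* Highest-priority WAITING task in T[0..N-1], or NULL.  */
static struct toy_task *
toy_next_waiting (struct toy_task *t, size_t n)
{
  struct toy_task *best = NULL;
  for (size_t i = 0; i < n; i++)
    if (t[i].waiting && (!best || t[i].priority > best->priority))
      best = &t[i];
  return best;
}

/* Mirror of the Q1/Q2 preference: if Q1 has nothing WAITING, give up;
   on a priority tie, a parent_depends_on task from Q2 wins.  */
static struct toy_task *
toy_next_task (struct toy_task *q1, size_t n1,
               struct toy_task *q2, size_t n2, bool *q1_chosen_p)
{
  struct toy_task *t1 = toy_next_waiting (q1, n1);
  *q1_chosen_p = true;
  if (!t1 || !q2)
    return t1;
  struct toy_task *t2 = toy_next_waiting (q2, n2);
  if (!t2 || t1->priority > t2->priority)
    return t1;
  if (t2->priority > t1->priority
      || (t2->parent_depends_on && !t1->parent_depends_on))
    {
      *q1_chosen_p = false;
      return t2;
    }
  return t1;
}

int
main (void)
{
  struct toy_task q1[] = { { 1, true, false }, { 5, false, false } };
  struct toy_task q2[] = { { 1, true, true } };
  bool from_q1;
  struct toy_task *t = toy_next_task (q1, 2, q2, 1, &from_q1);
  printf ("priority %d, from %s\n", t->priority, from_q1 ? "q1" : "q2");
  return 0;
}

Compiled with any C99 compiler this prints "priority 1, from q2", matching the tie-break at the end of priority_tree_next_task. (The real queues share their tasks, hence the t1 != t2 sanity check there; the toy version uses two independent arrays.)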
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++end module ++ ++module openacc_internal ++ use openacc_kinds ++ implicit none ++ ++ interface ++ function acc_get_num_devices_h (d) ++ import ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ ++ subroutine acc_set_device_type_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_type_h () ++ import ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ ++ subroutine acc_set_device_num_h (n, d) ++ import ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_num_h (d) ++ import ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ ++ subroutine acc_wait_h (a) ++ integer a ++ end subroutine ++ ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_h () ++ end subroutine ++ ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ ++ subroutine acc_init_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ subroutine acc_shutdown_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_on_device_h (d) ++ import ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ end function ++ ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type 
(*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ interface ++ function acc_get_num_devices_l (d) & ++ bind (C, name = "acc_get_num_devices") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_num_devices_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_set_device_type_l (d) & ++ bind (C, name = "acc_set_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_get_device_type_l () & ++ bind (C, name = "acc_get_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_type_l ++ end function ++ ++ subroutine acc_set_device_num_l (n, d) & ++ bind (C, name = "acc_set_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: n, d ++ end subroutine ++ ++ function acc_get_device_num_l (d) & ++ bind (C, name = "acc_get_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_num_l ++ integer (c_int), value :: d ++ end function ++ ++ function acc_async_test_l (a) & ++ bind (C, name = "acc_async_test") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_l ++ integer (c_int), value :: a ++ end function ++ ++ function acc_async_test_all_l () & ++ bind (C, name = "acc_async_test_all") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_all_l ++ end function ++ ++ subroutine acc_wait_l (a) & ++ bind (C, name = "acc_wait") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ subroutine acc_wait_async_l (a1, a2) & ++ bind (C, name = "acc_wait_async") ++ use 
iso_c_binding, only: c_int ++ integer (c_int), value :: a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_l () & ++ bind (C, name = "acc_wait_all") ++ use iso_c_binding, only: c_int ++ end subroutine ++ ++ subroutine acc_wait_all_async_l (a) & ++ bind (C, name = "acc_wait_all_async") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ subroutine acc_init_l (d) & ++ bind (C, name = "acc_init") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ subroutine acc_shutdown_l (d) & ++ bind (C, name = "acc_shutdown") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_on_device_l (d) & ++ bind (C, name = "acc_on_device") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_on_device_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_copyin_l (a, len) & ++ bind (C, name = "acc_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_l (a, len) & ++ bind (C, name = "acc_present_or_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_create_l (a, len) & ++ bind (C, name = "acc_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_create_l (a, len) & ++ bind (C, name = "acc_present_or_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_copyout_l (a, len) & ++ bind (C, name = "acc_copyout") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_delete_l (a, len) & ++ bind (C, name = "acc_delete") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_device_l (a, len) & ++ bind (C, name = "acc_update_device") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_self_l (a, len) & ++ bind (C, name = "acc_update_self") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ function acc_is_present_l (a, len) & ++ bind (C, name = "acc_is_present") ++ use iso_c_binding, only: c_int32_t, c_size_t ++ integer (c_int32_t) :: acc_is_present_l ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end function ++ end interface ++end module ++ ++module openacc ++ use openacc_kinds ++ use openacc_internal ++ implicit none ++ ++ public :: openacc_version ++ ++ public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type ++ public :: acc_set_device_num, acc_get_device_num, acc_async_test ++ public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all ++ public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device ++ public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create ++ public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete ++ public :: acc_update_device, acc_update_self, acc_is_present ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ procedure :: acc_get_num_devices_h ++ end 
interface ++ ++ interface acc_set_device_type ++ procedure :: acc_set_device_type_h ++ end interface ++ ++ interface acc_get_device_type ++ procedure :: acc_get_device_type_h ++ end interface ++ ++ interface acc_set_device_num ++ procedure :: acc_set_device_num_h ++ end interface ++ ++ interface acc_get_device_num ++ procedure :: acc_get_device_num_h ++ end interface ++ ++ interface acc_async_test ++ procedure :: acc_async_test_h ++ end interface ++ ++ interface acc_async_test_all ++ procedure :: acc_async_test_all_h ++ end interface ++ ++ interface acc_wait ++ procedure :: acc_wait_h ++ end interface ++ ++ interface acc_wait_async ++ procedure :: acc_wait_async_h ++ end interface ++ ++ interface acc_wait_all ++ procedure :: acc_wait_all_h ++ end interface ++ ++ interface acc_wait_all_async ++ procedure :: acc_wait_all_async_h ++ end interface ++ ++ interface acc_init ++ procedure :: acc_init_h ++ end interface ++ ++ interface acc_shutdown ++ procedure :: acc_shutdown_h ++ end interface ++ ++ interface acc_on_device ++ procedure :: acc_on_device_h ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! acc_free: Only available in C/C++ ++ ++ ! As vendor extension, the following code supports both 32bit and 64bit ++ ! arguments for "size"; the OpenACC standard only permits default-kind ++ ! integers, which are of kind 4 (i.e. 32 bits). ++ ! Additionally, the two-argument version also takes arrays as argument. ++ ! and the one argument version also scalars. Note that the code assumes ++ ! that the arrays are contiguous. ++ ++ interface acc_copyin ++ procedure :: acc_copyin_32_h ++ procedure :: acc_copyin_64_h ++ procedure :: acc_copyin_array_h ++ end interface ++ ++ interface acc_present_or_copyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_pcopyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_create ++ procedure :: acc_create_32_h ++ procedure :: acc_create_64_h ++ procedure :: acc_create_array_h ++ end interface ++ ++ interface acc_present_or_create ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_pcreate ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_copyout ++ procedure :: acc_copyout_32_h ++ procedure :: acc_copyout_64_h ++ procedure :: acc_copyout_array_h ++ end interface ++ ++ interface acc_delete ++ procedure :: acc_delete_32_h ++ procedure :: acc_delete_64_h ++ procedure :: acc_delete_array_h ++ end interface ++ ++ interface acc_update_device ++ procedure :: acc_update_device_32_h ++ procedure :: acc_update_device_64_h ++ procedure :: acc_update_device_array_h ++ end interface ++ ++ interface acc_update_self ++ procedure :: acc_update_self_32_h ++ procedure :: acc_update_self_64_h ++ procedure :: acc_update_self_array_h ++ end interface ++ ++ ! acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_hostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ procedure :: acc_is_present_32_h ++ procedure :: acc_is_present_64_h ++ procedure :: acc_is_present_array_h ++ end interface ++ ++ ! 
acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ ++ ++end module ++ ++function acc_get_num_devices_h (d) ++ use openacc_internal, only: acc_get_num_devices_l ++ use openacc_kinds ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ acc_get_num_devices_h = acc_get_num_devices_l (d) ++end function ++ ++subroutine acc_set_device_type_h (d) ++ use openacc_internal, only: acc_set_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_set_device_type_l (d) ++end subroutine ++ ++function acc_get_device_type_h () ++ use openacc_internal, only: acc_get_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) acc_get_device_type_h ++ acc_get_device_type_h = acc_get_device_type_l () ++end function ++ ++subroutine acc_set_device_num_h (n, d) ++ use openacc_internal, only: acc_set_device_num_l ++ use openacc_kinds ++ integer n ++ integer (acc_device_kind) d ++ call acc_set_device_num_l (n, d) ++end subroutine ++ ++function acc_get_device_num_h (d) ++ use openacc_internal, only: acc_get_device_num_l ++ use openacc_kinds ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ acc_get_device_num_h = acc_get_device_num_l (d) ++end function ++ ++function acc_async_test_h (a) ++ use openacc_internal, only: acc_async_test_l ++ logical acc_async_test_h ++ integer a ++ if (acc_async_test_l (a) .eq. 1) then ++ acc_async_test_h = .TRUE. ++ else ++ acc_async_test_h = .FALSE. ++ end if ++end function ++ ++function acc_async_test_all_h () ++ use openacc_internal, only: acc_async_test_all_l ++ logical acc_async_test_all_h ++ if (acc_async_test_all_l () .eq. 1) then ++ acc_async_test_all_h = .TRUE. ++ else ++ acc_async_test_all_h = .FALSE. ++ end if ++end function ++ ++subroutine acc_wait_h (a) ++ use openacc_internal, only: acc_wait_l ++ integer a ++ call acc_wait_l (a) ++end subroutine ++ ++subroutine acc_wait_async_h (a1, a2) ++ use openacc_internal, only: acc_wait_async_l ++ integer a1, a2 ++ call acc_wait_async_l (a1, a2) ++end subroutine ++ ++subroutine acc_wait_all_h () ++ use openacc_internal, only: acc_wait_all_l ++ call acc_wait_all_l () ++end subroutine ++ ++subroutine acc_wait_all_async_h (a) ++ use openacc_internal, only: acc_wait_all_async_l ++ integer a ++ call acc_wait_all_async_l (a) ++end subroutine ++ ++subroutine acc_init_h (d) ++ use openacc_internal, only: acc_init_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_init_l (d) ++end subroutine ++ ++subroutine acc_shutdown_h (d) ++ use openacc_internal, only: acc_shutdown_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_shutdown_l (d) ++end subroutine ++ ++function acc_on_device_h (d) ++ use openacc_internal, only: acc_on_device_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ if (acc_on_device_l (d) .eq. 1) then ++ acc_on_device_h = .TRUE. ++ else ++ acc_on_device_h = .FALSE. 
++ end if ++end function ++ ++subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_array_h (a) ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_array_h (a) ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_array_h (a) ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_array_h (a) ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyout_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyout_l (a, int (len, kind = 
c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_array_h (a) ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyout_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_delete_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_delete_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_delete_array_h (a) ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (..), contiguous :: a ++ call acc_delete_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_array_h (a) ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_device_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_array_h (a) ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_self_l (a, sizeof (a)) ++end subroutine ++ ++function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_32_h = .TRUE. ++ else ++ acc_is_present_32_h = .FALSE. ++ end if ++end function ++ ++function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_64_h = .TRUE. ++ else ++ acc_is_present_64_h = .FALSE. 
++ end if ++end function ++ ++function acc_is_present_array_h (a) ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 ++end function diff --git a/SOURCES/gcc48-libgomp-omp_h-multilib.patch b/SOURCES/gcc48-libgomp-omp_h-multilib.patch new file mode 100644 index 0000000..d0e98d1 --- /dev/null +++ b/SOURCES/gcc48-libgomp-omp_h-multilib.patch @@ -0,0 +1,17 @@ +2008-06-09 Jakub Jelinek + + * omp.h.in (omp_nest_lock_t): Fix up for Linux multilibs. + +--- libgomp/omp.h.in.jj 2008-06-09 13:34:05.000000000 +0200 ++++ libgomp/omp.h.in 2008-06-09 13:34:48.000000000 +0200 +@@ -42,8 +42,8 @@ typedef struct + + typedef struct + { +- unsigned char _x[@OMP_NEST_LOCK_SIZE@] +- __attribute__((__aligned__(@OMP_NEST_LOCK_ALIGN@))); ++ unsigned char _x[8 + sizeof (void *)] ++ __attribute__((__aligned__(sizeof (void *)))); + } omp_nest_lock_t; + #endif + diff --git a/SOURCES/gcc48-libstdc++-docs.patch b/SOURCES/gcc48-libstdc++-docs.patch new file mode 100644 index 0000000..382666b --- /dev/null +++ b/SOURCES/gcc48-libstdc++-docs.patch @@ -0,0 +1,26 @@ +--- libstdc++-v3/doc/html/index.html.jj 2011-01-03 12:53:21.282829010 +0100 ++++ libstdc++-v3/doc/html/index.html 2011-01-04 18:06:28.999851145 +0100 +@@ -5,6 +5,8 @@ + FSF + +
++      Release 4.8.3
++    </p><p>
+ Permission is granted to copy, distribute and/or modify this + document under the terms of the GNU Free Documentation + License, Version 1.2 or any later version published by the +--- libstdc++-v3/doc/html/api.html.jj 2011-01-03 12:53:21.000000000 +0100 ++++ libstdc++-v3/doc/html/api.html 2011-01-04 18:12:01.672757784 +0100 +@@ -18,8 +18,11 @@ + member functions for the library classes, finding out what is in a + particular include file, looking at inheritance diagrams, etc. +
+    </p><p>
+- The API documentation, rendered into HTML, can be viewed online: ++ The API documentation, rendered into HTML, can be viewed here: +
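The Fortran wrappers in openacc.f90 above are thin shims over the C entry points named in its bind(C) interface block: they convert the length argument to c_size_t (or take sizeof for the assumed-rank array variants) and call acc_copyin, acc_create, acc_is_present and friends directly. A minimal C usage sketch of those same entry points follows; it assumes a GCC build with libgomp's OpenACC support, compiled with something like gcc -fopenacc so that openacc.h and the runtime are available.

/* Map an array to the device, check it is present, copy it back.  */
#include <openacc.h>
#include <stdio.h>

int
main (void)
{
  float a[1024];

  acc_init (acc_device_default);
  acc_copyin (a, sizeof a);         /* Allocate on device and copy in.  */
  if (acc_is_present (a, sizeof a))
    printf ("a is mapped on the device\n");
  acc_copyout (a, sizeof a);        /* Copy back and unmap.  */
  acc_shutdown (acc_device_default);
  return 0;
}

The generic interfaces acc_copyin, acc_pcopyin, acc_create and so on in the module funnel their 32-bit, 64-bit and array variants into these same C symbols, which is why only one bind(C) declaration per function is needed.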
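Similarly, gcc48-libgomp-omp_h-multilib.patch above swaps the configure-time @OMP_NEST_LOCK_SIZE@ and @OMP_NEST_LOCK_ALIGN@ substitutions for expressions that depend only on the pointer size, so a single installed omp.h serves both the 32-bit and 64-bit multilibs. A small sketch of the effect; the struct name is illustrative and not the installed omp_nest_lock_t:

/* Same layout expression as the patched omp.h.in.  */
#include <stdio.h>

typedef struct
{
  unsigned char _x[8 + sizeof (void *)]
    __attribute__((__aligned__(sizeof (void *))));
} demo_nest_lock_t;

int
main (void)
{
  /* On x86 this prints "size 12, align 4" with -m32
     and "size 16, align 8" with -m64.  */
  printf ("size %zu, align %zu\n",
          sizeof (demo_nest_lock_t), __alignof__ (demo_nest_lock_t));
  return 0;
}

With the old substitution the header hard-coded whichever sizes were computed at configure time; with sizeof (void *) each multilib that includes the header gets the layout that matches its own ABI.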