From ee890f87ea2ed076d1cc948c74c926591812d0d2 Mon Sep 17 00:00:00 2001
From: CentOS Sources <bugs@centos.org>
Date: Nov 03 2016 06:03:47 +0000
Subject: import gcc-4.8.5-11.el7


---

diff --git a/SOURCES/gcc48-libgomp-20160715.patch b/SOURCES/gcc48-libgomp-20160715.patch
new file mode 100644
index 0000000..9b6a61e
--- /dev/null
+++ b/SOURCES/gcc48-libgomp-20160715.patch
@@ -0,0 +1,10653 @@
+--- libgomp/config/linux/wait.h.jj	2013-01-31 20:29:10.091548989 +0100
++++ libgomp/config/linux/wait.h	2016-07-13 16:57:18.902355979 +0200
+@@ -34,13 +34,13 @@
+ 
+ #define FUTEX_WAIT	0
+ #define FUTEX_WAKE	1
+-#define FUTEX_PRIVATE_FLAG	128L
++#define FUTEX_PRIVATE_FLAG	128
+ 
+ #ifdef HAVE_ATTRIBUTE_VISIBILITY
+ # pragma GCC visibility push(hidden)
+ #endif
+ 
+-extern long int gomp_futex_wait, gomp_futex_wake;
++extern int gomp_futex_wait, gomp_futex_wake;
+ 
+ #include <futex.h>
+ 
+@@ -48,7 +48,9 @@ static inline int do_spin (int *addr, in
+ {
+   unsigned long long i, count = gomp_spin_count_var;
+ 
+-  if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0))
++  if (__builtin_expect (__atomic_load_n (&gomp_managed_threads,
++                                         MEMMODEL_RELAXED)
++                        > gomp_available_cpus, 0))
+     count = gomp_throttled_spin_count_var;
+   for (i = 0; i < count; i++)
+     if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0))
+--- libgomp/config/linux/affinity.c.jj	2014-05-15 10:56:37.499502573 +0200
++++ libgomp/config/linux/affinity.c	2016-07-13 16:57:18.902355979 +0200
+@@ -352,6 +352,45 @@ gomp_affinity_print_place (void *p)
+     fprintf (stderr, ":%lu", len);
+ }
+ 
++int
++omp_get_place_num_procs (int place_num)
++{
++  if (place_num < 0 || place_num >= gomp_places_list_len)
++    return 0;
++
++  cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
++  return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp);
++}
++
++void
++omp_get_place_proc_ids (int place_num, int *ids)
++{
++  if (place_num < 0 || place_num >= gomp_places_list_len)
++    return;
++
++  cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
++  unsigned long i, max = 8 * gomp_cpuset_size;
++  for (i = 0; i < max; i++)
++    if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
++      *ids++ = i;
++}
++
++void
++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
++{
++  if (place_num < 0 || place_num >= gomp_places_list_len)
++    return;
++
++  cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num];
++  unsigned long i, max = 8 * gomp_cpuset_size;
++  for (i = 0; i < max; i++)
++    if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp))
++      *ids++ = i;
++}
++
++ialias(omp_get_place_num_procs)
++ialias(omp_get_place_proc_ids)
++
+ #else
+ 
+ #include "../posix/affinity.c"
+--- libgomp/config/linux/mutex.c.jj	2013-01-21 16:00:38.220917670 +0100
++++ libgomp/config/linux/mutex.c	2016-07-13 16:57:18.870356375 +0200
+@@ -28,8 +28,8 @@
+ 
+ #include "wait.h"
+ 
+-long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
+-long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
++int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
++int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
+ 
+ void
+ gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval)
+--- libgomp/config/posix/affinity.c.jj	2014-05-15 10:56:37.987498844 +0200
++++ libgomp/config/posix/affinity.c	2016-07-15 12:08:28.410015743 +0200
+@@ -113,3 +113,27 @@ gomp_affinity_print_place (void *p)
+ {
+   (void) p;
+ }
++
++int
++omp_get_place_num_procs (int place_num)
++{
++  (void) place_num;
++  return 0;
++}
++
++void
++omp_get_place_proc_ids (int place_num, int *ids)
++{
++  (void) place_num;
++  (void) ids;
++}
++
++void
++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids)
++{
++  (void) place_num;
++  (void) ids;
++}
++
++ialias(omp_get_place_num_procs)
++ialias(omp_get_place_proc_ids)
+--- libgomp/loop_ull.c.jj	2013-01-21 16:00:46.477871806 +0100
++++ libgomp/loop_ull.c	2016-07-13 16:57:18.918355780 +0200
+@@ -174,15 +174,15 @@ GOMP_loop_ull_runtime_start (bool up, go
+     {
+     case GFS_STATIC:
+       return gomp_loop_ull_static_start (up, start, end, incr,
+-					 icv->run_sched_modifier,
++					 icv->run_sched_chunk_size,
+ 					 istart, iend);
+     case GFS_DYNAMIC:
+       return gomp_loop_ull_dynamic_start (up, start, end, incr,
+-					  icv->run_sched_modifier,
++					  icv->run_sched_chunk_size,
+ 					  istart, iend);
+     case GFS_GUIDED:
+       return gomp_loop_ull_guided_start (up, start, end, incr,
+-					 icv->run_sched_modifier,
++					 icv->run_sched_chunk_size,
+ 					 istart, iend);
+     case GFS_AUTO:
+       /* For now map to schedule(static), later on we could play with feedback
+@@ -278,15 +278,15 @@ GOMP_loop_ull_ordered_runtime_start (boo
+     {
+     case GFS_STATIC:
+       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
+-						 icv->run_sched_modifier,
++						 icv->run_sched_chunk_size,
+ 						 istart, iend);
+     case GFS_DYNAMIC:
+       return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
+-						  icv->run_sched_modifier,
++						  icv->run_sched_chunk_size,
+ 						  istart, iend);
+     case GFS_GUIDED:
+       return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
+-						 icv->run_sched_modifier,
++						 icv->run_sched_chunk_size,
+ 						 istart, iend);
+     case GFS_AUTO:
+       /* For now map to schedule(static), later on we could play with feedback
+@@ -298,6 +298,114 @@ GOMP_loop_ull_ordered_runtime_start (boo
+     }
+ }
+ 
++/* The *_doacross_*_start routines are similar.  The only difference is that
++   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
++   section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
++   and other COUNTS array elements tell the library number of iterations
++   in the ordered inner loops.  */
++
++static bool
++gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
++				     gomp_ull chunk_size, gomp_ull *istart,
++				     gomp_ull *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++
++  thr->ts.static_trip = 0;
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
++			  GFS_STATIC, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++
++  return !gomp_iter_ull_static_next (istart, iend);
++}
++
++static bool
++gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
++				      gomp_ull chunk_size, gomp_ull *istart,
++				      gomp_ull *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  bool ret;
++
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
++			  GFS_DYNAMIC, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++
++#if defined HAVE_SYNC_BUILTINS && defined __LP64__
++  ret = gomp_iter_ull_dynamic_next (istart, iend);
++#else
++  gomp_mutex_lock (&thr->ts.work_share->lock);
++  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++  return ret;
++}
++
++static bool
++gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
++				     gomp_ull chunk_size, gomp_ull *istart,
++				     gomp_ull *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  bool ret;
++
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
++			  GFS_GUIDED, chunk_size);
++      gomp_doacross_ull_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++
++#if defined HAVE_SYNC_BUILTINS && defined __LP64__
++  ret = gomp_iter_ull_guided_next (istart, iend);
++#else
++  gomp_mutex_lock (&thr->ts.work_share->lock);
++  ret = gomp_iter_ull_guided_next_locked (istart, iend);
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++  return ret;
++}
++
++bool
++GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
++				      gomp_ull *istart, gomp_ull *iend)
++{
++  struct gomp_task_icv *icv = gomp_icv (false);
++  switch (icv->run_sched_var)
++    {
++    case GFS_STATIC:
++      return gomp_loop_ull_doacross_static_start (ncounts, counts,
++						  icv->run_sched_chunk_size,
++						  istart, iend);
++    case GFS_DYNAMIC:
++      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
++						   icv->run_sched_chunk_size,
++						   istart, iend);
++    case GFS_GUIDED:
++      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
++						  icv->run_sched_chunk_size,
++						  istart, iend);
++    case GFS_AUTO:
++      /* For now map to schedule(static), later on we could play with feedback
++	 driven choice.  */
++      return gomp_loop_ull_doacross_static_start (ncounts, counts,
++						  0, istart, iend);
++    default:
++      abort ();
++    }
++}
++
+ /* The *_next routines are called when the thread completes processing of
+    the iteration block currently assigned to it.  If the work-share
+    construct is bound directly to a parallel construct, then the iteration
+@@ -457,6 +565,10 @@ extern __typeof(gomp_loop_ull_dynamic_st
+ 	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
+ extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
+ 	__attribute__((alias ("gomp_loop_ull_guided_start")));
++extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
++	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
++extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
++	__attribute__((alias ("gomp_loop_ull_guided_start")));
+ 
+ extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
+ 	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
+@@ -465,12 +577,23 @@ extern __typeof(gomp_loop_ull_ordered_dy
+ extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
+ 	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
+ 
++extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
++	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
++extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
++	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
++extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
++	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));
++
+ extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
+ 	__attribute__((alias ("gomp_loop_ull_static_next")));
+ extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
+ 	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
+ extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
+ 	__attribute__((alias ("gomp_loop_ull_guided_next")));
++extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
++	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
++extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
++	__attribute__((alias ("gomp_loop_ull_guided_next")));
+ 
+ extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
+ 	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
+@@ -507,6 +630,25 @@ GOMP_loop_ull_guided_start (bool up, gom
+ }
+ 
+ bool
++GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
++					  gomp_ull end, gomp_ull incr,
++					  gomp_ull chunk_size,
++					  gomp_ull *istart, gomp_ull *iend)
++{
++  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
++				      iend);
++}
++
++bool
++GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
++					 gomp_ull incr, gomp_ull chunk_size,
++					 gomp_ull *istart, gomp_ull *iend)
++{
++  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
++				     iend);
++}
++
++bool
+ GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
+ 				    gomp_ull incr, gomp_ull chunk_size,
+ 				    gomp_ull *istart, gomp_ull *iend)
+@@ -534,6 +676,33 @@ GOMP_loop_ull_ordered_guided_start (bool
+ }
+ 
+ bool
++GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
++				     gomp_ull chunk_size, gomp_ull *istart,
++				     gomp_ull *iend)
++{
++  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
++					      istart, iend);
++}
++
++bool
++GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
++				      gomp_ull chunk_size, gomp_ull *istart,
++				      gomp_ull *iend)
++{
++  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
++					       istart, iend);
++}
++
++bool
++GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
++				     gomp_ull chunk_size, gomp_ull *istart,
++				     gomp_ull *iend)
++{
++  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
++					      istart, iend);
++}
++
++bool
+ GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
+ {
+   return gomp_loop_ull_static_next (istart, iend);
+@@ -550,6 +719,18 @@ GOMP_loop_ull_guided_next (gomp_ull *ist
+ {
+   return gomp_loop_ull_guided_next (istart, iend);
+ }
++
++bool
++GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
++{
++  return gomp_loop_ull_dynamic_next (istart, iend);
++}
++
++bool
++GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
++{
++  return gomp_loop_ull_guided_next (istart, iend);
++}
+ 
+ bool
+ GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
+--- libgomp/team.c.jj	2014-05-15 10:56:32.092524669 +0200
++++ libgomp/team.c	2016-07-13 17:58:01.907291111 +0200
+@@ -133,6 +133,25 @@ gomp_thread_start (void *xdata)
+   return NULL;
+ }
+ 
++static inline struct gomp_team *
++get_last_team (unsigned nthreads)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  if (thr->ts.team == NULL)
++    {
++      struct gomp_thread_pool *pool = thr->thread_pool;
++      if (pool != NULL)
++	{
++	  struct gomp_team *last_team = pool->last_team;
++	  if (last_team != NULL && last_team->nthreads == nthreads)
++	    {
++	      pool->last_team = NULL;
++	      return last_team;
++	    }
++	}
++    }
++  return NULL;
++}
+ 
+ /* Create a new team data structure.  */
+ 
+@@ -140,18 +159,27 @@ struct gomp_team *
+ gomp_new_team (unsigned nthreads)
+ {
+   struct gomp_team *team;
+-  size_t size;
+   int i;
+ 
+-  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
+-				      + sizeof (team->implicit_task[0]));
+-  team = gomp_malloc (size);
++  team = get_last_team (nthreads);
++  if (team == NULL)
++    {
++      size_t extra = sizeof (team->ordered_release[0])
++		     + sizeof (team->implicit_task[0]);
++      team = gomp_malloc (sizeof (*team) + nthreads * extra);
++
++#ifndef HAVE_SYNC_BUILTINS
++      gomp_mutex_init (&team->work_share_list_free_lock);
++#endif
++      gomp_barrier_init (&team->barrier, nthreads);
++      gomp_mutex_init (&team->task_lock);
++
++      team->nthreads = nthreads;
++    }
+ 
+   team->work_share_chunk = 8;
+ #ifdef HAVE_SYNC_BUILTINS
+   team->single_count = 0;
+-#else
+-  gomp_mutex_init (&team->work_share_list_free_lock);
+ #endif
+   team->work_shares_to_free = &team->work_shares[0];
+   gomp_init_work_share (&team->work_shares[0], false, nthreads);
+@@ -162,15 +190,11 @@ gomp_new_team (unsigned nthreads)
+     team->work_shares[i].next_free = &team->work_shares[i + 1];
+   team->work_shares[i].next_free = NULL;
+ 
+-  team->nthreads = nthreads;
+-  gomp_barrier_init (&team->barrier, nthreads);
+-
+   gomp_sem_init (&team->master_release, 0);
+   team->ordered_release = (void *) &team->implicit_task[nthreads];
+   team->ordered_release[0] = &team->master_release;
+ 
+-  gomp_mutex_init (&team->task_lock);
+-  team->task_queue = NULL;
++  priority_queue_init (&team->task_queue);
+   team->task_count = 0;
+   team->task_queued_count = 0;
+   team->task_running_count = 0;
+@@ -186,8 +210,12 @@ gomp_new_team (unsigned nthreads)
+ static void
+ free_team (struct gomp_team *team)
+ {
++#ifndef HAVE_SYNC_BUILTINS
++  gomp_mutex_destroy (&team->work_share_list_free_lock);
++#endif
+   gomp_barrier_destroy (&team->barrier);
+   gomp_mutex_destroy (&team->task_lock);
++  priority_queue_free (&team->task_queue);
+   free (team);
+ }
+ 
+@@ -258,6 +286,8 @@ gomp_free_thread (void *arg __attribute_
+       free (pool);
+       thr->thread_pool = NULL;
+     }
++  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
++    gomp_team_end ();
+   if (thr->task != NULL)
+     {
+       struct gomp_task *task = thr->task;
+@@ -287,7 +317,7 @@ gomp_team_start (void (*fn) (void *), vo
+   struct gomp_thread **affinity_thr = NULL;
+ 
+   thr = gomp_thread ();
+-  nested = thr->ts.team != NULL;
++  nested = thr->ts.level;
+   if (__builtin_expect (thr->thread_pool == NULL, 0))
+     {
+       thr->thread_pool = gomp_new_thread_pool ();
+@@ -894,9 +924,6 @@ gomp_team_end (void)
+       while (ws != NULL);
+     }
+   gomp_sem_destroy (&team->master_release);
+-#ifndef HAVE_SYNC_BUILTINS
+-  gomp_mutex_destroy (&team->work_share_list_free_lock);
+-#endif
+ 
+   if (__builtin_expect (thr->ts.team != NULL, 0)
+       || __builtin_expect (team->nthreads == 1, 0))
+--- libgomp/target.c.jj	2014-05-15 10:56:38.313498020 +0200
++++ libgomp/target.c	2016-07-15 16:58:29.249328861 +0200
+@@ -22,14 +22,22 @@
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+ 
+-/* This file handles the maintainence of threads in response to team
+-   creation and termination.  */
++/* This file contains the support of offloading.  */
+ 
++#include "config.h"
+ #include "libgomp.h"
++#include "oacc-plugin.h"
++#include "oacc-int.h"
++#include "gomp-constants.h"
+ #include <limits.h>
+ #include <stdbool.h>
+ #include <stdlib.h>
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIu64.  */
++#endif
+ #include <string.h>
++#include <assert.h>
++#include <errno.h>
+ 
+ attribute_hidden int
+ gomp_get_num_devices (void)
+@@ -37,22 +45,87 @@ gomp_get_num_devices (void)
+   return 0;
+ }
+ 
+-/* Called when encountering a target directive.  If DEVICE
+-   is -1, it means use device-var ICV.  If it is -2 (or any other value
+-   larger than last available hw device, use host fallback.
+-   FN is address of host code, OPENMP_TARGET contains value of the
+-   __OPENMP_TARGET__ symbol in the shared library or binary that invokes
+-   GOMP_target.  HOSTADDRS, SIZES and KINDS are arrays
+-   with MAPNUM entries, with addresses of the host objects,
+-   sizes of the host objects (resp. for pointer kind pointer bias
+-   and assumed sizeof (void *) size) and kinds.  */
++/* This function should be called from every offload image while loading.
++   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
++   the target, and TARGET_DATA needed by target plugin.  */
+ 
+ void
+-GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
+-	     size_t mapnum, void **hostaddrs, size_t *sizes,
+-	     unsigned char *kinds)
++GOMP_offload_register_ver (unsigned version, const void *host_table,
++			   int target_type, const void *target_data)
++{
++  (void) version;
++  (void) host_table;
++  (void) target_type;
++  (void) target_data;
++}
++
++void
++GOMP_offload_register (const void *host_table, int target_type,
++		       const void *target_data)
++{
++  (void) host_table;
++  (void) target_type;
++  (void) target_data;
++}
++
++/* This function should be called from every offload image while unloading.
++   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
++   the target, and TARGET_DATA needed by target plugin.  */
++
++void
++GOMP_offload_unregister_ver (unsigned version, const void *host_table,
++			     int target_type, const void *target_data)
++{
++  (void) version;
++  (void) host_table;
++  (void) target_type;
++  (void) target_data;
++}
++
++void
++GOMP_offload_unregister (const void *host_table, int target_type,
++			 const void *target_data)
++{
++  (void) host_table;
++  (void) target_type;
++  (void) target_data;
++}
++
++/* This function initializes the target device, specified by DEVICEP.  DEVICEP
++   must be locked on entry, and remains locked on return.  */
++
++attribute_hidden void
++gomp_init_device (struct gomp_device_descr *devicep)
++{
++  devicep->state = GOMP_DEVICE_INITIALIZED;
++}
++
++attribute_hidden void
++gomp_unload_device (struct gomp_device_descr *devicep)
++{
++}
++
++/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
++   locked on return.  */
++
++attribute_hidden void
++gomp_free_memmap (struct splay_tree_s *mem_map)
++{
++  while (mem_map->root)
++    {
++      struct target_mem_desc *tgt = mem_map->root->key.tgt;
++
++      splay_tree_remove (mem_map, &mem_map->root->key);
++      free (tgt->array);
++      free (tgt);
++    }
++}
++
++/* Host fallback for GOMP_target{,_ext} routines.  */
++
++static void
++gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
+ {
+-  /* Host fallback.  */
+   struct gomp_thread old_thr, *thr = gomp_thread ();
+   old_thr = *thr;
+   memset (thr, '\0', sizeof (*thr));
+@@ -66,10 +139,167 @@ GOMP_target (int device, void (*fn) (voi
+   *thr = old_thr;
+ }
+ 
++/* Calculate alignment and size requirements of a private copy of data shared
++   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
++
++static inline void
++calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
++				     unsigned short *kinds, size_t *tgt_align,
++				     size_t *tgt_size)
++{
++  size_t i;
++  for (i = 0; i < mapnum; i++)
++    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
++      {
++	size_t align = (size_t) 1 << (kinds[i] >> 8);
++	if (*tgt_align < align)
++	  *tgt_align = align;
++	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
++	*tgt_size += sizes[i];
++      }
++}
++
++/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */
++
++static inline void
++copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
++			size_t *sizes, unsigned short *kinds, size_t tgt_align,
++			size_t tgt_size)
++{
++  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
++  if (al)
++    tgt += tgt_align - al;
++  tgt_size = 0;
++  size_t i;
++  for (i = 0; i < mapnum; i++)
++    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
++      {
++	size_t align = (size_t) 1 << (kinds[i] >> 8);
++	tgt_size = (tgt_size + align - 1) & ~(align - 1);
++	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
++	hostaddrs[i] = tgt + tgt_size;
++	tgt_size = tgt_size + sizes[i];
++      }
++}
++
++/* Called when encountering a target directive.  If DEVICE
++   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
++   GOMP_DEVICE_HOST_FALLBACK (or any value
++   larger than last available hw device), use host fallback.
++   FN is address of host code, UNUSED is part of the current ABI, but
++   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
++   with MAPNUM entries, with addresses of the host objects,
++   sizes of the host objects (resp. for pointer kind pointer bias
++   and assumed sizeof (void *) size) and kinds.  */
++
++void
++GOMP_target (int device, void (*fn) (void *), const void *unused,
++	     size_t mapnum, void **hostaddrs, size_t *sizes,
++	     unsigned char *kinds)
++{
++  return gomp_target_fallback (fn, hostaddrs);
++}
++
++/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
++   and several arguments have been added:
++   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
++   DEPEND is array of dependencies, see GOMP_task for details.
++
++   ARGS is a pointer to an array consisting of a variable number of both
++   device-independent and device-specific arguments, each of which takes one
++   or two elements, where the first specifies for which device it is intended,
++   the type and optionally also the value.  If the value is not present in the
++   first one, the whole second element is the actual value.  The last element
++   of the array is a single NULL.  The device-independent arguments can be,
++   for example, NUM_TEAMS and THREAD_LIMIT.
++
++   NUM_TEAMS is positive if GOMP_teams will be called in the body with
++   that value, or 1 if teams construct is not present, or 0, if
++   teams construct does not have num_teams clause and so the choice is
++   implementation defined, and -1 if it can't be determined on the host
++   what value will GOMP_teams have on the device.
++   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
++   body with that value, or 0, if teams construct does not have thread_limit
++   clause or the teams construct is not present, or -1 if it can't be
++   determined on the host what value will GOMP_teams have on the device.  */
++
++void
++GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
++		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
++		 unsigned int flags, void **depend, void **args)
++{
++  size_t tgt_align = 0, tgt_size = 0;
++  bool fpc_done = false;
++
++  if (flags & GOMP_TARGET_FLAG_NOWAIT)
++    {
++      struct gomp_thread *thr = gomp_thread ();
++      if (thr->ts.team
++	  && !thr->task->final_task)
++	{
++	  gomp_create_target_task (NULL, fn, mapnum, hostaddrs,
++				   sizes, kinds, flags, depend, args,
++				   GOMP_TARGET_TASK_BEFORE_MAP);
++	  return;
++	}
++    }
++
++  /* If there are depend clauses, but nowait is not present
++     (or we are in a final task), block the parent task until the
++     dependencies are resolved and then just continue with the rest
++     of the function as if it is a merged task.  */
++  if (depend != NULL)
++    {
++      struct gomp_thread *thr = gomp_thread ();
++      if (thr->task && thr->task->depend_hash)
++	{
++	  /* If we might need to wait, copy firstprivate now.  */
++	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
++					       &tgt_align, &tgt_size);
++	  if (tgt_align)
++	    {
++	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
++	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
++				      tgt_align, tgt_size);
++	    }
++	  fpc_done = true;
++	  gomp_task_maybe_wait_for_dependencies (depend);
++	}
++    }
++
++  if (!fpc_done)
++    {
++      calculate_firstprivate_requirements (mapnum, sizes, kinds,
++					   &tgt_align, &tgt_size);
++      if (tgt_align)
++	{
++	  char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
++	  copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
++				  tgt_align, tgt_size);
++	}
++    }
++  gomp_target_fallback (fn, hostaddrs);
++}
++
++/* Host fallback for GOMP_target_data{,_ext} routines.  */
++
++static void
++gomp_target_data_fallback (void)
++{
++}
++
+ void
+-GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
++GOMP_target_data (int device, const void *unused, size_t mapnum,
+ 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
+ {
++  return gomp_target_data_fallback ();
++}
++
++void
++GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
++		      size_t *sizes, unsigned short *kinds)
++{
++  return gomp_target_data_fallback ();
+ }
+ 
+ void
+@@ -78,12 +308,112 @@ GOMP_target_end_data (void)
+ }
+ 
+ void
+-GOMP_target_update (int device, const void *openmp_target, size_t mapnum,
++GOMP_target_update (int device, const void *unused, size_t mapnum,
+ 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
+ {
+ }
+ 
+ void
++GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
++			size_t *sizes, unsigned short *kinds,
++			unsigned int flags, void **depend)
++{
++  /* If there are depend clauses, but nowait is not present,
++     block the parent task until the dependencies are resolved
++     and then just continue with the rest of the function as if it
++     is a merged task.  Until we are able to schedule task during
++     variable mapping or unmapping, ignore nowait if depend clauses
++     are not present.  */
++  if (depend != NULL)
++    {
++      struct gomp_thread *thr = gomp_thread ();
++      if (thr->task && thr->task->depend_hash)
++	{
++	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
++	      && thr->ts.team
++	      && !thr->task->final_task)
++	    {
++	      if (gomp_create_target_task (NULL, (void (*) (void *)) NULL,
++					   mapnum, hostaddrs, sizes, kinds,
++					   flags | GOMP_TARGET_FLAG_UPDATE,
++					   depend, NULL, GOMP_TARGET_TASK_DATA))
++		return;
++	    }
++	  else
++	    {
++	      struct gomp_team *team = thr->ts.team;
++	      /* If parallel or taskgroup has been cancelled, don't start new
++		 tasks.  */
++	      if (team
++		  && (gomp_team_barrier_cancelled (&team->barrier)
++		      || (thr->task->taskgroup
++			  && thr->task->taskgroup->cancelled)))
++		return;
++
++	      gomp_task_maybe_wait_for_dependencies (depend);
++	    }
++	}
++    }
++}
++
++void
++GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
++			     size_t *sizes, unsigned short *kinds,
++			     unsigned int flags, void **depend)
++{
++  /* If there are depend clauses, but nowait is not present,
++     block the parent task until the dependencies are resolved
++     and then just continue with the rest of the function as if it
++     is a merged task.  Until we are able to schedule task during
++     variable mapping or unmapping, ignore nowait if depend clauses
++     are not present.  */
++  if (depend != NULL)
++    {
++      struct gomp_thread *thr = gomp_thread ();
++      if (thr->task && thr->task->depend_hash)
++	{
++	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
++	      && thr->ts.team
++	      && !thr->task->final_task)
++	    {
++	      if (gomp_create_target_task (NULL, (void (*) (void *)) NULL,
++					   mapnum, hostaddrs, sizes, kinds,
++					   flags, depend, NULL,
++					   GOMP_TARGET_TASK_DATA))
++		return;
++	    }
++	  else
++	    {
++	      struct gomp_team *team = thr->ts.team;
++	      /* If parallel or taskgroup has been cancelled, don't start new
++		 tasks.  */
++	      if (team
++		  && (gomp_team_barrier_cancelled (&team->barrier)
++		      || (thr->task->taskgroup
++			  && thr->task->taskgroup->cancelled)))
++		return;
++
++	      gomp_task_maybe_wait_for_dependencies (depend);
++	    }
++	}
++    }
++}
++
++bool
++gomp_target_task_fn (void *data)
++{
++  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
++
++  if (ttask->fn != NULL)
++    {
++      ttask->state = GOMP_TARGET_TASK_FALLBACK;
++      gomp_target_fallback (ttask->fn, ttask->hostaddrs);
++      return false;
++    }
++  return false;
++}
++
++void
+ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+ {
+   if (thread_limit)
+@@ -94,3 +424,153 @@ GOMP_teams (unsigned int num_teams, unsi
+     }
+   (void) num_teams;
+ }
++
++void *
++omp_target_alloc (size_t size, int device_num)
++{
++  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
++    return malloc (size);
++
++  return NULL;
++}
++
++void
++omp_target_free (void *device_ptr, int device_num)
++{
++  if (device_ptr == NULL)
++    return;
++
++  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
++    {
++      free (device_ptr);
++      return;
++    }
++}
++
++int
++omp_target_is_present (void *ptr, int device_num)
++{
++  if (ptr == NULL)
++    return 1;
++
++  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
++    return 1;
++
++  return 0;
++}
++
++int
++omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
++		   size_t src_offset, int dst_device_num, int src_device_num)
++{
++  /* Both ends must be the host fallback device, otherwise EINVAL.  */
++  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK
++      || src_device_num != GOMP_DEVICE_HOST_FALLBACK)
++    return EINVAL;
++  memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
++  return 0;
++}
++
++#define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2))
++/* Stores (size_t) X * Y in *Z; yields true when that product wrapped.  */
++#define __builtin_mul_overflow(x, y, z) \
++  ({ bool retval = false;					\
++     size_t xval = (x);						\
++     size_t yval = (y);						\
++     size_t zval = xval * yval;					\
++     if (__builtin_expect ((xval | yval) >= HALF_SIZE_T, 0))	\
++       {							\
++         if (xval && zval / xval != yval)			\
++	   retval = true;					\
++       }							\
++     *(z) = zval;						\
++     retval; })
++
++static int
++omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
++			       int num_dims, const size_t *volume,
++			       const size_t *dst_offsets,
++			       const size_t *src_offsets,
++			       const size_t *dst_dimensions,
++			       const size_t *src_dimensions)
++{
++  size_t dst_slice = element_size;
++  size_t src_slice = element_size;
++  size_t j, dst_off, src_off, length;
++  int i, ret;
++  /* Returns 0 on success or EINVAL when a byte count overflows size_t.
++     One dimension left: a single memcpy of VOLUME[0] elements.  */
++  if (num_dims == 1)
++    {
++      if (__builtin_mul_overflow (element_size, volume[0], &length)
++	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
++	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
++	return EINVAL;
++      memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
++      ret = 1;
++      return ret ? 0 : EINVAL;
++    }
++
++  /* FIXME: it would be nice to have some plugin function to handle
++     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
++     be handled in the generic recursion below, and for host-host it
++     should be used even for any num_dims >= 2.  */
++  /* Bytes spanned by one index step of dimension 0 in DST and SRC.  */
++  for (i = 1; i < num_dims; i++)
++    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
++	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
++      return EINVAL;
++  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
++      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
++    return EINVAL;
++  for (j = 0; j < volume[0]; j++)  /* One recursive copy per outer index.  */
++    {
++      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
++					   (char *) src + src_off,
++					   element_size, num_dims - 1,
++					   volume + 1, dst_offsets + 1,
++					   src_offsets + 1, dst_dimensions + 1,
++					   src_dimensions + 1);
++      if (ret)
++	return ret;
++      dst_off += dst_slice;
++      src_off += src_slice;
++    }
++  return 0;
++}
++
++int
++omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
++			int num_dims, const size_t *volume,
++			const size_t *dst_offsets,
++			const size_t *src_offsets,
++			const size_t *dst_dimensions,
++			const size_t *src_dimensions,
++			int dst_device_num, int src_device_num)
++{
++  /* Both pointers NULL: report INT_MAX and touch nothing else.  */
++  if (dst == NULL && src == NULL)
++    return INT_MAX;
++
++  /* Only host fallback to host fallback copies are handled here.  */
++  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK
++      || src_device_num != GOMP_DEVICE_HOST_FALLBACK)
++    return EINVAL;
++
++  return omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
++					volume, dst_offsets, src_offsets,
++					dst_dimensions, src_dimensions);
++}
++
++int
++omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
++			  size_t device_offset, int device_num)
++{
++  return EINVAL;  /* Unconditional; no argument is examined.  */
++}
++
++int
++omp_target_disassociate_ptr (void *ptr, int device_num)
++{
++  return EINVAL;  /* Unconditional; no argument is examined.  */
++}
+--- libgomp/fortran.c.jj	2014-05-15 10:56:31.593531223 +0200
++++ libgomp/fortran.c	2016-07-13 16:57:04.432535397 +0200
+@@ -67,12 +67,20 @@ ialias_redirect (omp_get_active_level)
+ ialias_redirect (omp_in_final)
+ ialias_redirect (omp_get_cancellation)
+ ialias_redirect (omp_get_proc_bind)
++ialias_redirect (omp_get_num_places)
++ialias_redirect (omp_get_place_num_procs)
++ialias_redirect (omp_get_place_proc_ids)
++ialias_redirect (omp_get_place_num)
++ialias_redirect (omp_get_partition_num_places)
++ialias_redirect (omp_get_partition_place_nums)
+ ialias_redirect (omp_set_default_device)
+ ialias_redirect (omp_get_default_device)
+ ialias_redirect (omp_get_num_devices)
+ ialias_redirect (omp_get_num_teams)
+ ialias_redirect (omp_get_team_num)
+ ialias_redirect (omp_is_initial_device)
++ialias_redirect (omp_get_initial_device)
++ialias_redirect (omp_get_max_task_priority)
+ #endif
+ 
+ #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING
+@@ -342,35 +350,35 @@ omp_get_wtime_ (void)
+ }
+ 
+ void
+-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier)
++omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size)
+ {
+-  omp_set_schedule (*kind, *modifier);
++  omp_set_schedule (*kind, *chunk_size);
+ }
+ 
+ void
+-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier)
++omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size)
+ {
+-  omp_set_schedule (*kind, TO_INT (*modifier));
++  omp_set_schedule (*kind, TO_INT (*chunk_size));
+ }
+ 
+ void
+-omp_get_schedule_ (int32_t *kind, int32_t *modifier)
++omp_get_schedule_ (int32_t *kind, int32_t *chunk_size)
+ {
+   omp_sched_t k;
+-  int m;
+-  omp_get_schedule (&k, &m);
++  int cs;
++  omp_get_schedule (&k, &cs);
+   *kind = k;
+-  *modifier = m;
++  *chunk_size = cs;
+ }
+ 
+ void
+-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier)
++omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size)
+ {
+   omp_sched_t k;
+-  int m;
+-  omp_get_schedule (&k, &m);
++  int cs;
++  omp_get_schedule (&k, &cs);
+   *kind = k;
+-  *modifier = m;
++  *chunk_size = cs;
+ }
+ 
+ int32_t
+@@ -451,6 +459,69 @@ omp_get_proc_bind_ (void)
+   return omp_get_proc_bind ();
+ }
+ 
++int32_t
++omp_get_num_places_ (void)
++{  /* Trailing-underscore entry point: forwards to omp_get_num_places.  */
++  return omp_get_num_places ();
++}
++
++int32_t
++omp_get_place_num_procs_ (const int32_t *place_num)
++{  /* PLACE_NUM arrives by reference and is passed on by value.  */
++  return omp_get_place_num_procs (*place_num);
++}
++
++int32_t
++omp_get_place_num_procs_8_ (const int64_t *place_num)
++{  /* The 64-bit PLACE_NUM goes through TO_INT (defined elsewhere) first.  */
++  return omp_get_place_num_procs (TO_INT (*place_num));
++}
++
++void
++omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids)
++{  /* IDS is handed on through an (int *) cast; PLACE_NUM by value.  */
++  omp_get_place_proc_ids (*place_num, (int *) ids);
++}
++
++void
++omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids)
++{  /* 64-bit variant: delegates to gomp_get_place_proc_ids_8.  */
++  gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids);
++}
++
++int32_t
++omp_get_place_num_ (void)
++{  /* Trailing-underscore entry point: forwards to omp_get_place_num.  */
++  return omp_get_place_num ();
++}
++
++int32_t
++omp_get_partition_num_places_ (void)
++{  /* Forwards to omp_get_partition_num_places.  */
++  return omp_get_partition_num_places ();
++}
++
++void
++omp_get_partition_place_nums_ (int32_t *place_nums)
++{  /* PLACE_NUMS is handed on through an (int *) cast.  */
++  omp_get_partition_place_nums ((int *) place_nums);
++}
++
++void
++omp_get_partition_place_nums_8_ (int64_t *place_nums)
++{
++  if (gomp_places_list == NULL)  /* No places list: write nothing.  */
++    return;
++
++  struct gomp_thread *thr = gomp_thread ();
++  if (thr->place == 0)
++    gomp_init_affinity ();
++  /* Store place_partition_off + 0 .. + (place_partition_len - 1).  */
++  unsigned int i;
++  for (i = 0; i < thr->ts.place_partition_len; i++)
++    *place_nums++ = (int64_t) thr->ts.place_partition_off + i;
++}
++
+ void
+ omp_set_default_device_ (const int32_t *device_num)
+ {
+@@ -492,3 +563,15 @@ omp_is_initial_device_ (void)
+ {
+   return omp_is_initial_device ();
+ }
++
++int32_t
++omp_get_initial_device_ (void)
++{  /* Forwards to omp_get_initial_device.  */
++  return omp_get_initial_device ();
++}
++
++int32_t
++omp_get_max_task_priority_ (void)
++{  /* Forwards to omp_get_max_task_priority.  */
++  return omp_get_max_task_priority ();
++}
+--- libgomp/libgomp.map.jj	2014-05-15 10:56:31.927533549 +0200
++++ libgomp/libgomp.map	2016-07-13 16:57:04.434535373 +0200
+@@ -134,6 +134,36 @@ OMP_4.0 {
+ 	omp_is_initial_device_;
+ } OMP_3.1;
+ 
++OMP_4.5 {
++  global:
++	omp_get_max_task_priority;
++	omp_get_max_task_priority_;
++	omp_get_num_places;
++	omp_get_num_places_;
++	omp_get_place_num_procs;
++	omp_get_place_num_procs_;
++	omp_get_place_num_procs_8_;
++	omp_get_place_proc_ids;
++	omp_get_place_proc_ids_;
++	omp_get_place_proc_ids_8_;
++	omp_get_place_num;
++	omp_get_place_num_;
++	omp_get_partition_num_places;
++	omp_get_partition_num_places_;
++	omp_get_partition_place_nums;
++	omp_get_partition_place_nums_;
++	omp_get_partition_place_nums_8_;
++	omp_get_initial_device;
++	omp_get_initial_device_;
++	omp_target_alloc;
++	omp_target_free;
++	omp_target_is_present;
++	omp_target_memcpy;
++	omp_target_memcpy_rect;
++	omp_target_associate_ptr;
++	omp_target_disassociate_ptr;
++} OMP_4.0;
++
+ GOMP_1.0 {
+   global:
+ 	GOMP_atomic_end;
+@@ -227,3 +257,158 @@ GOMP_4.0 {
+ 	GOMP_target_update;
+ 	GOMP_teams;
+ } GOMP_3.0;
++
++GOMP_4.0.1 {
++  global:
++	GOMP_offload_register;
++	GOMP_offload_unregister;
++} GOMP_4.0;
++
++GOMP_4.5 {
++  global:
++	GOMP_target_ext;
++	GOMP_target_data_ext;
++	GOMP_target_update_ext;
++	GOMP_target_enter_exit_data;
++	GOMP_taskloop;
++	GOMP_taskloop_ull;
++	GOMP_offload_register_ver;
++	GOMP_offload_unregister_ver;
++	GOMP_loop_doacross_dynamic_start;
++	GOMP_loop_doacross_guided_start;
++	GOMP_loop_doacross_runtime_start;
++	GOMP_loop_doacross_static_start;
++	GOMP_doacross_post;
++	GOMP_doacross_wait;
++	GOMP_loop_ull_doacross_dynamic_start;
++	GOMP_loop_ull_doacross_guided_start;
++	GOMP_loop_ull_doacross_runtime_start;
++	GOMP_loop_ull_doacross_static_start;
++	GOMP_doacross_ull_post;
++	GOMP_doacross_ull_wait;
++	GOMP_loop_nonmonotonic_dynamic_next;
++	GOMP_loop_nonmonotonic_dynamic_start;
++	GOMP_loop_nonmonotonic_guided_next;
++	GOMP_loop_nonmonotonic_guided_start;
++	GOMP_loop_ull_nonmonotonic_dynamic_next;
++	GOMP_loop_ull_nonmonotonic_dynamic_start;
++	GOMP_loop_ull_nonmonotonic_guided_next;
++	GOMP_loop_ull_nonmonotonic_guided_start;
++	GOMP_parallel_loop_nonmonotonic_dynamic;
++	GOMP_parallel_loop_nonmonotonic_guided;
++} GOMP_4.0.1;
++
++OACC_2.0 {
++  global:
++	acc_get_num_devices;
++	acc_get_num_devices_h_;
++	acc_set_device_type;
++	acc_set_device_type_h_;
++	acc_get_device_type;
++	acc_get_device_type_h_;
++	acc_set_device_num;
++	acc_set_device_num_h_;
++	acc_get_device_num;
++	acc_get_device_num_h_;
++	acc_async_test;
++	acc_async_test_h_;
++	acc_async_test_all;
++	acc_async_test_all_h_;
++	acc_wait;
++	acc_wait_h_;
++	acc_wait_async;
++	acc_wait_async_h_;
++	acc_wait_all;
++	acc_wait_all_h_;
++	acc_wait_all_async;
++	acc_wait_all_async_h_;
++	acc_init;
++	acc_init_h_;
++	acc_shutdown;
++	acc_shutdown_h_;
++	acc_on_device;
++	acc_on_device_h_;
++	acc_malloc;
++	acc_free;
++	acc_copyin;
++	acc_copyin_32_h_;
++	acc_copyin_64_h_;
++	acc_copyin_array_h_;
++	acc_present_or_copyin;
++	acc_present_or_copyin_32_h_;
++	acc_present_or_copyin_64_h_;
++	acc_present_or_copyin_array_h_;
++	acc_create;
++	acc_create_32_h_;
++	acc_create_64_h_;
++	acc_create_array_h_;
++	acc_present_or_create;
++	acc_present_or_create_32_h_;
++	acc_present_or_create_64_h_;
++	acc_present_or_create_array_h_;
++	acc_copyout;
++	acc_copyout_32_h_;
++	acc_copyout_64_h_;
++	acc_copyout_array_h_;
++	acc_delete;
++	acc_delete_32_h_;
++	acc_delete_64_h_;
++	acc_delete_array_h_;
++	acc_update_device;
++	acc_update_device_32_h_;
++	acc_update_device_64_h_;
++	acc_update_device_array_h_;
++	acc_update_self;
++	acc_update_self_32_h_;
++	acc_update_self_64_h_;
++	acc_update_self_array_h_;
++	acc_map_data;
++	acc_unmap_data;
++	acc_deviceptr;
++	acc_hostptr;
++	acc_is_present;
++	acc_is_present_32_h_;
++	acc_is_present_64_h_;
++	acc_is_present_array_h_;
++	acc_memcpy_to_device;
++	acc_memcpy_from_device;
++	acc_get_current_cuda_device;
++	acc_get_current_cuda_context;
++	acc_get_cuda_stream;
++	acc_set_cuda_stream;
++};
++
++GOACC_2.0 {
++  global:
++	GOACC_data_end;
++	GOACC_data_start;
++	GOACC_enter_exit_data;
++	GOACC_parallel;
++	GOACC_update;
++	GOACC_wait;
++	GOACC_get_thread_num;
++	GOACC_get_num_threads;
++};
++
++GOACC_2.0.1 {
++  global:
++	GOACC_declare;
++	GOACC_parallel_keyed;
++} GOACC_2.0;
++
++GOMP_PLUGIN_1.0 {
++  global:
++	GOMP_PLUGIN_malloc;
++	GOMP_PLUGIN_malloc_cleared;
++	GOMP_PLUGIN_realloc;
++	GOMP_PLUGIN_debug;
++	GOMP_PLUGIN_error;
++	GOMP_PLUGIN_fatal;
++	GOMP_PLUGIN_async_unmap_vars;
++	GOMP_PLUGIN_acc_thread;
++};
++
++GOMP_PLUGIN_1.1 {
++  global:
++	GOMP_PLUGIN_target_task_completion;
++} GOMP_PLUGIN_1.0;
+--- libgomp/ordered.c.jj	2013-01-21 16:00:46.137873657 +0100
++++ libgomp/ordered.c	2016-07-13 16:57:18.918355780 +0200
+@@ -25,6 +25,9 @@
+ /* This file handles the ORDERED construct.  */
+ 
+ #include "libgomp.h"
++#include <stdarg.h>
++#include <string.h>
++#include "doacross.h"
+ 
+ 
+ /* This function is called when first allocating an iteration block.  That
+@@ -249,3 +252,533 @@ void
+ GOMP_ordered_end (void)
+ {
+ }
++
++/* DOACROSS initialization.  */
++
++#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__)
++
++void
++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++  struct gomp_work_share *ws = thr->ts.work_share;
++  unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0;
++  unsigned long ent, num_ents, elt_sz, shift_sz;
++  struct gomp_doacross_work_share *doacross;
++  /* Without a team, or with one thread, ws->doacross is left untouched.  */
++  if (team == NULL || team->nthreads == 1)
++    return;
++  /* Total the bits the counts need; MAX_COLLAPSED_BITS + 1 flags overflow.  */
++  for (i = 0; i < ncounts; i++)
++    {
++      /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
++      if (counts[i] == 0)
++	return;
++
++      if (num_bits <= MAX_COLLAPSED_BITS)
++	{
++	  unsigned int this_bits;
++	  if (counts[i] == 1)
++	    this_bits = 1;
++	  else
++	    this_bits = __SIZEOF_LONG__ * __CHAR_BIT__
++			- __builtin_clzl (counts[i] - 1);
++	  if (num_bits + this_bits <= MAX_COLLAPSED_BITS)
++	    {
++	      bits[i] = this_bits;
++	      num_bits += this_bits;
++	    }
++	  else
++	    num_bits = MAX_COLLAPSED_BITS + 1;
++	}
++    }
++  /* Entries: one per thread (static), COUNTS[0] (guided), else per chunk.  */
++  if (ws->sched == GFS_STATIC)
++    num_ents = team->nthreads;
++  else if (ws->sched == GFS_GUIDED)
++    num_ents = counts[0];
++  else
++    num_ents = (counts[0] - 1) / chunk_size + 1;
++  if (num_bits <= MAX_COLLAPSED_BITS)
++    {
++      elt_sz = sizeof (unsigned long);
++      shift_sz = ncounts * sizeof (unsigned int);
++    }
++  else
++    {
++      elt_sz = sizeof (unsigned long) * ncounts;
++      shift_sz = 0;
++    }
++  elt_sz = (elt_sz + 63) & ~63UL;  /* Round up to a multiple of 64.  */
++
++  doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
++			  + shift_sz);
++  doacross->chunk_size = chunk_size;
++  doacross->elt_sz = elt_sz;
++  doacross->ncounts = ncounts;
++  doacross->flattened = false;
++  doacross->array = (unsigned char *)
++		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
++		     & ~(uintptr_t) 63);
++  if (num_bits <= MAX_COLLAPSED_BITS)
++    {
++      unsigned int shift_count = 0;
++      doacross->flattened = true;
++      for (i = ncounts; i > 0; i--)  /* Last count gets the lowest bits.  */
++	{
++	  doacross->shift_counts[i - 1] = shift_count;
++	  shift_count += bits[i - 1];
++	}
++      for (ent = 0; ent < num_ents; ent++)
++	*(unsigned long *) (doacross->array + ent * elt_sz) = 0;
++    }
++  else
++    for (ent = 0; ent < num_ents; ent++)
++      memset (doacross->array + ent * elt_sz, '\0',
++	      sizeof (unsigned long) * ncounts);
++  if (ws->sched == GFS_STATIC && chunk_size == 0)
++    {
++      unsigned long q = counts[0] / num_ents;
++      unsigned long t = counts[0] % num_ents;
++      doacross->boundary = t * (q + 1);
++      doacross->q = q;
++      doacross->t = t;
++    }
++  ws->doacross = doacross;
++}
++
++/* DOACROSS POST operation.  */
++
++void
++GOMP_doacross_post (long *counts)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_work_share *ws = thr->ts.work_share;
++  struct gomp_doacross_work_share *doacross = ws->doacross;
++  unsigned long ent;
++  unsigned int i;
++  /* No doacross state: only issue a full memory barrier.  */
++  if (__builtin_expect (doacross == NULL, 0))
++    {
++      __sync_synchronize ();
++      return;
++    }
++  /* Entry: team_id (static), COUNTS[0] (guided), else COUNTS[0] / chunk.  */
++  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
++    ent = thr->ts.team_id;
++  else if (ws->sched == GFS_GUIDED)
++    ent = counts[0];
++  else
++    ent = counts[0] / doacross->chunk_size;
++  unsigned long *array = (unsigned long *) (doacross->array
++					    + ent * doacross->elt_sz);
++  /* Flattened: pack all counts into one word and publish it plus one.  */
++  if (__builtin_expect (doacross->flattened, 1))
++    {
++      unsigned long flattened
++	= (unsigned long) counts[0] << doacross->shift_counts[0];
++
++      for (i = 1; i < doacross->ncounts; i++)
++	flattened |= (unsigned long) counts[i]
++		     << doacross->shift_counts[i];
++      flattened++;
++      if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE))
++	__atomic_thread_fence (MEMMODEL_RELEASE);
++      else
++	__atomic_store_n (array, flattened, MEMMODEL_RELEASE);
++      return;
++    }
++  /* Otherwise publish COUNT + 1 per dimension, last dimension first.  */
++  __atomic_thread_fence (MEMMODEL_ACQUIRE);
++  for (i = doacross->ncounts; i-- > 0; )
++    {
++      if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED))
++	__atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE);
++    }
++}
++
++/* DOACROSS WAIT operation.  */
++
++void
++GOMP_doacross_wait (long first, ...)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_work_share *ws = thr->ts.work_share;
++  struct gomp_doacross_work_share *doacross = ws->doacross;
++  va_list ap;
++  unsigned long ent;
++  unsigned int i;
++  /* No doacross state: only issue a full memory barrier.  */
++  if (__builtin_expect (doacross == NULL, 0))
++    {
++      __sync_synchronize ();
++      return;
++    }
++  /* Derive the array entry to watch from FIRST and the schedule kind.  */
++  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
++    {
++      if (ws->chunk_size == 0)
++	{
++	  if (first < doacross->boundary)
++	    ent = first / (doacross->q + 1);
++	  else
++	    ent = (first - doacross->boundary) / doacross->q
++		  + doacross->t;
++	}
++      else
++	ent = first / ws->chunk_size % thr->ts.team->nthreads;
++    }
++  else if (ws->sched == GFS_GUIDED)
++    ent = first;
++  else
++    ent = first / doacross->chunk_size;
++  unsigned long *array = (unsigned long *) (doacross->array
++					    + ent * doacross->elt_sz);
++  /* Flattened: pack FIRST and the variadic counts into one word.  */
++  if (__builtin_expect (doacross->flattened, 1))
++    {
++      unsigned long flattened
++	= (unsigned long) first << doacross->shift_counts[0];
++      unsigned long cur;
++
++      va_start (ap, first);
++      for (i = 1; i < doacross->ncounts; i++)
++	flattened |= (unsigned long) va_arg (ap, long)
++		     << doacross->shift_counts[i];
++      cur = __atomic_load_n (array, MEMMODEL_ACQUIRE);
++      if (flattened < cur)  /* Stored word already exceeds the target.  */
++	{
++	  __atomic_thread_fence (MEMMODEL_RELEASE);
++	  va_end (ap);
++	  return;
++	}
++      doacross_spin (array, flattened, cur);
++      __atomic_thread_fence (MEMMODEL_RELEASE);
++      va_end (ap);
++      return;
++    }
++  /* Otherwise re-read the array until it is not below the counts + 1.  */
++  do
++    {
++      va_start (ap, first);
++      for (i = 0; i < doacross->ncounts; i++)
++	{
++	  unsigned long thisv
++	    = (unsigned long) (i ? va_arg (ap, long) : first) + 1;
++	  unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED);
++	  if (thisv < cur)  /* Stored value is ahead: finished.  */
++	    {
++	      i = doacross->ncounts;
++	      break;
++	    }
++	  if (thisv > cur)  /* Stored value is behind: retry.  */
++	    break;
++	}
++      va_end (ap);
++      if (i == doacross->ncounts)
++	break;
++      cpu_relax ();
++    }
++  while (1);
++  __sync_synchronize ();
++}
++
++typedef unsigned long long gomp_ull;
++
++void
++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++  struct gomp_work_share *ws = thr->ts.work_share;
++  unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0;
++  unsigned long ent, num_ents, elt_sz, shift_sz;
++  struct gomp_doacross_work_share *doacross;
++  /* Without a team, or with one thread, ws->doacross is left untouched.  */
++  if (team == NULL || team->nthreads == 1)
++    return;
++  /* Total the bits the counts need; MAX_COLLAPSED_BITS + 1 flags overflow.  */
++  for (i = 0; i < ncounts; i++)
++    {
++      /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
++      if (counts[i] == 0)
++	return;
++
++      if (num_bits <= MAX_COLLAPSED_BITS)
++	{
++	  unsigned int this_bits;
++	  if (counts[i] == 1)
++	    this_bits = 1;
++	  else
++	    this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__
++			- __builtin_clzll (counts[i] - 1);
++	  if (num_bits + this_bits <= MAX_COLLAPSED_BITS)
++	    {
++	      bits[i] = this_bits;
++	      num_bits += this_bits;
++	    }
++	  else
++	    num_bits = MAX_COLLAPSED_BITS + 1;
++	}
++    }
++  /* Entries: one per thread (static), COUNTS[0] (guided), else per chunk.  */
++  if (ws->sched == GFS_STATIC)
++    num_ents = team->nthreads;
++  else if (ws->sched == GFS_GUIDED)
++    num_ents = counts[0];
++  else
++    num_ents = (counts[0] - 1) / chunk_size + 1;
++  if (num_bits <= MAX_COLLAPSED_BITS)
++    {
++      elt_sz = sizeof (unsigned long);
++      shift_sz = ncounts * sizeof (unsigned int);
++    }
++  else
++    {
++      if (sizeof (gomp_ull) == sizeof (unsigned long))
++	elt_sz = sizeof (gomp_ull) * ncounts;
++      else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long))
++	elt_sz = sizeof (unsigned long) * 2 * ncounts;
++      else
++	abort ();
++      shift_sz = 0;
++    }
++  elt_sz = (elt_sz + 63) & ~63UL;  /* Round up to a multiple of 64.  */
++
++  doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
++			  + shift_sz);
++  doacross->chunk_size_ull = chunk_size;
++  doacross->elt_sz = elt_sz;
++  doacross->ncounts = ncounts;
++  doacross->flattened = false;
++  doacross->boundary = 0;
++  doacross->array = (unsigned char *)
++		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
++		     & ~(uintptr_t) 63);
++  if (num_bits <= MAX_COLLAPSED_BITS)
++    {
++      unsigned int shift_count = 0;
++      doacross->flattened = true;
++      for (i = ncounts; i > 0; i--)  /* Last count gets the lowest bits.  */
++	{
++	  doacross->shift_counts[i - 1] = shift_count;
++	  shift_count += bits[i - 1];
++	}
++      for (ent = 0; ent < num_ents; ent++)
++	*(unsigned long *) (doacross->array + ent * elt_sz) = 0;
++    }
++  else  /* Each count owns sizeof (gomp_ull) bytes, so clear all of them.  */
++    for (ent = 0; ent < num_ents; ent++)
++      memset (doacross->array + ent * elt_sz, '\0',
++	      sizeof (gomp_ull) * ncounts);
++  if (ws->sched == GFS_STATIC && chunk_size == 0)
++    {
++      gomp_ull q = counts[0] / num_ents;
++      gomp_ull t = counts[0] % num_ents;
++      doacross->boundary_ull = t * (q + 1);
++      doacross->q_ull = q;
++      doacross->t = t;
++    }
++  ws->doacross = doacross;
++}
++
++/* DOACROSS POST operation.  */
++
++void
++GOMP_doacross_ull_post (gomp_ull *counts)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_work_share *ws = thr->ts.work_share;
++  struct gomp_doacross_work_share *doacross = ws->doacross;
++  unsigned long ent;
++  unsigned int i;
++  /* No doacross state: only issue a full memory barrier.  */
++  if (__builtin_expect (doacross == NULL, 0))
++    {
++      __sync_synchronize ();
++      return;
++    }
++  /* Entry: team_id (static), COUNTS[0] (guided), else COUNTS[0] / chunk.  */
++  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
++    ent = thr->ts.team_id;
++  else if (ws->sched == GFS_GUIDED)
++    ent = counts[0];
++  else
++    ent = counts[0] / doacross->chunk_size_ull;
++  /* Flattened: pack all counts into one word and publish it plus one.  */
++  if (__builtin_expect (doacross->flattened, 1))
++    {
++      unsigned long *array = (unsigned long *) (doacross->array
++			      + ent * doacross->elt_sz);
++      gomp_ull flattened
++	= counts[0] << doacross->shift_counts[0];
++
++      for (i = 1; i < doacross->ncounts; i++)
++	flattened |= counts[i] << doacross->shift_counts[i];
++      flattened++;
++      if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE))
++	__atomic_thread_fence (MEMMODEL_RELEASE);
++      else
++	__atomic_store_n (array, flattened, MEMMODEL_RELEASE);
++      return;
++    }
++  /* Otherwise publish COUNT + 1 per dimension, last dimension first.  */
++  __atomic_thread_fence (MEMMODEL_ACQUIRE);
++  if (sizeof (gomp_ull) == sizeof (unsigned long))
++    {
++      gomp_ull *array = (gomp_ull *) (doacross->array
++				      + ent * doacross->elt_sz);
++
++      for (i = doacross->ncounts; i-- > 0; )
++	{
++	  if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED))
++	    __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE);
++	}
++    }
++  else  /* gomp_ull spans two longs: high half at [2i], low at [2i + 1].  */
++    {
++      unsigned long *array = (unsigned long *) (doacross->array
++						+ ent * doacross->elt_sz);
++
++      for (i = doacross->ncounts; i-- > 0; )
++	{
++	  gomp_ull cull = counts[i] + 1UL;
++	  unsigned long c = (unsigned long) cull;
++	  if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED))
++	    __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE);
++	  c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2);
++	  if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED))
++	    __atomic_store_n (&array[2 * i], c, MEMMODEL_RELEASE);
++	}
++    }
++}
++
++/* DOACROSS WAIT operation.  */
++
++void
++GOMP_doacross_ull_wait (gomp_ull first, ...)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_work_share *ws = thr->ts.work_share;
++  struct gomp_doacross_work_share *doacross = ws->doacross;
++  va_list ap;
++  unsigned long ent;
++  unsigned int i;
++  /* No doacross state: only issue a full memory barrier.  */
++  if (__builtin_expect (doacross == NULL, 0))
++    {
++      __sync_synchronize ();
++      return;
++    }
++  /* Derive the array entry to watch from FIRST and the schedule kind.  */
++  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
++    {
++      if (ws->chunk_size_ull == 0)
++	{
++	  if (first < doacross->boundary_ull)
++	    ent = first / (doacross->q_ull + 1);
++	  else
++	    ent = (first - doacross->boundary_ull) / doacross->q_ull
++		  + doacross->t;
++	}
++      else
++	ent = first / ws->chunk_size_ull % thr->ts.team->nthreads;
++    }
++  else if (ws->sched == GFS_GUIDED)
++    ent = first;
++  else
++    ent = first / doacross->chunk_size_ull;
++  /* Flattened: pack FIRST and the variadic counts into one word.  */
++  if (__builtin_expect (doacross->flattened, 1))
++    {
++      unsigned long *array = (unsigned long *) (doacross->array
++						+ ent * doacross->elt_sz);
++      gomp_ull flattened = first << doacross->shift_counts[0];
++      unsigned long cur;
++
++      va_start (ap, first);
++      for (i = 1; i < doacross->ncounts; i++)
++	flattened |= va_arg (ap, gomp_ull)
++		     << doacross->shift_counts[i];
++      cur = __atomic_load_n (array, MEMMODEL_ACQUIRE);
++      if (flattened < cur)  /* Stored word already exceeds the target.  */
++	{
++	  __atomic_thread_fence (MEMMODEL_RELEASE);
++	  va_end (ap);
++	  return;
++	}
++      doacross_spin (array, flattened, cur);
++      __atomic_thread_fence (MEMMODEL_RELEASE);
++      va_end (ap);
++      return;
++    }
++  /* Otherwise re-read the array until it is not below the counts + 1.  */
++  if (sizeof (gomp_ull) == sizeof (unsigned long))
++    {
++      gomp_ull *array = (gomp_ull *) (doacross->array
++				      + ent * doacross->elt_sz);
++      do
++	{
++	  va_start (ap, first);
++	  for (i = 0; i < doacross->ncounts; i++)
++	    {
++	      gomp_ull thisv
++		= (i ? va_arg (ap, gomp_ull) : first) + 1;
++	      gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED);
++	      if (thisv < cur)  /* Stored value is ahead: finished.  */
++		{
++		  i = doacross->ncounts;
++		  break;
++		}
++	      if (thisv > cur)  /* Stored value is behind: retry.  */
++		break;
++	    }
++	  va_end (ap);
++	  if (i == doacross->ncounts)
++	    break;
++	  cpu_relax ();
++	}
++      while (1);
++    }
++  else  /* Two longs per count: compare the high half, then the low.  */
++    {
++      unsigned long *array = (unsigned long *) (doacross->array
++						+ ent * doacross->elt_sz);
++      do
++	{
++	  va_start (ap, first);
++	  for (i = 0; i < doacross->ncounts; i++)
++	    {
++	      gomp_ull thisv
++		= (i ? va_arg (ap, gomp_ull) : first) + 1;
++	      unsigned long t
++		= thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2);
++	      unsigned long cur
++		= __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED);
++	      if (t < cur)
++		{
++		  i = doacross->ncounts;
++		  break;
++		}
++	      if (t > cur)
++		break;
++	      t = thisv;
++	      cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED);
++	      if (t < cur)
++		{
++		  i = doacross->ncounts;
++		  break;
++		}
++	      if (t > cur)
++		break;
++	    }
++	  va_end (ap);
++	  if (i == doacross->ncounts)
++	    break;
++	  cpu_relax ();
++	}
++      while (1);
++    }
++  __sync_synchronize ();
++}
+--- libgomp/loop.c.jj	2014-05-15 10:56:36.487505570 +0200
++++ libgomp/loop.c	2016-07-13 16:57:13.488423109 +0200
+@@ -110,6 +110,11 @@ gomp_loop_static_start (long start, long
+   return !gomp_iter_static_next (istart, iend);
+ }
+ 
++/* The current dynamic implementation is always monotonic.  The
++   entrypoints without nonmonotonic in them have to be always monotonic,
++   but the nonmonotonic ones could be changed to use work-stealing for
++   improved scalability.  */
++
+ static bool
+ gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
+ 			 long *istart, long *iend)
+@@ -135,6 +140,9 @@ gomp_loop_dynamic_start (long start, lon
+   return ret;
+ }
+ 
++/* The same applies to guided, though the question is how the chunk sizes
++   can be decreased without central locking or atomics.  */
++
+ static bool
+ gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
+ 			long *istart, long *iend)
+@@ -168,13 +176,16 @@ GOMP_loop_runtime_start (long start, lon
+   switch (icv->run_sched_var)
+     {
+     case GFS_STATIC:
+-      return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
++      return gomp_loop_static_start (start, end, incr,
++				     icv->run_sched_chunk_size,
+ 				     istart, iend);
+     case GFS_DYNAMIC:
+-      return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
++      return gomp_loop_dynamic_start (start, end, incr,
++				      icv->run_sched_chunk_size,
+ 				      istart, iend);
+     case GFS_GUIDED:
+-      return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
++      return gomp_loop_guided_start (start, end, incr,
++				     icv->run_sched_chunk_size,
+ 				     istart, iend);
+     case GFS_AUTO:
+       /* For now map to schedule(static), later on we could play with feedback
+@@ -265,15 +276,15 @@ GOMP_loop_ordered_runtime_start (long st
+     {
+     case GFS_STATIC:
+       return gomp_loop_ordered_static_start (start, end, incr,
+-					     icv->run_sched_modifier,
++					     icv->run_sched_chunk_size,
+ 					     istart, iend);
+     case GFS_DYNAMIC:
+       return gomp_loop_ordered_dynamic_start (start, end, incr,
+-					      icv->run_sched_modifier,
++					      icv->run_sched_chunk_size,
+ 					      istart, iend);
+     case GFS_GUIDED:
+       return gomp_loop_ordered_guided_start (start, end, incr,
+-					     icv->run_sched_modifier,
++					     icv->run_sched_chunk_size,
+ 					     istart, iend);
+     case GFS_AUTO:
+       /* For now map to schedule(static), later on we could play with feedback
+@@ -285,6 +296,111 @@ GOMP_loop_ordered_runtime_start (long st
+     }
+ }
+ 
++/* The *_doacross_*_start routines are similar.  The only difference is that
++   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
++   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
++   while the other COUNTS array elements tell the library the number of
++   iterations in the ordered inner loops.  */
++
++static bool
++gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
++				 long chunk_size, long *istart, long *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  /* Loop bounds are 0 and COUNTS[0], step 1, with GFS_STATIC scheduling.  */
++  thr->ts.static_trip = 0;
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
++		      GFS_STATIC, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++  /* The result of gomp_iter_static_next is inverted for the caller.  */
++  return !gomp_iter_static_next (istart, iend);
++}
++
++static bool
++gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
++				  long chunk_size, long *istart, long *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  bool ret;
++  /* Loop bounds are 0 and COUNTS[0], step 1, with GFS_DYNAMIC scheduling.  */
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
++		      GFS_DYNAMIC, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++  /* Without sync builtins the iterator runs under the work-share lock.  */
++#ifdef HAVE_SYNC_BUILTINS
++  ret = gomp_iter_dynamic_next (istart, iend);
++#else
++  gomp_mutex_lock (&thr->ts.work_share->lock);
++  ret = gomp_iter_dynamic_next_locked (istart, iend);
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++  return ret;
++}
++
++static bool
++gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
++				 long chunk_size, long *istart, long *iend)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  bool ret;
++  /* Loop bounds are 0 and COUNTS[0], step 1, with GFS_GUIDED scheduling.  */
++  if (gomp_work_share_start (false))
++    {
++      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
++		      GFS_GUIDED, chunk_size);
++      gomp_doacross_init (ncounts, counts, chunk_size);
++      gomp_work_share_init_done ();
++    }
++  /* Without sync builtins the iterator runs under the work-share lock.  */
++#ifdef HAVE_SYNC_BUILTINS
++  ret = gomp_iter_guided_next (istart, iend);
++#else
++  gomp_mutex_lock (&thr->ts.work_share->lock);
++  ret = gomp_iter_guided_next_locked (istart, iend);
++  gomp_mutex_unlock (&thr->ts.work_share->lock);
++#endif
++
++  return ret;
++}
++
++bool
++GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
++				  long *istart, long *iend)
++{
++  struct gomp_task_icv *icv = gomp_icv (false);
++  switch (icv->run_sched_var)  /* Kind and chunk size come from the ICV.  */
++    {
++    case GFS_STATIC:
++      return gomp_loop_doacross_static_start (ncounts, counts,
++					      icv->run_sched_chunk_size,
++					      istart, iend);
++    case GFS_DYNAMIC:
++      return gomp_loop_doacross_dynamic_start (ncounts, counts,
++					       icv->run_sched_chunk_size,
++					       istart, iend);
++    case GFS_GUIDED:
++      return gomp_loop_doacross_guided_start (ncounts, counts,
++					      icv->run_sched_chunk_size,
++					      istart, iend);
++    case GFS_AUTO:
++      /* For now map to schedule(static), later on we could play with feedback
++	 driven choice.  */
++      return gomp_loop_doacross_static_start (ncounts, counts,
++					      0, istart, iend);
++    default:
++      abort ();  /* No other run_sched_var value is handled.  */
++    }
++}
++
+ /* The *_next routines are called when the thread completes processing of 
+    the iteration block currently assigned to it.  If the work-share 
+    construct is bound directly to a parallel construct, then the iteration
+@@ -483,7 +599,7 @@ GOMP_parallel_loop_runtime_start (void (
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+-			    icv->run_sched_var, icv->run_sched_modifier, 0);
++			    icv->run_sched_var, icv->run_sched_chunk_size, 0);
+ }
+ 
+ ialias_redirect (GOMP_parallel_end)
+@@ -521,6 +637,37 @@ GOMP_parallel_loop_guided (void (*fn) (v
+   GOMP_parallel_end ();
+ }
+ 
++#ifdef HAVE_ATTRIBUTE_ALIAS  /* Alias the nonmonotonic names to the plain ones.  */
++extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
++	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
++extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
++	__attribute__((alias ("GOMP_parallel_loop_guided")));
++#else  /* No alias support: define them as real functions instead.  */
++void
++GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
++					 unsigned num_threads, long start,
++					 long end, long incr, long chunk_size,
++					 unsigned flags)
++{
++  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++			    GFS_DYNAMIC, chunk_size, flags);
++  fn (data);
++  GOMP_parallel_end ();
++}
++
++void
++GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
++					unsigned num_threads, long start,
++					long end, long incr, long chunk_size,
++					unsigned flags)
++{
++  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
++			    GFS_GUIDED, chunk_size, flags);
++  fn (data);
++  GOMP_parallel_end ();
++}
++#endif  /* HAVE_ATTRIBUTE_ALIAS */
++
+ void
+ GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
+ 			    unsigned num_threads, long start, long end,
+@@ -528,7 +675,7 @@ GOMP_parallel_loop_runtime (void (*fn) (
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
+-			    icv->run_sched_var, icv->run_sched_modifier,
++			    icv->run_sched_var, icv->run_sched_chunk_size,
+ 			    flags);
+   fn (data);
+   GOMP_parallel_end ();
+@@ -569,6 +716,10 @@ extern __typeof(gomp_loop_dynamic_start)
+ 	__attribute__((alias ("gomp_loop_dynamic_start")));
+ extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
+ 	__attribute__((alias ("gomp_loop_guided_start")));
++extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
++	__attribute__((alias ("gomp_loop_dynamic_start")));
++extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
++	__attribute__((alias ("gomp_loop_guided_start")));
+ 
+ extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
+ 	__attribute__((alias ("gomp_loop_ordered_static_start")));
+@@ -577,12 +728,23 @@ extern __typeof(gomp_loop_ordered_dynami
+ extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
+ 	__attribute__((alias ("gomp_loop_ordered_guided_start")));
+ 
++extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
++	__attribute__((alias ("gomp_loop_doacross_static_start")));
++extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
++	__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
++extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
++	__attribute__((alias ("gomp_loop_doacross_guided_start")));
++
+ extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
+ 	__attribute__((alias ("gomp_loop_static_next")));
+ extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
+ 	__attribute__((alias ("gomp_loop_dynamic_next")));
+ extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
+ 	__attribute__((alias ("gomp_loop_guided_next")));
++extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
++	__attribute__((alias ("gomp_loop_dynamic_next")));
++extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
++	__attribute__((alias ("gomp_loop_guided_next")));
+ 
+ extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
+ 	__attribute__((alias ("gomp_loop_ordered_static_next")));
+@@ -613,6 +775,21 @@ GOMP_loop_guided_start (long start, long
+ }
+ 
+ bool
++GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
++				      long chunk_size, long *istart,
++				      long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
++}
++
++bool
++GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
++				     long chunk_size, long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
++}
++
++bool
+ GOMP_loop_ordered_static_start (long start, long end, long incr,
+ 				long chunk_size, long *istart, long *iend)
+ {
+@@ -637,6 +814,30 @@ GOMP_loop_ordered_guided_start (long sta
+ }
+ 
+ bool
++GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
++				 long chunk_size, long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
++					  istart, iend);
++}
++
++bool
++GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
++				  long chunk_size, long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
++					   istart, iend);
++}
++
++bool
++GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
++				 long chunk_size, long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
++					  istart, iend);
++}
++
++bool
+ GOMP_loop_static_next (long *istart, long *iend)
+ {
+   return gomp_loop_static_next (istart, iend);
+@@ -653,6 +854,18 @@ GOMP_loop_guided_next (long *istart, lon
+ {
+   return gomp_loop_guided_next (istart, iend);
+ }
++
++bool
++GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_dynamic_next (istart, iend);
++}
++
++bool
++GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
++{  /* This symbol is also declared as an alias elsewhere in this file.  */
++  return gomp_loop_guided_next (istart, iend);
++}
+ 
+ bool
+ GOMP_loop_ordered_static_next (long *istart, long *iend)
+--- libgomp/error.c.jj	2013-01-21 16:00:31.834953566 +0100
++++ libgomp/error.c	2016-07-13 16:57:04.437535335 +0200
+@@ -35,7 +35,26 @@
+ #include <stdlib.h>
+ 
+ 
+-static void
++#undef gomp_vdebug  /* Presumably a macro elsewhere; define the real function.  */
++void
++gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list)
++{
++  if (gomp_debug_var)  /* Output only when gomp_debug_var is nonzero.  */
++    vfprintf (stderr, msg, list);
++}
++
++#undef gomp_debug  /* Presumably a macro elsewhere; define the real function.  */
++void
++gomp_debug (int kind, const char *msg, ...)
++{
++  va_list list;
++  /* Variadic front end: pack the arguments and defer to gomp_vdebug.  */
++  va_start (list, msg);
++  gomp_vdebug (kind, msg, list);
++  va_end (list);
++}
++
++void
+ gomp_verror (const char *fmt, va_list list)
+ {
+   fputs ("\nlibgomp: ", stderr);
+@@ -54,13 +73,18 @@ gomp_error (const char *fmt, ...)
+ }
+ 
+ void
++gomp_vfatal (const char *fmt, va_list list)
++{
++  gomp_verror (fmt, list);  /* Emit the message via gomp_verror first.  */
++  exit (EXIT_FAILURE);  /* Never returns; code after a call is unreachable.  */
++}
++
++void
+ gomp_fatal (const char *fmt, ...)
+ {
+   va_list list;
+ 
+   va_start (list, fmt);
+-  gomp_verror (fmt, list);
++  gomp_vfatal (fmt, list);
+   va_end (list);
+-
+-  exit (EXIT_FAILURE);
+ }
+--- libgomp/Makefile.am.jj	2014-05-15 11:12:10.000000000 +0200
++++ libgomp/Makefile.am	2016-07-14 16:10:51.968202878 +0200
+@@ -60,7 +60,13 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L
+ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
+ 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
+ 	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
+-	time.c fortran.c affinity.c target.c
++	time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
++	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
++	oacc-plugin.c oacc-cuda.c priority_queue.c
++
++if USE_FORTRAN
++libgomp_la_SOURCES += openacc.f90
++endif
+ 
+ nodist_noinst_HEADERS = libgomp_f.h
+ nodist_libsubinclude_HEADERS = omp.h
+--- libgomp/Makefile.in.jj	2014-05-15 11:12:10.000000000 +0200
++++ libgomp/Makefile.in	2016-07-14 16:11:10.981954087 +0200
+@@ -36,6 +36,7 @@ POST_UNINSTALL = :
+ build_triplet = @build@
+ host_triplet = @host@
+ target_triplet = @target@
++@USE_FORTRAN_TRUE@am__append_1 = openacc.f90
+ subdir = .
+ DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+ 	$(top_srcdir)/configure $(am__configure_deps) \
+@@ -92,11 +93,15 @@ am__installdirs = "$(DESTDIR)$(toolexecl
+ 	"$(DESTDIR)$(toolexeclibdir)"
+ LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
+ libgomp_la_LIBADD =
++@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
+ am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
+ 	error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
+ 	parallel.lo sections.lo single.lo task.lo team.lo work.lo \
+ 	lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
+-	fortran.lo affinity.lo target.lo
++	fortran.lo affinity.lo target.lo splay-tree.lo \
++	libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \
++	oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \
++	priority_queue.lo $(am__objects_1)
+ libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
+ DEFAULT_INCLUDES = -I.@am__isrc@
+ depcomp = $(SHELL) $(top_srcdir)/../depcomp
+@@ -108,6 +113,13 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIB
+ 	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ 	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+ CCLD = $(CC)
++FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS)
++LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
++	--mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS)
++FCLD = $(FC)
++FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
++	--mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \
++	$(LDFLAGS) -o $@
+ SOURCES = $(libgomp_la_SOURCES)
+ MULTISRCTOP = 
+ MULTIBUILDTOP = 
+@@ -315,10 +327,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_i
+ libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
+ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
+ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
+-	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
+-	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
+-	time.c fortran.c affinity.c target.c
+-
++	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \
++	single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \
++	bar.c ptrlock.c time.c fortran.c affinity.c target.c \
++	splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
++	oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
++	priority_queue.c $(am__append_1)
+ nodist_noinst_HEADERS = libgomp_f.h
+ nodist_libsubinclude_HEADERS = omp.h
+ @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod
+@@ -351,7 +365,7 @@ all: config.h
+ 	$(MAKE) $(AM_MAKEFLAGS) all-recursive
+ 
+ .SUFFIXES:
+-.SUFFIXES: .c .dvi .lo .o .obj .ps
++.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps
+ am--refresh:
+ 	@:
+ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+@@ -463,17 +477,27 @@ distclean-compile:
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
+ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
+@@ -501,6 +525,15 @@ distclean-compile:
+ @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ @am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<
+ 
++.f90.o:
++	$(FCCOMPILE) -c -o $@ $<
++
++.f90.obj:
++	$(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
++
++.f90.lo:
++	$(LTFCCOMPILE) -c -o $@ $<
++
+ mostlyclean-libtool:
+ 	-rm -f *.lo
+ 
+--- libgomp/task.c.jj	2014-08-06 16:25:16.575091658 +0200
++++ libgomp/task.c	2016-07-13 17:47:58.722758497 +0200
+@@ -28,6 +28,7 @@
+ #include "libgomp.h"
+ #include <stdlib.h>
+ #include <string.h>
++#include "gomp-constants.h"
+ 
+ typedef struct gomp_task_depend_entry *hash_entry_type;
+ 
+@@ -63,6 +64,14 @@ void
+ gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
+ 		struct gomp_task_icv *prev_icv)
+ {
++  /* It would seem that using memset here would be a win, but it turns
++     out that partially filling gomp_task allows us to keep the
++     overhead of task creation low.  In the nqueens-1.c test, for a
++     sufficiently large N, we drop the overhead from 5-6% to 1%.
++
++     Note, the nqueens-1.c test in serial mode is a good test to
++     benchmark the overhead of creating tasks as there are millions of
++     tiny tasks created that all run undeferred.  */
+   task->parent = parent_task;
+   task->icv = *prev_icv;
+   task->kind = GOMP_TASK_IMPLICIT;
+@@ -71,7 +80,7 @@ gomp_init_task (struct gomp_task *task,
+   task->final_task = false;
+   task->copy_ctors_done = false;
+   task->parent_depends_on = false;
+-  task->children = NULL;
++  priority_queue_init (&task->children_queue);
+   task->taskgroup = NULL;
+   task->dependers = NULL;
+   task->depend_hash = NULL;
+@@ -90,30 +99,194 @@ gomp_end_task (void)
+   thr->task = task->parent;
+ }
+ 
++/* Clear the parent field of every task in LIST.  */
++
+ static inline void
+-gomp_clear_parent (struct gomp_task *children)
++gomp_clear_parent_in_list (struct priority_list *list)
+ {
+-  struct gomp_task *task = children;
+-
+-  if (task)
++  struct priority_node *p = list->tasks;
++  if (p)
+     do
+       {
+-	task->parent = NULL;
+-	task = task->next_child;
++	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
++	p = p->next;
+       }
+-    while (task != children);
++    while (p != list->tasks);
++}
++
++/* Splay tree version of gomp_clear_parent_in_list.
++
++   Clear the parent field of every task in NODE within SP, and free
++   the node when done.  */
++
++static void
++gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
++{
++  if (!node)
++    return;  /* Empty subtree: nothing to do.  */
++  prio_splay_tree_node left = node->left, right = node->right;
++  gomp_clear_parent_in_list (&node->key.l);
++#if _LIBGOMP_CHECKING_
++  memset (node, 0xaf, sizeof (*node));  /* Overwrite before freeing.  */
++#endif  /* _LIBGOMP_CHECKING_ */
++  /* No need to remove the node from the tree.  We're nuking
++     everything, so just free the nodes and our caller can clear the
++     entire splay tree.  */
++  free (node);  /* LEFT and RIGHT were read above, before this free.  */
++  gomp_clear_parent_in_tree (sp, left);  /* SP is only passed through.  */
++  gomp_clear_parent_in_tree (sp, right);
++}
++
++/* Clear the parent field of every task in Q and remove every task
++   from Q.  */
++
++static inline void
++gomp_clear_parent (struct priority_queue *q)
++{
++  if (priority_queue_multi_p (q))  /* Tree form: tasks hang off q->t.  */
++    {
++      gomp_clear_parent_in_tree (&q->t, q->t.root);
++      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
++	 No need to remove anything.  We can just nuke everything.  */
++      q->t.root = NULL;
++    }
++  else
++    gomp_clear_parent_in_list (&q->l);  /* List nodes are left linked.  */
+ }
+ 
+-static void gomp_task_maybe_wait_for_dependencies (void **depend);
++/* Helper function for GOMP_task and gomp_create_target_task.
++
++   For a TASK with in/out dependencies, fill in the various dependency
++   queues.  PARENT is the parent of said task.  DEPEND is as in
++   GOMP_task.  */
++
++static void
++gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
++			 void **depend)
++{
++  size_t ndepend = (uintptr_t) depend[0];
++  size_t nout = (uintptr_t) depend[1];
++  size_t i;
++  hash_entry_type ent;
++
++  task->depend_count = ndepend;
++  task->num_dependees = 0;
++  if (parent->depend_hash == NULL)  /* Created on first use.  */
++    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
++  for (i = 0; i < ndepend; i++)
++    {
++      task->depend[i].addr = depend[2 + i];
++      task->depend[i].next = NULL;
++      task->depend[i].prev = NULL;
++      task->depend[i].task = task;
++      task->depend[i].is_in = i >= nout;  /* First NOUT entries are outs.  */
++      task->depend[i].redundant = false;
++      task->depend[i].redundant_out = false;
++
++      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
++					      &task->depend[i], INSERT);
++      hash_entry_type out = NULL, last = NULL;
++      if (*slot)
++	{
++	  /* If multiple depends on the same task are the same, all but the
++	     first one are redundant.  As inout/out come first, if any of them
++	     is inout/out, it will win, which is the right semantics.  */
++	  if ((*slot)->task == task)
++	    {
++	      task->depend[i].redundant = true;
++	      continue;
++	    }
++	  for (ent = *slot; ent; ent = ent->next)
++	    {
++	      if (ent->redundant_out)
++		break;
++
++	      last = ent;
++
++	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
++	      if (i >= nout && ent->is_in)
++		continue;
++
++	      if (!ent->is_in)
++		out = ent;
++
++	      struct gomp_task *tsk = ent->task;
++	      if (tsk->dependers == NULL)
++		{
++		  tsk->dependers
++		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
++				   + 6 * sizeof (struct gomp_task *));
++		  tsk->dependers->n_elem = 1;
++		  tsk->dependers->allocated = 6;
++		  tsk->dependers->elem[0] = task;
++		  task->num_dependees++;
++		  continue;
++		}
++	      /* We already have some other dependency on tsk from earlier
++		 depend clause.  */
++	      else if (tsk->dependers->n_elem
++		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
++			   == task))
++		continue;
++	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
++		{
++		  tsk->dependers->allocated
++		    = tsk->dependers->allocated * 2 + 2;
++		  tsk->dependers
++		    = gomp_realloc (tsk->dependers,
++				    sizeof (struct gomp_dependers_vec)
++				    + (tsk->dependers->allocated
++				       * sizeof (struct gomp_task *)));
++		}
++	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
++	      task->num_dependees++;
++	    }
++	  task->depend[i].next = *slot;  /* Link in front of the old head.  */
++	  (*slot)->prev = &task->depend[i];
++	}
++      *slot = &task->depend[i];  /* New entry becomes the chain head.  */
++
++      /* There is no need to store more than one depend({,in}out:) task per
++	 address in the hash table chain for the purpose of creation of
++	 deferred tasks, because each out depends on all earlier outs, thus it
++	 is enough to record just the last depend({,in}out:).  For depend(in:),
++	 we need to keep all of the previous ones not terminated yet, because
++	 a later depend({,in}out:) might need to depend on all of them.  So, if
++	 the new task's clause is depend({,in}out:), we know there is at most
++	 one other depend({,in}out:) clause in the list (out).  For
++	 non-deferred tasks we want to see all outs, so they are moved to the
++	 end of the chain, after first redundant_out entry all following
++	 entries should be redundant_out.  */
++      if (!task->depend[i].is_in && out)
++	{
++	  if (out != last)
++	    {
++	      out->next->prev = out->prev;  /* Unlink OUT from the chain...  */
++	      out->prev->next = out->next;
++	      out->next = last->next;  /* ...and reinsert it after LAST.  */
++	      out->prev = last;
++	      last->next = out;
++	      if (out->next)
++		out->next->prev = out;
++	    }
++	  out->redundant_out = true;
++	}
++    }
++}
+ 
+ /* Called when encountering an explicit task directive.  If IF_CLAUSE is
+    false, then we must not delay in executing the task.  If UNTIED is true,
+-   then the task may be executed by any member of the team.  */
++   then the task may be executed by any member of the team.
++
++   DEPEND is an array containing:
++	depend[0]: number of depend elements.
++	depend[1]: number of depend elements of type "out".
++	depend[2..N+1]: address of [1..N]th depend element.  */
+ 
+ void
+ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+ 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
+-	   void **depend)
++	   void **depend, int priority)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+   struct gomp_team *team = thr->ts.team;
+@@ -125,8 +298,7 @@ GOMP_task (void (*fn) (void *), void *da
+      might be running on different thread than FN.  */
+   if (cpyfn)
+     if_clause = false;
+-  if (flags & 1)
+-    flags &= ~1;
++  flags &= ~GOMP_TASK_FLAG_UNTIED;
+ #endif
+ 
+   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+@@ -135,6 +307,11 @@ GOMP_task (void (*fn) (void *), void *da
+ 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
+     return;
+ 
++  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
++    priority = 0;
++  else if (priority > gomp_max_task_priority_var)
++    priority = gomp_max_task_priority_var;
++
+   if (!if_clause || team == NULL
+       || (thr->task && thr->task->final_task)
+       || team->task_count > 64 * team->nthreads)
+@@ -147,12 +324,15 @@ GOMP_task (void (*fn) (void *), void *da
+ 	 depend clauses for non-deferred tasks other than this, because
+ 	 the parent task is suspended until the child task finishes and thus
+ 	 it can't start further child tasks.  */
+-      if ((flags & 8) && thr->task && thr->task->depend_hash)
++      if ((flags & GOMP_TASK_FLAG_DEPEND)
++	  && thr->task && thr->task->depend_hash)
+ 	gomp_task_maybe_wait_for_dependencies (depend);
+ 
+       gomp_init_task (&task, thr->task, gomp_icv (false));
+-      task.kind = GOMP_TASK_IFFALSE;
+-      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
++      task.kind = GOMP_TASK_UNDEFERRED;
++      task.final_task = (thr->task && thr->task->final_task)
++			|| (flags & GOMP_TASK_FLAG_FINAL);
++      task.priority = priority;
+       if (thr->task)
+ 	{
+ 	  task.in_tied_task = thr->task->in_tied_task;
+@@ -178,10 +358,10 @@ GOMP_task (void (*fn) (void *), void *da
+ 	 child thread, but seeing a stale non-NULL value is not a
+ 	 problem.  Once past the task_lock acquisition, this thread
+ 	 will see the real value of task.children.  */
+-      if (task.children != NULL)
++      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
+ 	{
+ 	  gomp_mutex_lock (&team->task_lock);
+-	  gomp_clear_parent (task.children);
++	  gomp_clear_parent (&task.children_queue);
+ 	  gomp_mutex_unlock (&team->task_lock);
+ 	}
+       gomp_end_task ();
+@@ -195,7 +375,7 @@ GOMP_task (void (*fn) (void *), void *da
+       bool do_wake;
+       size_t depend_size = 0;
+ 
+-      if (flags & 8)
++      if (flags & GOMP_TASK_FLAG_DEPEND)
+ 	depend_size = ((uintptr_t) depend[0]
+ 		       * sizeof (struct gomp_task_depend_entry));
+       task = gomp_malloc (sizeof (*task) + depend_size
+@@ -203,7 +383,8 @@ GOMP_task (void (*fn) (void *), void *da
+       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
+ 		      & ~(uintptr_t) (arg_align - 1));
+       gomp_init_task (task, parent, gomp_icv (false));
+-      task->kind = GOMP_TASK_IFFALSE;
++      task->priority = priority;
++      task->kind = GOMP_TASK_UNDEFERRED;
+       task->in_tied_task = parent->in_tied_task;
+       task->taskgroup = taskgroup;
+       thr->task = task;
+@@ -218,7 +399,7 @@ GOMP_task (void (*fn) (void *), void *da
+       task->kind = GOMP_TASK_WAITING;
+       task->fn = fn;
+       task->fn_data = arg;
+-      task->final_task = (flags & 2) >> 1;
++      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+       gomp_mutex_lock (&team->task_lock);
+       /* If parallel or taskgroup has been cancelled, don't start new
+ 	 tasks.  */
+@@ -235,171 +416,39 @@ GOMP_task (void (*fn) (void *), void *da
+ 	taskgroup->num_children++;
+       if (depend_size)
+ 	{
+-	  size_t ndepend = (uintptr_t) depend[0];
+-	  size_t nout = (uintptr_t) depend[1];
+-	  size_t i;
+-	  hash_entry_type ent;
+-
+-	  task->depend_count = ndepend;
+-	  task->num_dependees = 0;
+-	  if (parent->depend_hash == NULL)
+-	    parent->depend_hash
+-	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
+-	  for (i = 0; i < ndepend; i++)
+-	    {
+-	      task->depend[i].addr = depend[2 + i];
+-	      task->depend[i].next = NULL;
+-	      task->depend[i].prev = NULL;
+-	      task->depend[i].task = task;
+-	      task->depend[i].is_in = i >= nout;
+-	      task->depend[i].redundant = false;
+-	      task->depend[i].redundant_out = false;
+-
+-	      hash_entry_type *slot
+-		= htab_find_slot (&parent->depend_hash, &task->depend[i],
+-				  INSERT);
+-	      hash_entry_type out = NULL, last = NULL;
+-	      if (*slot)
+-		{
+-		  /* If multiple depends on the same task are the
+-		     same, all but the first one are redundant.
+-		     As inout/out come first, if any of them is
+-		     inout/out, it will win, which is the right
+-		     semantics.  */
+-		  if ((*slot)->task == task)
+-		    {
+-		      task->depend[i].redundant = true;
+-		      continue;
+-		    }
+-		  for (ent = *slot; ent; ent = ent->next)
+-		    {
+-		      if (ent->redundant_out)
+-			break;
+-
+-		      last = ent;
+-
+-		      /* depend(in:...) doesn't depend on earlier
+-			 depend(in:...).  */
+-		      if (i >= nout && ent->is_in)
+-			continue;
+-
+-		      if (!ent->is_in)
+-			out = ent;
+-
+-		      struct gomp_task *tsk = ent->task;
+-		      if (tsk->dependers == NULL)
+-			{
+-			  tsk->dependers
+-			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
+-					   + 6 * sizeof (struct gomp_task *));
+-			  tsk->dependers->n_elem = 1;
+-			  tsk->dependers->allocated = 6;
+-			  tsk->dependers->elem[0] = task;
+-			  task->num_dependees++;
+-			  continue;
+-			}
+-		      /* We already have some other dependency on tsk
+-			 from earlier depend clause.  */
+-		      else if (tsk->dependers->n_elem
+-			       && (tsk->dependers->elem[tsk->dependers->n_elem
+-							- 1]
+-				   == task))
+-			continue;
+-		      else if (tsk->dependers->n_elem
+-			       == tsk->dependers->allocated)
+-			{
+-			  tsk->dependers->allocated
+-			    = tsk->dependers->allocated * 2 + 2;
+-			  tsk->dependers
+-			    = gomp_realloc (tsk->dependers,
+-					    sizeof (struct gomp_dependers_vec)
+-					    + (tsk->dependers->allocated
+-					       * sizeof (struct gomp_task *)));
+-			}
+-		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
+-		      task->num_dependees++;
+-		    }
+-		  task->depend[i].next = *slot;
+-		  (*slot)->prev = &task->depend[i];
+-		}
+-	      *slot = &task->depend[i];
+-
+-	      /* There is no need to store more than one depend({,in}out:)
+-		 task per address in the hash table chain for the purpose
+-		 of creation of deferred tasks, because each out
+-		 depends on all earlier outs, thus it is enough to record
+-		 just the last depend({,in}out:).  For depend(in:), we need
+-		 to keep all of the previous ones not terminated yet, because
+-		 a later depend({,in}out:) might need to depend on all of
+-		 them.  So, if the new task's clause is depend({,in}out:),
+-		 we know there is at most one other depend({,in}out:) clause
+-		 in the list (out).  For non-deferred tasks we want to see
+-		 all outs, so they are moved to the end of the chain,
+-		 after first redundant_out entry all following entries
+-		 should be redundant_out.  */
+-	      if (!task->depend[i].is_in && out)
+-		{
+-		  if (out != last)
+-		    {
+-		      out->next->prev = out->prev;
+-		      out->prev->next = out->next;
+-		      out->next = last->next;
+-		      out->prev = last;
+-		      last->next = out;
+-		      if (out->next)
+-			out->next->prev = out;
+-		    }
+-		  out->redundant_out = true;
+-		}
+-	    }
++	  gomp_task_handle_depend (task, parent, depend);
+ 	  if (task->num_dependees)
+ 	    {
++	      /* Tasks that depend on other tasks are not put into the
++		 various waiting queues, so we are done for now.  Said
++		 tasks are instead put into the queues via
++		 gomp_task_run_post_handle_dependers() after their
++		 dependencies have been satisfied.  After which, they
++		 can be picked up by the various scheduling
++		 points.  */
+ 	      gomp_mutex_unlock (&team->task_lock);
+ 	      return;
+ 	    }
+ 	}
+-      if (parent->children)
+-	{
+-	  task->next_child = parent->children;
+-	  task->prev_child = parent->children->prev_child;
+-	  task->next_child->prev_child = task;
+-	  task->prev_child->next_child = task;
+-	}
+-      else
+-	{
+-	  task->next_child = task;
+-	  task->prev_child = task;
+-	}
+-      parent->children = task;
++
++      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
++			     task, priority,
++			     PRIORITY_INSERT_BEGIN,
++			     /*adjust_parent_depends_on=*/false,
++			     task->parent_depends_on);
+       if (taskgroup)
+-	{
+-	  if (taskgroup->children)
+-	    {
+-	      task->next_taskgroup = taskgroup->children;
+-	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+-	      task->next_taskgroup->prev_taskgroup = task;
+-	      task->prev_taskgroup->next_taskgroup = task;
+-	    }
+-	  else
+-	    {
+-	      task->next_taskgroup = task;
+-	      task->prev_taskgroup = task;
+-	    }
+-	  taskgroup->children = task;
+-	}
+-      if (team->task_queue)
+-	{
+-	  task->next_queue = team->task_queue;
+-	  task->prev_queue = team->task_queue->prev_queue;
+-	  task->next_queue->prev_queue = task;
+-	  task->prev_queue->next_queue = task;
+-	}
+-      else
+-	{
+-	  task->next_queue = task;
+-	  task->prev_queue = task;
+-	  team->task_queue = task;
+-	}
++	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++			       task, priority,
++			       PRIORITY_INSERT_BEGIN,
++			       /*adjust_parent_depends_on=*/false,
++			       task->parent_depends_on);
++
++      priority_queue_insert (PQ_TEAM, &team->task_queue,
++			     task, priority,
++			     PRIORITY_INSERT_END,
++			     /*adjust_parent_depends_on=*/false,
++			     task->parent_depends_on);
++
+       ++team->task_count;
+       ++team->task_queued_count;
+       gomp_team_barrier_set_task_pending (&team->barrier);
+@@ -411,36 +460,529 @@ GOMP_task (void (*fn) (void *), void *da
+     }
+ }
+ 
+-static inline bool
+-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
+-		   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
++ialias (GOMP_taskgroup_start)
++ialias (GOMP_taskgroup_end)
++
++#define TYPE long
++#define UTYPE unsigned long
++#define TYPE_is_long 1
++#include "taskloop.c"
++#undef TYPE
++#undef UTYPE
++#undef TYPE_is_long
++
++#define TYPE unsigned long long
++#define UTYPE TYPE
++#define GOMP_taskloop GOMP_taskloop_ull
++#include "taskloop.c"
++#undef TYPE
++#undef UTYPE
++#undef GOMP_taskloop
++
++static void inline
++priority_queue_move_task_first (enum priority_queue_type type,
++				struct priority_queue *head,
++				struct gomp_task *task)
+ {
++#if _LIBGOMP_CHECKING_
++  if (!priority_queue_task_in_queue_p (type, head, task))
++    gomp_fatal ("Attempt to move first missing task %p", task);
++#endif
++  struct priority_list *list;
++  if (priority_queue_multi_p (head))
++    {
++      list = priority_queue_lookup_priority (head, task->priority);
++#if _LIBGOMP_CHECKING_
++      if (!list)
++	gomp_fatal ("Unable to find priority %d", task->priority);
++#endif
++    }
++  else
++    list = &head->l;
++  priority_list_remove (list, task_to_priority_node (type, task), 0);
++  priority_list_insert (type, list, task, task->priority,
++			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
++			task->parent_depends_on);
++}
++
++/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
++   with team->task_lock held, or is executed in the thread that called
++   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
++   run before it acquires team->task_lock.  */
++
++static void
++gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
++{
++  struct gomp_task *parent = task->parent;
+   if (parent)
++    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
++				    task);
++
++  struct gomp_taskgroup *taskgroup = task->taskgroup;
++  if (taskgroup)
++    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++				    task);
++
++  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
++			 PRIORITY_INSERT_BEGIN, false,
++			 task->parent_depends_on);
++  task->kind = GOMP_TASK_WAITING;
++  if (parent && parent->taskwait)
+     {
+-      if (parent->children == child_task)
+-	parent->children = child_task->next_child;
+-      if (__builtin_expect (child_task->parent_depends_on, 0)
+-	  && parent->taskwait->last_parent_depends_on == child_task)
+-	{
+-	  if (child_task->prev_child->kind == GOMP_TASK_WAITING
+-	      && child_task->prev_child->parent_depends_on)
+-	    parent->taskwait->last_parent_depends_on = child_task->prev_child;
+-	  else
+-	    parent->taskwait->last_parent_depends_on = NULL;
++      if (parent->taskwait->in_taskwait)
++	{
++	  /* One more task has had its dependencies met.
++	     Inform any waiters.  */
++	  parent->taskwait->in_taskwait = false;
++	  gomp_sem_post (&parent->taskwait->taskwait_sem);
+ 	}
++      else if (parent->taskwait->in_depend_wait)
++	{
++	  /* One more task has had its dependencies met.
++	     Inform any waiters.  */
++	  parent->taskwait->in_depend_wait = false;
++	  gomp_sem_post (&parent->taskwait->taskwait_sem);
++	}
++    }
++  if (taskgroup && taskgroup->in_taskgroup_wait)
++    {
++      /* One more task has had its dependencies met.
++	 Inform any waiters.  */
++      taskgroup->in_taskgroup_wait = false;
++      gomp_sem_post (&taskgroup->taskgroup_sem);
+     }
+-  if (taskgroup && taskgroup->children == child_task)
+-    taskgroup->children = child_task->next_taskgroup;
+-  child_task->prev_queue->next_queue = child_task->next_queue;
+-  child_task->next_queue->prev_queue = child_task->prev_queue;
+-  if (team->task_queue == child_task)
++
++  ++team->task_queued_count;
++  gomp_team_barrier_set_task_pending (&team->barrier);
++  /* This cannot be done after releasing team->task_lock, as
++     gomp_target_task_completion is run from an unrelated thread and
++     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
++     the team could be gone already.  */
++  if (team->nthreads > team->task_running_count)
++    gomp_team_barrier_wake (&team->barrier, 1);
++}
++
++/* Signal that a target task TTASK has completed the asynchronously
++   running phase and should be requeued as a task to handle the
++   variable unmapping.  */
++
++void
++GOMP_PLUGIN_target_task_completion (void *data)
++{
++  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
++  struct gomp_task *task = ttask->task;
++  struct gomp_team *team = ttask->team;
++
++  gomp_mutex_lock (&team->task_lock);
++  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
+     {
+-      if (child_task->next_queue != child_task)
+-	team->task_queue = child_task->next_queue;
++      ttask->state = GOMP_TARGET_TASK_FINISHED;
++      gomp_mutex_unlock (&team->task_lock);
++      return;
++    }
++  ttask->state = GOMP_TARGET_TASK_FINISHED;
++  gomp_target_task_completion (team, task);
++  gomp_mutex_unlock (&team->task_lock);
++}
++
++static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
++
++/* Called for nowait target tasks.  */
++
++bool
++gomp_create_target_task (struct gomp_device_descr *devicep,
++			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
++			 size_t *sizes, unsigned short *kinds,
++			 unsigned int flags, void **depend, void **args,
++			 enum gomp_target_task_state state)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++
++  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
++  if (team
++      && (gomp_team_barrier_cancelled (&team->barrier)
++	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
++    return true;
++
++  struct gomp_target_task *ttask;
++  struct gomp_task *task;
++  struct gomp_task *parent = thr->task;
++  struct gomp_taskgroup *taskgroup = parent->taskgroup;
++  bool do_wake;
++  size_t depend_size = 0;
++  uintptr_t depend_cnt = 0;
++  size_t tgt_align = 0, tgt_size = 0;
++
++  if (depend != NULL)
++    {
++      depend_cnt = (uintptr_t) depend[0];
++      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
++    }
++  if (fn)
++    {
++      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
++	 firstprivate on the target task.  */
++      size_t i;
++      for (i = 0; i < mapnum; i++)
++	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
++	  {
++	    size_t align = (size_t) 1 << (kinds[i] >> 8);
++	    if (tgt_align < align)
++	      tgt_align = align;
++	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
++	    tgt_size += sizes[i];
++	  }
++      if (tgt_align)
++	tgt_size += tgt_align - 1;
+       else
+-	team->task_queue = NULL;
++	tgt_size = 0;
+     }
++
++  task = gomp_malloc (sizeof (*task) + depend_size
++		      + sizeof (*ttask)
++		      + mapnum * (sizeof (void *) + sizeof (size_t)
++				  + sizeof (unsigned short))
++		      + tgt_size);
++  gomp_init_task (task, parent, gomp_icv (false));
++  task->priority = 0;
++  task->kind = GOMP_TASK_WAITING;
++  task->in_tied_task = parent->in_tied_task;
++  task->taskgroup = taskgroup;
++  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
++  ttask->devicep = devicep;
++  ttask->fn = fn;
++  ttask->mapnum = mapnum;
++  ttask->args = args;
++  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
++  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
++  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
++  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
++  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
++  if (tgt_align)
++    {
++      char *tgt = (char *) &ttask->kinds[mapnum];
++      size_t i;
++      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
++      if (al)
++	tgt += tgt_align - al;
++      tgt_size = 0;
++      for (i = 0; i < mapnum; i++)
++	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
++	  {
++	    size_t align = (size_t) 1 << (kinds[i] >> 8);
++	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
++	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
++	    ttask->hostaddrs[i] = tgt + tgt_size;
++	    tgt_size = tgt_size + sizes[i];
++	  }
++    }
++  ttask->flags = flags;
++  ttask->state = state;
++  ttask->task = task;
++  ttask->team = team;
++  task->fn = NULL;
++  task->fn_data = ttask;
++  task->final_task = 0;
++  gomp_mutex_lock (&team->task_lock);
++  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
++  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
++			|| (taskgroup && taskgroup->cancelled), 0))
++    {
++      gomp_mutex_unlock (&team->task_lock);
++      gomp_finish_task (task);
++      free (task);
++      return true;
++    }
++  if (depend_size)
++    {
++      gomp_task_handle_depend (task, parent, depend);
++      if (task->num_dependees)
++	{
++	  if (taskgroup)
++	    taskgroup->num_children++;
++	  gomp_mutex_unlock (&team->task_lock);
++	  return true;
++	}
++    }
++  if (state == GOMP_TARGET_TASK_DATA)
++    {
++      gomp_task_run_post_handle_depend_hash (task);
++      gomp_mutex_unlock (&team->task_lock);
++      gomp_finish_task (task);
++      free (task);
++      return false;
++    }
++  if (taskgroup)
++    taskgroup->num_children++;
++  /* For async offloading, if we don't need to wait for dependencies,
++     run the gomp_target_task_fn right away, essentially schedule the
++     mapping part of the task in the current thread.  */
++  if (devicep != NULL
++      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
++    {
++      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
++			     PRIORITY_INSERT_END,
++			     /*adjust_parent_depends_on=*/false,
++			     task->parent_depends_on);
++      if (taskgroup)
++	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++			       task, 0, PRIORITY_INSERT_END,
++			       /*adjust_parent_depends_on=*/false,
++			       task->parent_depends_on);
++      task->pnode[PQ_TEAM].next = NULL;
++      task->pnode[PQ_TEAM].prev = NULL;
++      task->kind = GOMP_TASK_TIED;
++      ++team->task_count;
++      gomp_mutex_unlock (&team->task_lock);
++
++      thr->task = task;
++      gomp_target_task_fn (task->fn_data);
++      thr->task = parent;
++
++      gomp_mutex_lock (&team->task_lock);
++      task->kind = GOMP_TASK_ASYNC_RUNNING;
++      /* If GOMP_PLUGIN_target_task_completion has run already
++	 in between gomp_target_task_fn and the mutex lock,
++	 perform the requeuing here.  */
++      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
++	gomp_target_task_completion (team, task);
++      else
++	ttask->state = GOMP_TARGET_TASK_RUNNING;
++      gomp_mutex_unlock (&team->task_lock);
++      return true;
++    }
++  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
++			 PRIORITY_INSERT_BEGIN,
++			 /*adjust_parent_depends_on=*/false,
++			 task->parent_depends_on);
++  if (taskgroup)
++    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
++			   PRIORITY_INSERT_BEGIN,
++			   /*adjust_parent_depends_on=*/false,
++			   task->parent_depends_on);
++  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
++			 PRIORITY_INSERT_END,
++			 /*adjust_parent_depends_on=*/false,
++			 task->parent_depends_on);
++  ++team->task_count;
++  ++team->task_queued_count;
++  gomp_team_barrier_set_task_pending (&team->barrier);
++  do_wake = team->task_running_count + !parent->in_tied_task
++	    < team->nthreads;
++  gomp_mutex_unlock (&team->task_lock);
++  if (do_wake)
++    gomp_team_barrier_wake (&team->barrier, 1);
++  return true;
++}
++
++/* Given a parent_depends_on task in LIST, move it to the front of its
++   priority so it is run as soon as possible.
++
++   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
++
++   We rearrange the queue such that all parent_depends_on tasks are
++   first, and last_parent_depends_on points to the last such task we
++   rearranged.  For example, given the following tasks in a queue
++   where PD[123] are the parent_depends_on tasks:
++
++	task->children
++	|
++	V
++	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
++
++	We rearrange such that:
++
++	task->children
++	|	       +--- last_parent_depends_on
++	|	       |
++	V	       V
++	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
++
++static void inline
++priority_list_upgrade_task (struct priority_list *list,
++			    struct priority_node *node)
++{
++  struct priority_node *last_parent_depends_on
++    = list->last_parent_depends_on;
++  if (last_parent_depends_on)
++    {
++      node->prev->next = node->next;
++      node->next->prev = node->prev;
++      node->prev = last_parent_depends_on;
++      node->next = last_parent_depends_on->next;
++      node->prev->next = node;
++      node->next->prev = node;
++    }
++  else if (node != list->tasks)
++    {
++      node->prev->next = node->next;
++      node->next->prev = node->prev;
++      node->prev = list->tasks->prev;
++      node->next = list->tasks;
++      list->tasks = node;
++      node->prev->next = node;
++      node->next->prev = node;
++    }
++  list->last_parent_depends_on = node;
++}
++
++/* Given a parent_depends_on TASK in its parent's children_queue, move
++   it to the front of its priority so it is run as soon as possible.
++
++   PARENT is passed as an optimization.
++
++   (This function could be defined in priority_queue.c, but we want it
++   inlined, and putting it in priority_queue.h is not an option, given
++   that gomp_task has not been properly defined at that point).  */
++
++static void inline
++priority_queue_upgrade_task (struct gomp_task *task,
++			     struct gomp_task *parent)
++{
++  struct priority_queue *head = &parent->children_queue;
++  struct priority_node *node = &task->pnode[PQ_CHILDREN];
++#if _LIBGOMP_CHECKING_
++  if (!task->parent_depends_on)
++    gomp_fatal ("priority_queue_upgrade_task: task must be a "
++		"parent_depends_on task");
++  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
++    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
++#endif
++  if (priority_queue_multi_p (head))
++    {
++      struct priority_list *list
++	= priority_queue_lookup_priority (head, task->priority);
++      priority_list_upgrade_task (list, node);
++    }
++  else
++    priority_list_upgrade_task (&head->l, node);
++}
++
++/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
++   the way in LIST so that other tasks can be considered for
++   execution.  LIST contains tasks of type TYPE.
++
++   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
++   if applicable.  */
++
++static void inline
++priority_list_downgrade_task (enum priority_queue_type type,
++			      struct priority_list *list,
++			      struct gomp_task *child_task)
++{
++  struct priority_node *node = task_to_priority_node (type, child_task);
++  if (list->tasks == node)
++    list->tasks = node->next;
++  else if (node->next != list->tasks)
++    {
++      /* The task in NODE is about to become TIED and TIED tasks
++	 cannot come before WAITING tasks.  If we're about to
++	 leave the queue in such an indeterminate state, rewire
++	 things appropriately.  However, a TIED task at the end is
++	 perfectly fine.  */
++      struct gomp_task *next_task = priority_node_to_task (type, node->next);
++      if (next_task->kind == GOMP_TASK_WAITING)
++	{
++	  /* Remove from list.  */
++	  node->prev->next = node->next;
++	  node->next->prev = node->prev;
++	  /* Rewire at the end.  */
++	  node->next = list->tasks;
++	  node->prev = list->tasks->prev;
++	  list->tasks->prev->next = node;
++	  list->tasks->prev = node;
++	}
++    }
++
++  /* If the current task is the last_parent_depends_on for its
++     priority, adjust last_parent_depends_on appropriately.  */
++  if (__builtin_expect (child_task->parent_depends_on, 0)
++      && list->last_parent_depends_on == node)
++    {
++      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
++      if (node->prev != node
++	  && prev_child->kind == GOMP_TASK_WAITING
++	  && prev_child->parent_depends_on)
++	list->last_parent_depends_on = node->prev;
++      else
++	{
++	  /* There are no more parent_depends_on entries waiting
++	     to run, clear the list.  */
++	  list->last_parent_depends_on = NULL;
++	}
++    }
++}
++
++/* Given a TASK in HEAD that is about to be executed, move it out of
++   the way so that other tasks can be considered for execution.  HEAD
++   contains tasks of type TYPE.
++
++   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
++   if applicable.
++
++   (This function could be defined in priority_queue.c, but we want it
++   inlined, and putting it in priority_queue.h is not an option, given
++   that gomp_task has not been properly defined at that point).  */
++
++static void inline
++priority_queue_downgrade_task (enum priority_queue_type type,
++			       struct priority_queue *head,
++			       struct gomp_task *task)
++{
++#if _LIBGOMP_CHECKING_
++  if (!priority_queue_task_in_queue_p (type, head, task))
++    gomp_fatal ("Attempt to downgrade missing task %p", task);
++#endif
++  if (priority_queue_multi_p (head))
++    {
++      struct priority_list *list
++	= priority_queue_lookup_priority (head, task->priority);
++      priority_list_downgrade_task (type, list, task);
++    }
++  else
++    priority_list_downgrade_task (type, &head->l, task);
++}
++
++/* Setup CHILD_TASK to execute.  This is done by setting the task to
++   TIED, and updating all relevant queues so that CHILD_TASK is no
++   longer chosen for scheduling.  Also, remove CHILD_TASK from the
++   overall team task queue entirely.
++
++   Return TRUE if task or its containing taskgroup has been
++   cancelled.  */
++
++static inline bool
++gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
++		   struct gomp_team *team)
++{
++#if _LIBGOMP_CHECKING_
++  if (child_task->parent)
++    priority_queue_verify (PQ_CHILDREN,
++			   &child_task->parent->children_queue, true);
++  if (child_task->taskgroup)
++    priority_queue_verify (PQ_TASKGROUP,
++			   &child_task->taskgroup->taskgroup_queue, false);
++  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
++#endif
++
++  /* Task is about to go tied, move it out of the way.  */
++  if (parent)
++    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
++				   child_task);
++
++  /* Task is about to go tied, move it out of the way.  */
++  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
++  if (taskgroup)
++    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++				   child_task);
++
++  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
++			 MEMMODEL_RELAXED);
++  child_task->pnode[PQ_TEAM].next = NULL;
++  child_task->pnode[PQ_TEAM].prev = NULL;
+   child_task->kind = GOMP_TASK_TIED;
++
+   if (--team->task_queued_count == 0)
+     gomp_team_barrier_clear_task_pending (&team->barrier);
+   if ((gomp_team_barrier_cancelled (&team->barrier)
+@@ -478,6 +1020,14 @@ gomp_task_run_post_handle_depend_hash (s
+       }
+ }
+ 
++/* After a CHILD_TASK has been run, adjust the dependency queue for
++   each task that depends on CHILD_TASK, to record the fact that there
++   is one less dependency to worry about.  If a task that depended on
++   CHILD_TASK now has no dependencies, place it in the various queues
++   so it gets scheduled to run.
++
++   TEAM is the team to which CHILD_TASK belongs.  */
++
+ static size_t
+ gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
+ 				     struct gomp_team *team)
+@@ -487,91 +1037,60 @@ gomp_task_run_post_handle_dependers (str
+   for (i = 0; i < count; i++)
+     {
+       struct gomp_task *task = child_task->dependers->elem[i];
++
++      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
++	 TASK's remaining dependencies.  Once TASK has no other
++	 dependencies, put it into the various queues so it will get
++	 scheduled for execution.  */
+       if (--task->num_dependees != 0)
+ 	continue;
+ 
+       struct gomp_taskgroup *taskgroup = task->taskgroup;
+       if (parent)
+ 	{
+-	  if (parent->children)
+-	    {
+-	      /* If parent is in gomp_task_maybe_wait_for_dependencies
+-		 and it doesn't need to wait for this task, put it after
+-		 all ready to run tasks it needs to wait for.  */
+-	      if (parent->taskwait && parent->taskwait->last_parent_depends_on
+-		  && !task->parent_depends_on)
+-		{
+-		  struct gomp_task *last_parent_depends_on
+-		    = parent->taskwait->last_parent_depends_on;
+-		  task->next_child = last_parent_depends_on->next_child;
+-		  task->prev_child = last_parent_depends_on;
+-		}
+-	      else
+-		{
+-		  task->next_child = parent->children;
+-		  task->prev_child = parent->children->prev_child;
+-		  parent->children = task;
+-		}
+-	      task->next_child->prev_child = task;
+-	      task->prev_child->next_child = task;
+-	    }
+-	  else
+-	    {
+-	      task->next_child = task;
+-	      task->prev_child = task;
+-	      parent->children = task;
+-	    }
++	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
++				 task, task->priority,
++				 PRIORITY_INSERT_BEGIN,
++				 /*adjust_parent_depends_on=*/true,
++				 task->parent_depends_on);
+ 	  if (parent->taskwait)
+ 	    {
+ 	      if (parent->taskwait->in_taskwait)
+ 		{
++		  /* One more task has had its dependencies met.
++		     Inform any waiters.  */
+ 		  parent->taskwait->in_taskwait = false;
+ 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
+ 		}
+ 	      else if (parent->taskwait->in_depend_wait)
+ 		{
++		  /* One more task has had its dependencies met.
++		     Inform any waiters.  */
+ 		  parent->taskwait->in_depend_wait = false;
+ 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
+ 		}
+-	      if (parent->taskwait->last_parent_depends_on == NULL
+-		  && task->parent_depends_on)
+-		parent->taskwait->last_parent_depends_on = task;
+ 	    }
+ 	}
+       if (taskgroup)
+ 	{
+-	  if (taskgroup->children)
+-	    {
+-	      task->next_taskgroup = taskgroup->children;
+-	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+-	      task->next_taskgroup->prev_taskgroup = task;
+-	      task->prev_taskgroup->next_taskgroup = task;
+-	    }
+-	  else
+-	    {
+-	      task->next_taskgroup = task;
+-	      task->prev_taskgroup = task;
+-	    }
+-	  taskgroup->children = task;
++	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++				 task, task->priority,
++				 PRIORITY_INSERT_BEGIN,
++				 /*adjust_parent_depends_on=*/false,
++				 task->parent_depends_on);
+ 	  if (taskgroup->in_taskgroup_wait)
+ 	    {
++	      /* One more task has had its dependencies met.
++		 Inform any waiters.  */
+ 	      taskgroup->in_taskgroup_wait = false;
+ 	      gomp_sem_post (&taskgroup->taskgroup_sem);
+ 	    }
+ 	}
+-      if (team->task_queue)
+-	{
+-	  task->next_queue = team->task_queue;
+-	  task->prev_queue = team->task_queue->prev_queue;
+-	  task->next_queue->prev_queue = task;
+-	  task->prev_queue->next_queue = task;
+-	}
+-      else
+-	{
+-	  task->next_queue = task;
+-	  task->prev_queue = task;
+-	  team->task_queue = task;
+-	}
++      priority_queue_insert (PQ_TEAM, &team->task_queue,
++			     task, task->priority,
++			     PRIORITY_INSERT_END,
++			     /*adjust_parent_depends_on=*/false,
++			     task->parent_depends_on);
+       ++team->task_count;
+       ++team->task_queued_count;
+       ++ret;
+@@ -601,12 +1120,18 @@ gomp_task_run_post_handle_depend (struct
+   return gomp_task_run_post_handle_dependers (child_task, team);
+ }
+ 
++/* Remove CHILD_TASK from its parent.  */
++
+ static inline void
+ gomp_task_run_post_remove_parent (struct gomp_task *child_task)
+ {
+   struct gomp_task *parent = child_task->parent;
+   if (parent == NULL)
+     return;
++
++  /* If this was the last task the parent was depending on,
++     synchronize with gomp_task_maybe_wait_for_dependencies so it can
++     clean up and return.  */
+   if (__builtin_expect (child_task->parent_depends_on, 0)
+       && --parent->taskwait->n_depend == 0
+       && parent->taskwait->in_depend_wait)
+@@ -614,36 +1139,31 @@ gomp_task_run_post_remove_parent (struct
+       parent->taskwait->in_depend_wait = false;
+       gomp_sem_post (&parent->taskwait->taskwait_sem);
+     }
+-  child_task->prev_child->next_child = child_task->next_child;
+-  child_task->next_child->prev_child = child_task->prev_child;
+-  if (parent->children != child_task)
+-    return;
+-  if (child_task->next_child != child_task)
+-    parent->children = child_task->next_child;
+-  else
++
++  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
++			     child_task, MEMMODEL_RELEASE)
++      && parent->taskwait && parent->taskwait->in_taskwait)
+     {
+-      /* We access task->children in GOMP_taskwait
+-	 outside of the task lock mutex region, so
+-	 need a release barrier here to ensure memory
+-	 written by child_task->fn above is flushed
+-	 before the NULL is written.  */
+-      __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
+-      if (parent->taskwait && parent->taskwait->in_taskwait)
+-	{
+-	  parent->taskwait->in_taskwait = false;
+-	  gomp_sem_post (&parent->taskwait->taskwait_sem);
+-	}
++      parent->taskwait->in_taskwait = false;
++      gomp_sem_post (&parent->taskwait->taskwait_sem);
+     }
++  child_task->pnode[PQ_CHILDREN].next = NULL;
++  child_task->pnode[PQ_CHILDREN].prev = NULL;
+ }
+ 
++/* Remove CHILD_TASK from its taskgroup.  */
++
+ static inline void
+ gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
+ {
+   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
+   if (taskgroup == NULL)
+     return;
+-  child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
+-  child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
++  bool empty = priority_queue_remove (PQ_TASKGROUP,
++				      &taskgroup->taskgroup_queue,
++				      child_task, MEMMODEL_RELAXED);
++  child_task->pnode[PQ_TASKGROUP].next = NULL;
++  child_task->pnode[PQ_TASKGROUP].prev = NULL;
+   if (taskgroup->num_children > 1)
+     --taskgroup->num_children;
+   else
+@@ -655,18 +1175,10 @@ gomp_task_run_post_remove_taskgroup (str
+ 	 before the NULL is written.  */
+       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
+     }
+-  if (taskgroup->children != child_task)
+-    return;
+-  if (child_task->next_taskgroup != child_task)
+-    taskgroup->children = child_task->next_taskgroup;
+-  else
++  if (empty && taskgroup->in_taskgroup_wait)
+     {
+-      taskgroup->children = NULL;
+-      if (taskgroup->in_taskgroup_wait)
+-	{
+-	  taskgroup->in_taskgroup_wait = false;
+-	  gomp_sem_post (&taskgroup->taskgroup_sem);
+-	}
++      taskgroup->in_taskgroup_wait = false;
++      gomp_sem_post (&taskgroup->taskgroup_sem);
+     }
+ }
+ 
+@@ -696,11 +1208,15 @@ gomp_barrier_handle_tasks (gomp_barrier_
+   while (1)
+     {
+       bool cancelled = false;
+-      if (team->task_queue != NULL)
++      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
+ 	{
+-	  child_task = team->task_queue;
++	  bool ignored;
++	  child_task
++	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
++					PQ_IGNORED, NULL,
++					&ignored);
+ 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
+-					 child_task->taskgroup, team);
++					 team);
+ 	  if (__builtin_expect (cancelled, 0))
+ 	    {
+ 	      if (to_free)
+@@ -729,7 +1245,29 @@ gomp_barrier_handle_tasks (gomp_barrier_
+       if (child_task)
+ 	{
+ 	  thr->task = child_task;
+-	  child_task->fn (child_task->fn_data);
++	  if (__builtin_expect (child_task->fn == NULL, 0))
++	    {
++	      if (gomp_target_task_fn (child_task->fn_data))
++		{
++		  thr->task = task;
++		  gomp_mutex_lock (&team->task_lock);
++		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
++		  team->task_running_count--;
++		  struct gomp_target_task *ttask
++		    = (struct gomp_target_task *) child_task->fn_data;
++		  /* If GOMP_PLUGIN_target_task_completion has run already
++		     in between gomp_target_task_fn and the mutex lock,
++		     perform the requeuing here.  */
++		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
++		    gomp_target_task_completion (team, child_task);
++		  else
++		    ttask->state = GOMP_TARGET_TASK_RUNNING;
++		  child_task = NULL;
++		  continue;
++		}
++	    }
++	  else
++	    child_task->fn (child_task->fn_data);
+ 	  thr->task = task;
+ 	}
+       else
+@@ -741,7 +1279,7 @@ gomp_barrier_handle_tasks (gomp_barrier_
+ 	  size_t new_tasks
+ 	    = gomp_task_run_post_handle_depend (child_task, team);
+ 	  gomp_task_run_post_remove_parent (child_task);
+-	  gomp_clear_parent (child_task->children);
++	  gomp_clear_parent (&child_task->children_queue);
+ 	  gomp_task_run_post_remove_taskgroup (child_task);
+ 	  to_free = child_task;
+ 	  child_task = NULL;
+@@ -765,7 +1303,9 @@ gomp_barrier_handle_tasks (gomp_barrier_
+     }
+ }
+ 
+-/* Called when encountering a taskwait directive.  */
++/* Called when encountering a taskwait directive.
++
++   Wait for all children of the current task.  */
+ 
+ void
+ GOMP_taskwait (void)
+@@ -785,15 +1325,16 @@ GOMP_taskwait (void)
+      child thread task work function are seen before we exit from
+      GOMP_taskwait.  */
+   if (task == NULL
+-      || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL)
++      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
+     return;
+ 
+   memset (&taskwait, 0, sizeof (taskwait));
++  bool child_q = false;
+   gomp_mutex_lock (&team->task_lock);
+   while (1)
+     {
+       bool cancelled = false;
+-      if (task->children == NULL)
++      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
+ 	{
+ 	  bool destroy_taskwait = task->taskwait != NULL;
+ 	  task->taskwait = NULL;
+@@ -807,12 +1348,14 @@ GOMP_taskwait (void)
+ 	    gomp_sem_destroy (&taskwait.taskwait_sem);
+ 	  return;
+ 	}
+-      if (task->children->kind == GOMP_TASK_WAITING)
++      struct gomp_task *next_task
++	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
++				    PQ_TEAM, &team->task_queue, &child_q);
++      if (next_task->kind == GOMP_TASK_WAITING)
+ 	{
+-	  child_task = task->children;
++	  child_task = next_task;
+ 	  cancelled
+-	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
+-				 team);
++	    = gomp_task_run_pre (child_task, task, team);
+ 	  if (__builtin_expect (cancelled, 0))
+ 	    {
+ 	      if (to_free)
+@@ -826,8 +1369,10 @@ GOMP_taskwait (void)
+ 	}
+       else
+ 	{
+-	  /* All tasks we are waiting for are already running
+-	     in other threads.  Wait for them.  */
++	/* All tasks we are waiting for are either running in other
++	   threads, or they are tasks that have not had their
++	   dependencies met (so they're not even in the queue).  Wait
++	   for them.  */
+ 	  if (task->taskwait == NULL)
+ 	    {
+ 	      taskwait.in_depend_wait = false;
+@@ -851,7 +1396,28 @@ GOMP_taskwait (void)
+       if (child_task)
+ 	{
+ 	  thr->task = child_task;
+-	  child_task->fn (child_task->fn_data);
++	  if (__builtin_expect (child_task->fn == NULL, 0))
++	    {
++	      if (gomp_target_task_fn (child_task->fn_data))
++		{
++		  thr->task = task;
++		  gomp_mutex_lock (&team->task_lock);
++		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
++		  struct gomp_target_task *ttask
++		    = (struct gomp_target_task *) child_task->fn_data;
++		  /* If GOMP_PLUGIN_target_task_completion has run already
++		     in between gomp_target_task_fn and the mutex lock,
++		     perform the requeuing here.  */
++		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
++		    gomp_target_task_completion (team, child_task);
++		  else
++		    ttask->state = GOMP_TARGET_TASK_RUNNING;
++		  child_task = NULL;
++		  continue;
++		}
++	    }
++	  else
++	    child_task->fn (child_task->fn_data);
+ 	  thr->task = task;
+ 	}
+       else
+@@ -862,17 +1428,19 @@ GOMP_taskwait (void)
+ 	 finish_cancelled:;
+ 	  size_t new_tasks
+ 	    = gomp_task_run_post_handle_depend (child_task, team);
+-	  child_task->prev_child->next_child = child_task->next_child;
+-	  child_task->next_child->prev_child = child_task->prev_child;
+-	  if (task->children == child_task)
+-	    {
+-	      if (child_task->next_child != child_task)
+-		task->children = child_task->next_child;
+-	      else
+-		task->children = NULL;
++
++	  if (child_q)
++	    {
++	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
++				     child_task, MEMMODEL_RELAXED);
++	      child_task->pnode[PQ_CHILDREN].next = NULL;
++	      child_task->pnode[PQ_CHILDREN].prev = NULL;
+ 	    }
+-	  gomp_clear_parent (child_task->children);
++
++	  gomp_clear_parent (&child_task->children_queue);
++
+ 	  gomp_task_run_post_remove_taskgroup (child_task);
++
+ 	  to_free = child_task;
+ 	  child_task = NULL;
+ 	  team->task_count--;
+@@ -887,10 +1455,20 @@ GOMP_taskwait (void)
+     }
+ }
+ 
+-/* This is like GOMP_taskwait, but we only wait for tasks that the
+-   upcoming task depends on.  */
++/* An undeferred task is about to run.  Wait for all tasks that this
++   undeferred task depends on.
+ 
+-static void
++   This is done by first putting all known ready dependencies
++   (dependencies that have their own dependencies met) at the top of
++   the scheduling queues.  Then we iterate through these imminently
++   ready tasks (and possibly other high priority tasks), and run them.
++   If we run out of ready dependencies to execute, we either wait for
++   the remaining dependencies to finish, or wait for them to get
++   scheduled so we can run them.
++
++   DEPEND is as in GOMP_task.  */
++
++void
+ gomp_task_maybe_wait_for_dependencies (void **depend)
+ {
+   struct gomp_thread *thr = gomp_thread ();
+@@ -898,7 +1476,6 @@ gomp_task_maybe_wait_for_dependencies (v
+   struct gomp_team *team = thr->ts.team;
+   struct gomp_task_depend_entry elem, *ent = NULL;
+   struct gomp_taskwait taskwait;
+-  struct gomp_task *last_parent_depends_on = NULL;
+   size_t ndepend = (uintptr_t) depend[0];
+   size_t nout = (uintptr_t) depend[1];
+   size_t i;
+@@ -922,32 +1499,11 @@ gomp_task_maybe_wait_for_dependencies (v
+ 	      {
+ 		tsk->parent_depends_on = true;
+ 		++num_awaited;
++		/* If dependency TSK itself has no dependencies and is
++		   ready to run, move it up front so that we run it as
++		   soon as possible.  */
+ 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
+-		  {
+-		    /* If a task we need to wait for is not already
+-		       running and is ready to be scheduled, move it
+-		       to front, so that we run it as soon as possible.  */
+-		    if (last_parent_depends_on)
+-		      {
+-			tsk->prev_child->next_child = tsk->next_child;
+-			tsk->next_child->prev_child = tsk->prev_child;
+-			tsk->prev_child = last_parent_depends_on;
+-			tsk->next_child = last_parent_depends_on->next_child;
+-			tsk->prev_child->next_child = tsk;
+-			tsk->next_child->prev_child = tsk;
+-		      }
+-		    else if (tsk != task->children)
+-		      {
+-			tsk->prev_child->next_child = tsk->next_child;
+-			tsk->next_child->prev_child = tsk->prev_child;
+-			tsk->prev_child = task->children;
+-			tsk->next_child = task->children->next_child;
+-			task->children = tsk;
+-			tsk->prev_child->next_child = tsk;
+-			tsk->next_child->prev_child = tsk;
+-		      }
+-		    last_parent_depends_on = tsk;
+-		  }
++		  priority_queue_upgrade_task (tsk, task);
+ 	      }
+ 	  }
+     }
+@@ -959,7 +1515,6 @@ gomp_task_maybe_wait_for_dependencies (v
+ 
+   memset (&taskwait, 0, sizeof (taskwait));
+   taskwait.n_depend = num_awaited;
+-  taskwait.last_parent_depends_on = last_parent_depends_on;
+   gomp_sem_init (&taskwait.taskwait_sem, 0);
+   task->taskwait = &taskwait;
+ 
+@@ -978,12 +1533,30 @@ gomp_task_maybe_wait_for_dependencies (v
+ 	  gomp_sem_destroy (&taskwait.taskwait_sem);
+ 	  return;
+ 	}
+-      if (task->children->kind == GOMP_TASK_WAITING)
++
++      /* Theoretically when we have multiple priorities, we should
++	 choose between the highest priority item in
++	 task->children_queue and team->task_queue here, so we should
++	 use priority_queue_next_task().  However, since we are
++	 running an undeferred task, perhaps that makes all tasks it
++	 depends on undeferred, thus a priority of INF?  This would
++	 make it unnecessary to take anything into account here,
++	 but the dependencies.
++
++	 On the other hand, if we want to use priority_queue_next_task(),
++	 care should be taken to only use priority_queue_remove()
++	 below if the task was actually removed from the children
++	 queue.  */
++      bool ignored;
++      struct gomp_task *next_task
++	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
++				    PQ_IGNORED, NULL, &ignored);
++
++      if (next_task->kind == GOMP_TASK_WAITING)
+ 	{
+-	  child_task = task->children;
++	  child_task = next_task;
+ 	  cancelled
+-	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
+-				 team);
++	    = gomp_task_run_pre (child_task, task, team);
+ 	  if (__builtin_expect (cancelled, 0))
+ 	    {
+ 	      if (to_free)
+@@ -996,8 +1569,10 @@ gomp_task_maybe_wait_for_dependencies (v
+ 	    }
+ 	}
+       else
+-	/* All tasks we are waiting for are already running
+-	   in other threads.  Wait for them.  */
++	/* All tasks we are waiting for are either running in other
++	   threads, or they are tasks that have not had their
++	   dependencies met (so they're not even in the queue).  Wait
++	   for them.  */
+ 	taskwait.in_depend_wait = true;
+       gomp_mutex_unlock (&team->task_lock);
+       if (do_wake)
+@@ -1014,7 +1589,28 @@ gomp_task_maybe_wait_for_dependencies (v
+       if (child_task)
+ 	{
+ 	  thr->task = child_task;
+-	  child_task->fn (child_task->fn_data);
++	  if (__builtin_expect (child_task->fn == NULL, 0))
++	    {
++	      if (gomp_target_task_fn (child_task->fn_data))
++		{
++		  thr->task = task;
++		  gomp_mutex_lock (&team->task_lock);
++		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
++		  struct gomp_target_task *ttask
++		    = (struct gomp_target_task *) child_task->fn_data;
++		  /* If GOMP_PLUGIN_target_task_completion has run already
++		     in between gomp_target_task_fn and the mutex lock,
++		     perform the requeuing here.  */
++		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
++		    gomp_target_task_completion (team, child_task);
++		  else
++		    ttask->state = GOMP_TARGET_TASK_RUNNING;
++		  child_task = NULL;
++		  continue;
++		}
++	    }
++	  else
++	    child_task->fn (child_task->fn_data);
+ 	  thr->task = task;
+ 	}
+       else
+@@ -1027,16 +1623,13 @@ gomp_task_maybe_wait_for_dependencies (v
+ 	    = gomp_task_run_post_handle_depend (child_task, team);
+ 	  if (child_task->parent_depends_on)
+ 	    --taskwait.n_depend;
+-	  child_task->prev_child->next_child = child_task->next_child;
+-	  child_task->next_child->prev_child = child_task->prev_child;
+-	  if (task->children == child_task)
+-	    {
+-	      if (child_task->next_child != child_task)
+-		task->children = child_task->next_child;
+-	      else
+-		task->children = NULL;
+-	    }
+-	  gomp_clear_parent (child_task->children);
++
++	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
++				 child_task, MEMMODEL_RELAXED);
++	  child_task->pnode[PQ_CHILDREN].next = NULL;
++	  child_task->pnode[PQ_CHILDREN].prev = NULL;
++
++	  gomp_clear_parent (&child_task->children_queue);
+ 	  gomp_task_run_post_remove_taskgroup (child_task);
+ 	  to_free = child_task;
+ 	  child_task = NULL;
+@@ -1069,14 +1662,14 @@ GOMP_taskgroup_start (void)
+   struct gomp_taskgroup *taskgroup;
+ 
+   /* If team is NULL, all tasks are executed as
+-     GOMP_TASK_IFFALSE tasks and thus all children tasks of
++     GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
+      taskgroup and their descendant tasks will be finished
+      by the time GOMP_taskgroup_end is called.  */
+   if (team == NULL)
+     return;
+   taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
+   taskgroup->prev = task->taskgroup;
+-  taskgroup->children = NULL;
++  priority_queue_init (&taskgroup->taskgroup_queue);
+   taskgroup->in_taskgroup_wait = false;
+   taskgroup->cancelled = false;
+   taskgroup->num_children = 0;
+@@ -1098,6 +1691,17 @@ GOMP_taskgroup_end (void)
+   if (team == NULL)
+     return;
+   taskgroup = task->taskgroup;
++  if (__builtin_expect (taskgroup == NULL, 0)
++      && thr->ts.level == 0)
++    {
++      /* This can happen if GOMP_taskgroup_start is called when
++	 thr->ts.team == NULL, but inside of the taskgroup there
++	 is #pragma omp target nowait that creates an implicit
++	 team with a single thread.  In this case, we want to wait
++	 for all outstanding tasks in this team.  */
++      gomp_team_barrier_wait (&team->barrier);
++      return;
++    }
+ 
+   /* The acquire barrier on load of taskgroup->num_children here
+      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
+@@ -1108,19 +1712,25 @@ GOMP_taskgroup_end (void)
+   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
+     goto finish;
+ 
++  bool unused;
+   gomp_mutex_lock (&team->task_lock);
+   while (1)
+     {
+       bool cancelled = false;
+-      if (taskgroup->children == NULL)
++      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
++				  MEMMODEL_RELAXED))
+ 	{
+ 	  if (taskgroup->num_children)
+ 	    {
+-	      if (task->children == NULL)
++	      if (priority_queue_empty_p (&task->children_queue,
++					  MEMMODEL_RELAXED))
+ 		goto do_wait;
+-	      child_task = task->children;
+-            }
+-          else
++	      child_task
++		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
++					    PQ_TEAM, &team->task_queue,
++					    &unused);
++	    }
++	  else
+ 	    {
+ 	      gomp_mutex_unlock (&team->task_lock);
+ 	      if (to_free)
+@@ -1132,12 +1742,13 @@ GOMP_taskgroup_end (void)
+ 	    }
+ 	}
+       else
+-	child_task = taskgroup->children;
++	child_task
++	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++				      PQ_TEAM, &team->task_queue, &unused);
+       if (child_task->kind == GOMP_TASK_WAITING)
+ 	{
+ 	  cancelled
+-	    = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
+-				 team);
++	    = gomp_task_run_pre (child_task, child_task->parent, team);
+ 	  if (__builtin_expect (cancelled, 0))
+ 	    {
+ 	      if (to_free)
+@@ -1153,8 +1764,10 @@ GOMP_taskgroup_end (void)
+ 	{
+ 	  child_task = NULL;
+ 	 do_wait:
+-	  /* All tasks we are waiting for are already running
+-	     in other threads.  Wait for them.  */
++	/* All tasks we are waiting for are either running in other
++	   threads, or they are tasks that have not had their
++	   dependencies met (so they're not even in the queue).  Wait
++	   for them.  */
+ 	  taskgroup->in_taskgroup_wait = true;
+ 	}
+       gomp_mutex_unlock (&team->task_lock);
+@@ -1172,7 +1785,28 @@ GOMP_taskgroup_end (void)
+       if (child_task)
+ 	{
+ 	  thr->task = child_task;
+-	  child_task->fn (child_task->fn_data);
++	  if (__builtin_expect (child_task->fn == NULL, 0))
++	    {
++	      if (gomp_target_task_fn (child_task->fn_data))
++		{
++		  thr->task = task;
++		  gomp_mutex_lock (&team->task_lock);
++		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
++		  struct gomp_target_task *ttask
++		    = (struct gomp_target_task *) child_task->fn_data;
++		  /* If GOMP_PLUGIN_target_task_completion has run already
++		     in between gomp_target_task_fn and the mutex lock,
++		     perform the requeuing here.  */
++		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
++		    gomp_target_task_completion (team, child_task);
++		  else
++		    ttask->state = GOMP_TARGET_TASK_RUNNING;
++		  child_task = NULL;
++		  continue;
++		}
++	    }
++	  else
++	    child_task->fn (child_task->fn_data);
+ 	  thr->task = task;
+ 	}
+       else
+@@ -1184,7 +1818,7 @@ GOMP_taskgroup_end (void)
+ 	  size_t new_tasks
+ 	    = gomp_task_run_post_handle_depend (child_task, team);
+ 	  gomp_task_run_post_remove_parent (child_task);
+-	  gomp_clear_parent (child_task->children);
++	  gomp_clear_parent (&child_task->children_queue);
+ 	  gomp_task_run_post_remove_taskgroup (child_task);
+ 	  to_free = child_task;
+ 	  child_task = NULL;
+--- libgomp/libgomp_g.h.jj	2014-05-15 10:56:31.429532978 +0200
++++ libgomp/libgomp_g.h	2016-07-13 16:57:04.422535521 +0200
+@@ -29,6 +29,7 @@
+ #define LIBGOMP_G_H 1
+ 
+ #include <stdbool.h>
++#include <stddef.h>
+ 
+ /* barrier.c */
+ 
+@@ -50,6 +51,10 @@ extern bool GOMP_loop_static_start (long
+ extern bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *);
+ extern bool GOMP_loop_guided_start (long, long, long, long, long *, long *);
+ extern bool GOMP_loop_runtime_start (long, long, long, long *, long *);
++extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long,
++						  long *, long *);
++extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long,
++						 long *, long *);
+ 
+ extern bool GOMP_loop_ordered_static_start (long, long, long, long,
+ 					    long *, long *);
+@@ -63,12 +68,23 @@ extern bool GOMP_loop_static_next (long
+ extern bool GOMP_loop_dynamic_next (long *, long *);
+ extern bool GOMP_loop_guided_next (long *, long *);
+ extern bool GOMP_loop_runtime_next (long *, long *);
++extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *);
++extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *);
+ 
+ extern bool GOMP_loop_ordered_static_next (long *, long *);
+ extern bool GOMP_loop_ordered_dynamic_next (long *, long *);
+ extern bool GOMP_loop_ordered_guided_next (long *, long *);
+ extern bool GOMP_loop_ordered_runtime_next (long *, long *);
+ 
++extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *,
++					     long *);
++extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *,
++					      long *);
++extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *,
++					     long *);
++extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *,
++					      long *);
++
+ extern void GOMP_parallel_loop_static_start (void (*)(void *), void *,
+ 					     unsigned, long, long, long, long);
+ extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *,
+@@ -89,6 +105,12 @@ extern void GOMP_parallel_loop_guided (v
+ extern void GOMP_parallel_loop_runtime (void (*)(void *), void *,
+ 					unsigned, long, long, long,
+ 					unsigned);
++extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *,
++						     unsigned, long, long,
++						     long, long, unsigned);
++extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *,
++						    unsigned, long, long,
++						    long, long, unsigned);
+ 
+ extern void GOMP_loop_end (void);
+ extern void GOMP_loop_end_nowait (void);
+@@ -119,6 +141,18 @@ extern bool GOMP_loop_ull_runtime_start
+ 					 unsigned long long,
+ 					 unsigned long long *,
+ 					 unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_dynamic_start (bool, unsigned long long,
++						      unsigned long long,
++						      unsigned long long,
++						      unsigned long long,
++						      unsigned long long *,
++						      unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long,
++						     unsigned long long,
++						     unsigned long long,
++						     unsigned long long,
++						     unsigned long long *,
++						     unsigned long long *);
+ 
+ extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long,
+ 						unsigned long long,
+@@ -152,6 +186,10 @@ extern bool GOMP_loop_ull_guided_next (u
+ 				       unsigned long long *);
+ extern bool GOMP_loop_ull_runtime_next (unsigned long long *,
+ 					unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned long long *,
++						     unsigned long long *);
++extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *,
++						    unsigned long long *);
+ 
+ extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *,
+ 					       unsigned long long *);
+@@ -162,10 +200,34 @@ extern bool GOMP_loop_ull_ordered_guided
+ extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *,
+ 						unsigned long long *);
+ 
++extern bool GOMP_loop_ull_doacross_static_start (unsigned,
++						 unsigned long long *,
++						 unsigned long long,
++						 unsigned long long *,
++						 unsigned long long *);
++extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned,
++						  unsigned long long *,
++						  unsigned long long,
++						  unsigned long long *,
++						  unsigned long long *);
++extern bool GOMP_loop_ull_doacross_guided_start (unsigned,
++						 unsigned long long *,
++						 unsigned long long,
++						 unsigned long long *,
++						 unsigned long long *);
++extern bool GOMP_loop_ull_doacross_runtime_start (unsigned,
++						  unsigned long long *,
++						  unsigned long long *,
++						  unsigned long long *);
++
+ /* ordered.c */
+ 
+ extern void GOMP_ordered_start (void);
+ extern void GOMP_ordered_end (void);
++extern void GOMP_doacross_post (long *);
++extern void GOMP_doacross_wait (long, ...);
++extern void GOMP_doacross_ull_post (unsigned long long *);
++extern void GOMP_doacross_ull_wait (unsigned long long, ...);
+ 
+ /* parallel.c */
+ 
+@@ -178,7 +240,15 @@ extern bool GOMP_cancellation_point (int
+ /* task.c */
+ 
+ extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *),
+-		       long, long, bool, unsigned, void **);
++		       long, long, bool, unsigned, void **, int);
++extern void GOMP_taskloop (void (*) (void *), void *,
++			   void (*) (void *, void *), long, long, unsigned,
++			   unsigned long, int, long, long, long);
++extern void GOMP_taskloop_ull (void (*) (void *), void *,
++			       void (*) (void *, void *), long, long,
++			       unsigned, unsigned long, int,
++			       unsigned long long, unsigned long long,
++			       unsigned long long);
+ extern void GOMP_taskwait (void);
+ extern void GOMP_taskyield (void);
+ extern void GOMP_taskgroup_start (void);
+@@ -206,11 +276,38 @@ extern void GOMP_single_copy_end (void *
+ 
+ extern void GOMP_target (int, void (*) (void *), const void *,
+ 			 size_t, void **, size_t *, unsigned char *);
++extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
++			     unsigned short *, unsigned int, void **, void **);
+ extern void GOMP_target_data (int, const void *,
+ 			      size_t, void **, size_t *, unsigned char *);
++extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
++				  unsigned short *);
+ extern void GOMP_target_end_data (void);
+ extern void GOMP_target_update (int, const void *,
+ 				size_t, void **, size_t *, unsigned char *);
++extern void GOMP_target_update_ext (int, size_t, void **, size_t *,
++				    unsigned short *, unsigned int, void **);
++extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
++					 unsigned short *, unsigned int,
++					 void **);
+ extern void GOMP_teams (unsigned int, unsigned int);
+ 
++/* oacc-parallel.c */
++
++extern void GOACC_parallel_keyed (int, void (*) (void *), size_t,
++				  void **, size_t *, unsigned short *, ...);
++extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *,
++			    unsigned short *, int, int, int, int, int, ...);
++extern void GOACC_data_start (int, size_t, void **, size_t *,
++			      unsigned short *);
++extern void GOACC_data_end (void);
++extern void GOACC_enter_exit_data (int, size_t, void **,
++				   size_t *, unsigned short *, int, int, ...);
++extern void GOACC_update (int, size_t, void **, size_t *,
++			  unsigned short *, int, int, ...);
++extern void GOACC_wait (int, int, ...);
++extern int GOACC_get_num_threads (void);
++extern int GOACC_get_thread_num (void);
++extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *);
++
+ #endif /* LIBGOMP_G_H */
+--- libgomp/libgomp.h.jj	2014-08-01 15:59:49.145188127 +0200
++++ libgomp/libgomp.h	2016-07-14 17:40:24.038243456 +0200
+@@ -34,12 +34,35 @@
+ #ifndef LIBGOMP_H 
+ #define LIBGOMP_H 1
+ 
++#ifndef _LIBGOMP_CHECKING_
++/* Define to 1 to perform internal sanity checks.  */
++#define _LIBGOMP_CHECKING_ 0
++#endif
++
+ #include "config.h"
+ #include "gstdint.h"
++#include "libgomp-plugin.h"
+ 
+ #include <pthread.h>
+ #include <stdbool.h>
+ #include <stdlib.h>
++#include <stdarg.h>
++
++/* Needed for memset in priority_queue.c.  */
++#if _LIBGOMP_CHECKING_
++# ifdef STRING_WITH_STRINGS
++#  include <string.h>
++#  include <strings.h>
++# else
++#  ifdef HAVE_STRING_H
++#   include <string.h>
++#  else
++#   ifdef HAVE_STRINGS_H
++#    include <strings.h>
++#   endif
++#  endif
++# endif
++#endif
+ 
+ #ifdef HAVE_ATTRIBUTE_VISIBILITY
+ # pragma GCC visibility push(hidden)
+@@ -56,6 +79,44 @@ enum memmodel
+   MEMMODEL_SEQ_CST = 5
+ };
+ 
++/* alloc.c */
++
++extern void *gomp_malloc (size_t) __attribute__((malloc));
++extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
++extern void *gomp_realloc (void *, size_t);
++
++/* Avoid conflicting prototypes of alloca() in system headers by using
++   GCC's builtin alloca().  */
++#define gomp_alloca(x)  __builtin_alloca(x)
++
++/* error.c */
++
++extern void gomp_vdebug (int, const char *, va_list);
++extern void gomp_debug (int, const char *, ...)
++	__attribute__ ((format (printf, 2, 3)));
++#define gomp_vdebug(KIND, FMT, VALIST) \
++  do { \
++    if (__builtin_expect (gomp_debug_var, 0)) \
++      (gomp_vdebug) ((KIND), (FMT), (VALIST)); \
++  } while (0)
++#define gomp_debug(KIND, ...) \
++  do { \
++    if (__builtin_expect (gomp_debug_var, 0)) \
++      (gomp_debug) ((KIND), __VA_ARGS__); \
++  } while (0)
++extern void gomp_verror (const char *, va_list);
++extern void gomp_error (const char *, ...)
++	__attribute__ ((format (printf, 1, 2)));
++extern void gomp_vfatal (const char *, va_list)
++	__attribute__ ((noreturn));
++extern void gomp_fatal (const char *, ...)
++	__attribute__ ((noreturn, format (printf, 1, 2)));
++
++struct gomp_task;
++struct gomp_taskgroup;
++struct htab;
++
++#include "priority_queue.h"
+ #include "sem.h"
+ #include "mutex.h"
+ #include "bar.h"
+@@ -74,6 +135,44 @@ enum gomp_schedule_type
+   GFS_AUTO
+ };
+ 
++struct gomp_doacross_work_share
++{
++  union {
++    /* chunk_size copy, as ws->chunk_size is multiplied by incr for
++       GFS_DYNAMIC.  */
++    long chunk_size;
++    /* Likewise, but for ull implementation.  */
++    unsigned long long chunk_size_ull;
++    /* For schedule(static,0) this is the number
++       of iterations assigned to the last thread, i.e. number of
++       iterations / number of threads.  */
++    long q;
++    /* Likewise, but for ull implementation.  */
++    unsigned long long q_ull;
++  };
++  /* Size of each array entry (padded to cache line size).  */
++  unsigned long elt_sz;
++  /* Number of dimensions in sink vectors.  */
++  unsigned int ncounts;
++  /* True if the iterations can be flattened.  */
++  bool flattened;
++  /* Actual array (of elt_sz sized units), aligned to cache line size.
++     This is indexed by team_id for GFS_STATIC and outermost iteration
++     / chunk_size for other schedules.  */
++  unsigned char *array;
++  /* These two are only used for schedule(static,0).  */
++  /* This one is number of iterations % number of threads.  */
++  long t;
++  union {
++    /* And this one is cached t * (q + 1).  */
++    long boundary;
++    /* Likewise, but for the ull implementation.  */
++    unsigned long long boundary_ull;
++  };
++  /* Array of shift counts for each dimension if they can be flattened.  */
++  unsigned int shift_counts[];
++};
++
+ struct gomp_work_share
+ {
+   /* This member records the SCHEDULE clause to be used for this construct.
+@@ -105,13 +204,18 @@ struct gomp_work_share
+     };
+   };
+ 
+-  /* This is a circular queue that details which threads will be allowed
+-     into the ordered region and in which order.  When a thread allocates
+-     iterations on which it is going to work, it also registers itself at
+-     the end of the array.  When a thread reaches the ordered region, it
+-     checks to see if it is the one at the head of the queue.  If not, it
+-     blocks on its RELEASE semaphore.  */
+-  unsigned *ordered_team_ids;
++  union {
++    /* This is a circular queue that details which threads will be allowed
++       into the ordered region and in which order.  When a thread allocates
++       iterations on which it is going to work, it also registers itself at
++       the end of the array.  When a thread reaches the ordered region, it
++       checks to see if it is the one at the head of the queue.  If not, it
++       blocks on its RELEASE semaphore.  */
++    unsigned *ordered_team_ids;
++
++    /* This is a pointer to DOACROSS work share data.  */
++    struct gomp_doacross_work_share *doacross;
++  };
+ 
+   /* This is the number of threads that have registered themselves in
+      the circular queue ordered_team_ids.  */
+@@ -230,7 +334,7 @@ struct gomp_task_icv
+ {
+   unsigned long nthreads_var;
+   enum gomp_schedule_type run_sched_var;
+-  int run_sched_modifier;
++  int run_sched_chunk_size;
+   int default_device_var;
+   unsigned int thread_limit_var;
+   bool dyn_var;
+@@ -246,6 +350,7 @@ extern gomp_mutex_t gomp_managed_threads
+ #endif
+ extern unsigned long gomp_max_active_levels_var;
+ extern bool gomp_cancel_var;
++extern int gomp_max_task_priority_var;
+ extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
+ extern unsigned long gomp_available_cpus, gomp_managed_threads;
+ extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
+@@ -253,25 +358,36 @@ extern char *gomp_bind_var_list;
+ extern unsigned long gomp_bind_var_list_len;
+ extern void **gomp_places_list;
+ extern unsigned long gomp_places_list_len;
++extern int gomp_debug_var;
++extern int goacc_device_num;
++extern char *goacc_device_type;
+ 
+ enum gomp_task_kind
+ {
++  /* Implicit task.  */
+   GOMP_TASK_IMPLICIT,
+-  GOMP_TASK_IFFALSE,
++  /* Undeferred task.  */
++  GOMP_TASK_UNDEFERRED,
++  /* Task created by GOMP_task and waiting to be run.  */
+   GOMP_TASK_WAITING,
+-  GOMP_TASK_TIED
++  /* Task currently executing or scheduled and about to execute.  */
++  GOMP_TASK_TIED,
++  /* Used for target tasks that have vars mapped and async run started,
++     but not yet completed.  Once that completes, they will be re-added
++     into the queues as GOMP_TASK_WAITING in order to perform the var
++     unmapping.  */
++  GOMP_TASK_ASYNC_RUNNING
+ };
+ 
+-struct gomp_task;
+-struct gomp_taskgroup;
+-struct htab;
+-
+ struct gomp_task_depend_entry
+ {
++  /* Address of dependency.  */
+   void *addr;
+   struct gomp_task_depend_entry *next;
+   struct gomp_task_depend_entry *prev;
++  /* Task that provides the dependency in ADDR.  */
+   struct gomp_task *task;
++  /* Depend entry is of type "IN".  */
+   bool is_in;
+   bool redundant;
+   bool redundant_out;
+@@ -290,8 +406,8 @@ struct gomp_taskwait
+ {
+   bool in_taskwait;
+   bool in_depend_wait;
++  /* Number of tasks we are waiting for.  */
+   size_t n_depend;
+-  struct gomp_task *last_parent_depends_on;
+   gomp_sem_t taskwait_sem;
+ };
+ 
+@@ -299,20 +415,31 @@ struct gomp_taskwait
+ 
+ struct gomp_task
+ {
++  /* Parent of this task.  */
+   struct gomp_task *parent;
+-  struct gomp_task *children;
+-  struct gomp_task *next_child;
+-  struct gomp_task *prev_child;
+-  struct gomp_task *next_queue;
+-  struct gomp_task *prev_queue;
+-  struct gomp_task *next_taskgroup;
+-  struct gomp_task *prev_taskgroup;
++  /* Children of this task.  */
++  struct priority_queue children_queue;
++  /* Taskgroup this task belongs in.  */
+   struct gomp_taskgroup *taskgroup;
++  /* Tasks that depend on this task.  */
+   struct gomp_dependers_vec *dependers;
+   struct htab *depend_hash;
+   struct gomp_taskwait *taskwait;
++  /* Number of items in DEPEND.  */
+   size_t depend_count;
++  /* Number of tasks this task depends on.  Once this counter reaches
++     0, we have no unsatisfied dependencies, and this task can be put
++     into the various queues to be scheduled.  */
+   size_t num_dependees;
++
++  /* Priority of this task.  */
++  int priority;
++  /* The priority node for this task in each of the different queues.
++     We put this here to avoid allocating space for each priority
++     node.  Then we play offsetof() games to convert between pnode[]
++     entries and the gomp_task in which they reside.  */
++  struct priority_node pnode[3];
++
+   struct gomp_task_icv icv;
+   void (*fn) (void *);
+   void *fn_data;
+@@ -320,20 +447,58 @@ struct gomp_task
+   bool in_tied_task;
+   bool final_task;
+   bool copy_ctors_done;
++  /* Set for undeferred tasks with unsatisfied dependencies which
++     block further execution of their parent until the dependencies
++     are satisfied.  */
+   bool parent_depends_on;
++  /* Dependencies provided and/or needed for this task.  DEPEND_COUNT
++     is the number of items available.  */
+   struct gomp_task_depend_entry depend[];
+ };
+ 
++/* This structure describes a single #pragma omp taskgroup.  */
++
+ struct gomp_taskgroup
+ {
+   struct gomp_taskgroup *prev;
+-  struct gomp_task *children;
++  /* Queue of tasks that belong in this taskgroup.  */
++  struct priority_queue taskgroup_queue;
+   bool in_taskgroup_wait;
+   bool cancelled;
+   gomp_sem_t taskgroup_sem;
+   size_t num_children;
+ };
+ 
++/* Various states of OpenMP async offloading tasks.  */
++enum gomp_target_task_state
++{
++  GOMP_TARGET_TASK_DATA,
++  GOMP_TARGET_TASK_BEFORE_MAP,
++  GOMP_TARGET_TASK_FALLBACK,
++  GOMP_TARGET_TASK_READY_TO_RUN,
++  GOMP_TARGET_TASK_RUNNING,
++  GOMP_TARGET_TASK_FINISHED
++};
++
++/* This structure describes a target task.  */
++
++struct gomp_target_task
++{
++  struct gomp_device_descr *devicep;
++  void (*fn) (void *);
++  size_t mapnum;
++  size_t *sizes;
++  unsigned short *kinds;
++  unsigned int flags;
++  enum gomp_target_task_state state;
++  struct target_mem_desc *tgt;
++  struct gomp_task *task;
++  struct gomp_team *team;
++  /* Device-specific target arguments.  */
++  void **args;
++  void *hostaddrs[];
++};
++
+ /* This structure describes a "team" of threads.  These are the threads
+    that are spawned by a PARALLEL constructs, as well as the work sharing
+    constructs that the team encounters.  */
+@@ -396,7 +561,8 @@ struct gomp_team
+   struct gomp_work_share work_shares[8];
+ 
+   gomp_mutex_t task_lock;
+-  struct gomp_task *task_queue;
++  /* Scheduled tasks.  */
++  struct priority_queue task_queue;
+   /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team.  */
+   unsigned int task_count;
+   /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled.  */
+@@ -451,6 +617,9 @@ struct gomp_thread_pool
+   struct gomp_thread **threads;
+   unsigned threads_size;
+   unsigned threads_used;
++  /* The last team is used for non-nested teams to delay their destruction to
++     make sure all the threads in the team move on to the pool's barrier before
++     the team's barrier is destroyed.  */
+   struct gomp_team *last_team;
+   /* Number of threads running in this contention group.  */
+   unsigned long threads_busy;
+@@ -519,23 +688,7 @@ extern bool gomp_affinity_same_place (vo
+ extern bool gomp_affinity_finalize_place_list (bool);
+ extern bool gomp_affinity_init_level (int, unsigned long, bool);
+ extern void gomp_affinity_print_place (void *);
+-
+-/* alloc.c */
+-
+-extern void *gomp_malloc (size_t) __attribute__((malloc));
+-extern void *gomp_malloc_cleared (size_t) __attribute__((malloc));
+-extern void *gomp_realloc (void *, size_t);
+-
+-/* Avoid conflicting prototypes of alloca() in system headers by using
+-   GCC's builtin alloca().  */
+-#define gomp_alloca(x)  __builtin_alloca(x)
+-
+-/* error.c */
+-
+-extern void gomp_error (const char *, ...)
+-	__attribute__((format (printf, 1, 2)));
+-extern void gomp_fatal (const char *, ...)
+-	__attribute__((noreturn, format (printf, 1, 2)));
++extern void gomp_get_place_proc_ids_8 (int, int64_t *);
+ 
+ /* iter.c */
+ 
+@@ -572,6 +725,9 @@ extern void gomp_ordered_next (void);
+ extern void gomp_ordered_static_init (void);
+ extern void gomp_ordered_static_next (void);
+ extern void gomp_ordered_sync (void);
++extern void gomp_doacross_init (unsigned, long *, long);
++extern void gomp_doacross_ull_init (unsigned, unsigned long long *,
++				    unsigned long long);
+ 
+ /* parallel.c */
+ 
+@@ -588,6 +744,12 @@ extern void gomp_init_task (struct gomp_
+ 			    struct gomp_task_icv *);
+ extern void gomp_end_task (void);
+ extern void gomp_barrier_handle_tasks (gomp_barrier_state_t);
++extern void gomp_task_maybe_wait_for_dependencies (void **);
++extern bool gomp_create_target_task (struct gomp_device_descr *,
++				     void (*) (void *), size_t, void **,
++				     size_t *, unsigned short *, unsigned int,
++				     void **, void **,
++				     enum gomp_target_task_state);
+ 
+ static void inline
+ gomp_finish_task (struct gomp_task *task)
+@@ -606,7 +768,213 @@ extern void gomp_free_thread (void *);
+ 
+ /* target.c */
+ 
++extern void gomp_init_targets_once (void);
+ extern int gomp_get_num_devices (void);
++extern bool gomp_target_task_fn (void *);
++
++/* Splay tree definitions.  */
++typedef struct splay_tree_node_s *splay_tree_node;
++typedef struct splay_tree_s *splay_tree;
++typedef struct splay_tree_key_s *splay_tree_key;
++
++struct target_var_desc {
++  /* Splay key.  */
++  splay_tree_key key;
++  /* True if data should be copied from device to host at the end.  */
++  bool copy_from;
++  /* True if data should always be copied from device to host at the end.  */
++  bool always_copy_from;
++  /* Relative offset against key host_start.  */
++  uintptr_t offset;
++  /* Actual length.  */
++  uintptr_t length;
++};
++
++struct target_mem_desc {
++  /* Reference count.  */
++  uintptr_t refcount;
++  /* All the splay nodes allocated together.  */
++  splay_tree_node array;
++  /* Start of the target region.  */
++  uintptr_t tgt_start;
++  /* End of the target region.  */
++  uintptr_t tgt_end;
++  /* Handle to free.  */
++  void *to_free;
++  /* Previous target_mem_desc.  */
++  struct target_mem_desc *prev;
++  /* Number of items in the LIST array below.  */
++  size_t list_count;
++
++  /* Corresponding target device descriptor.  */
++  struct gomp_device_descr *device_descr;
++
++  /* List of target items to remove (or decrease refcount)
++     at the end of region.  */
++  struct target_var_desc list[];
++};
++
++/* Special value for refcount - infinity.  */
++#define REFCOUNT_INFINITY (~(uintptr_t) 0)
++/* Special value for refcount - tgt_offset contains target address of the
++   artificial pointer to "omp declare target link" object.  */
++#define REFCOUNT_LINK (~(uintptr_t) 1)
++
++struct splay_tree_key_s {
++  /* Address of the host object.  */
++  uintptr_t host_start;
++  /* Address immediately after the host object.  */
++  uintptr_t host_end;
++  /* Descriptor of the target memory.  */
++  struct target_mem_desc *tgt;
++  /* Offset from tgt->tgt_start to the start of the target object.  */
++  uintptr_t tgt_offset;
++  /* Reference count.  */
++  uintptr_t refcount;
++  /* Pointer to the original mapping of "omp declare target link" object.  */
++  splay_tree_key link_key;
++};
++
++/* The comparison function.  */
++
++static inline int
++splay_compare (splay_tree_key x, splay_tree_key y)
++{
++  /* Two empty ranges compare equal, and so do overlapping ranges.  */
++  int x_empty = x->host_start == x->host_end;
++  int y_empty = y->host_start == y->host_end;
++  if (x_empty && y_empty)
++    return 0;
++  if (y->host_start >= x->host_end)
++    return -1;
++  return y->host_end <= x->host_start ? 1 : 0;
++}
++
++#include "splay-tree.h"
++
++typedef struct acc_dispatch_t
++{
++  /* This is a linked list of data mapped using the
++     acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
++     Unlike mapped_data in the goacc_thread struct, unmapping can
++     happen out-of-order with respect to mapping.  */
++  /* This is guarded by the lock in the "outer" struct gomp_device_descr.  */
++  struct target_mem_desc *data_environ;
++
++  /* Execute.  */
++  void (*exec_func) (void (*) (void *), size_t, void **, void **, int,
++		     unsigned *, void *);
++
++  /* Async cleanup callback registration.  */
++  void (*register_async_cleanup_func) (void *, int);
++
++  /* Asynchronous routines.  */
++  int (*async_test_func) (int);
++  int (*async_test_all_func) (void);
++  void (*async_wait_func) (int);
++  void (*async_wait_async_func) (int, int);
++  void (*async_wait_all_func) (void);
++  void (*async_wait_all_async_func) (int);
++  void (*async_set_async_func) (int);
++
++  /* Create/destroy TLS data.  */
++  void *(*create_thread_data_func) (int);
++  void (*destroy_thread_data_func) (void *);
++
++  /* NVIDIA target specific routines.  */
++  struct {
++    void *(*get_current_device_func) (void);
++    void *(*get_current_context_func) (void);
++    void *(*get_stream_func) (int);
++    int (*set_stream_func) (int, void *);
++  } cuda;
++} acc_dispatch_t;
++
++/* The possible states of an accelerator device.  */
++enum gomp_device_state
++{
++  GOMP_DEVICE_UNINITIALIZED,
++  GOMP_DEVICE_INITIALIZED,
++  GOMP_DEVICE_FINALIZED
++};
++
++/* This structure describes an accelerator device.
++   It contains the name of the corresponding libgomp plugin, function handlers
++   for interaction with the device, the ID number of the device, and
++   information about mapped memory.  */
++struct gomp_device_descr
++{
++  /* Immutable data, which is only set during initialization, and which is not
++     guarded by the lock.  */
++
++  /* The name of the device.  */
++  const char *name;
++
++  /* Capabilities of the device (supports OpenACC, OpenMP).  */
++  unsigned int capabilities;
++
++  /* This is the ID number of the device among devices of the same type.  */
++  int target_id;
++
++  /* This is the TYPE of the device.  */
++  enum offload_target_type type;
++
++  /* Function handlers.  */
++  const char *(*get_name_func) (void);
++  unsigned int (*get_caps_func) (void);
++  int (*get_type_func) (void);
++  int (*get_num_devices_func) (void);
++  bool (*init_device_func) (int);
++  bool (*fini_device_func) (int);
++  unsigned (*version_func) (void);
++  int (*load_image_func) (int, unsigned, const void *, struct addr_pair **);
++  bool (*unload_image_func) (int, unsigned, const void *);
++  void *(*alloc_func) (int, size_t);
++  bool (*free_func) (int, void *);
++  bool (*dev2host_func) (int, void *, const void *, size_t);
++  bool (*host2dev_func) (int, void *, const void *, size_t);
++  bool (*dev2dev_func) (int, void *, const void *, size_t);
++  bool (*can_run_func) (void *);
++  void (*run_func) (int, void *, void *, void **);
++  void (*async_run_func) (int, void *, void *, void **, void *);
++
++  /* Splay tree containing information about mapped memory regions.  */
++  struct splay_tree_s mem_map;
++
++  /* Mutex for the mutable data.  */
++  gomp_mutex_t lock;
++
++  /* Current state of the device.  OpenACC allows moving from INITIALIZED state
++     back to UNINITIALIZED state.  OpenMP allows moving only from INITIALIZED
++     to FINALIZED state (at program shutdown).  */
++  enum gomp_device_state state;
++
++  /* OpenACC-specific data and functions.  */
++  /* This is mutable because its data_environ member is mutable (that member
++     is guarded by the lock above).  */
++  acc_dispatch_t openacc;
++};
++
++/* Kind of the pragma, for which gomp_map_vars () is called.  */
++enum gomp_map_vars_kind
++{
++  GOMP_MAP_VARS_OPENACC,
++  GOMP_MAP_VARS_TARGET,
++  GOMP_MAP_VARS_DATA,
++  GOMP_MAP_VARS_ENTER_DATA
++};
++
++extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
++extern void gomp_acc_remove_pointer (void *, bool, int, int);
++
++extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
++					      size_t, void **, void **,
++					      size_t *, void *, bool,
++					      enum gomp_map_vars_kind);
++extern void gomp_unmap_vars (struct target_mem_desc *, bool);
++extern void gomp_init_device (struct gomp_device_descr *);
++extern void gomp_free_memmap (struct splay_tree_s *);
++extern void gomp_unload_device (struct gomp_device_descr *);
+ 
+ /* work.c */
+ 
+@@ -646,8 +1014,28 @@ typedef enum omp_proc_bind_t
+   omp_proc_bind_spread = 4
+ } omp_proc_bind_t;
+ 
++typedef enum omp_lock_hint_t
++{
++  omp_lock_hint_none = 0,
++  omp_lock_hint_uncontended = 1,
++  omp_lock_hint_contended = 2,
++  omp_lock_hint_nonspeculative = 4,
++  omp_lock_hint_speculative = 8,
++} omp_lock_hint_t;
++
++extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
++  __GOMP_NOTHROW;
++extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t)
++  __GOMP_NOTHROW;
++
+ extern int omp_get_cancellation (void) __GOMP_NOTHROW;
+ extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW;
++extern int omp_get_num_places (void) __GOMP_NOTHROW;
++extern int omp_get_place_num_procs (int) __GOMP_NOTHROW;
++extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW;
++extern int omp_get_place_num (void) __GOMP_NOTHROW;
++extern int omp_get_partition_num_places (void) __GOMP_NOTHROW;
++extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW;
+ 
+ extern void omp_set_default_device (int) __GOMP_NOTHROW;
+ extern int omp_get_default_device (void) __GOMP_NOTHROW;
+@@ -656,6 +1044,24 @@ extern int omp_get_num_teams (void) __GO
+ extern int omp_get_team_num (void) __GOMP_NOTHROW;
+ 
+ extern int omp_is_initial_device (void) __GOMP_NOTHROW;
++extern int omp_get_initial_device (void) __GOMP_NOTHROW;
++extern int omp_get_max_task_priority (void) __GOMP_NOTHROW;
++
++extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW;
++extern void omp_target_free (void *, int) __GOMP_NOTHROW;
++extern int omp_target_is_present (void *, int) __GOMP_NOTHROW;
++extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__,
++			      __SIZE_TYPE__, int, int) __GOMP_NOTHROW;
++extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int,
++				   const __SIZE_TYPE__ *,
++				   const __SIZE_TYPE__ *,
++				   const __SIZE_TYPE__ *,
++				   const __SIZE_TYPE__ *,
++				   const __SIZE_TYPE__ *, int, int)
++  __GOMP_NOTHROW;
++extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__,
++				     __SIZE_TYPE__, int) __GOMP_NOTHROW;
++extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW;   
+ 
+ #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \
+     || !defined (HAVE_ATTRIBUTE_ALIAS) \
+@@ -728,4 +1134,34 @@ extern int gomp_test_nest_lock_25 (omp_n
+ # define ialias_call(fn) fn
+ #endif
+ 
++/* Helper function for priority_node_to_task() and
++   task_to_priority_node().
++
++   Return the offset from a task to its priority_node entry.  The
++   priority_node entry has a type of TYPE.  */
++
++static inline size_t
++priority_queue_offset (enum priority_queue_type type)
++{
++  return offsetof (struct gomp_task, pnode[(int) type]);
++}
++
++/* Return the task associated with a priority NODE of type TYPE.  */
++
++static inline struct gomp_task *
++priority_node_to_task (enum priority_queue_type type,
++		       struct priority_node *node)
++{
++  return (struct gomp_task *) ((char *) node - priority_queue_offset (type));
++}
++
++/* Return the priority node of type TYPE for a given TASK.  */
++
++static inline struct priority_node *
++task_to_priority_node (enum priority_queue_type type,
++		       struct gomp_task *task)
++{
++  return (struct priority_node *) ((char *) task
++				   + priority_queue_offset (type));
++}
+ #endif /* LIBGOMP_H */
+--- libgomp/env.c.jj	2014-05-15 10:56:32.420522486 +0200
++++ libgomp/env.c	2016-07-13 16:57:04.437535335 +0200
+@@ -27,6 +27,8 @@
+ 
+ #include "libgomp.h"
+ #include "libgomp_f.h"
++#include "oacc-int.h"
++#include "gomp-constants.h"
+ #include <ctype.h>
+ #include <stdlib.h>
+ #include <stdio.h>
+@@ -56,7 +58,7 @@ struct gomp_task_icv gomp_global_icv = {
+   .nthreads_var = 1,
+   .thread_limit_var = UINT_MAX,
+   .run_sched_var = GFS_DYNAMIC,
+-  .run_sched_modifier = 1,
++  .run_sched_chunk_size = 1,
+   .default_device_var = 0,
+   .dyn_var = false,
+   .nest_var = false,
+@@ -66,6 +68,7 @@ struct gomp_task_icv gomp_global_icv = {
+ 
+ unsigned long gomp_max_active_levels_var = INT_MAX;
+ bool gomp_cancel_var = false;
++int gomp_max_task_priority_var = 0;
+ #ifndef HAVE_SYNC_BUILTINS
+ gomp_mutex_t gomp_managed_threads_lock;
+ #endif
+@@ -76,6 +79,9 @@ char *gomp_bind_var_list;
+ unsigned long gomp_bind_var_list_len;
+ void **gomp_places_list;
+ unsigned long gomp_places_list_len;
++int gomp_debug_var;
++char *goacc_device_type;
++int goacc_device_num;
+ 
+ /* Parse the OMP_SCHEDULE environment variable.  */
+ 
+@@ -118,7 +124,7 @@ parse_schedule (void)
+     ++env;
+   if (*env == '\0')
+     {
+-      gomp_global_icv.run_sched_modifier
++      gomp_global_icv.run_sched_chunk_size
+ 	= gomp_global_icv.run_sched_var != GFS_STATIC;
+       return;
+     }
+@@ -144,7 +150,7 @@ parse_schedule (void)
+ 
+   if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC)
+     value = 1;
+-  gomp_global_icv.run_sched_modifier = value;
++  gomp_global_icv.run_sched_chunk_size = value;
+   return;
+ 
+  unknown:
+@@ -1011,6 +1017,16 @@ parse_affinity (bool ignore)
+   return false;
+ }
+ 
++static void
++parse_acc_device_type (void)
++{
++  const char *value = getenv ("ACC_DEVICE_TYPE");
++
++  /* An unset or empty variable leaves the device type unspecified.  */
++  goacc_device_type = NULL;
++  if (value != NULL && value[0] != '\0')
++    goacc_device_type = strdup (value);
++}
+ 
+ static void
+ handle_omp_display_env (unsigned long stacksize, int wait_policy)
+@@ -1054,7 +1070,7 @@ handle_omp_display_env (unsigned long st
+ 
+   fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr);
+ 
+-  fputs ("  _OPENMP = '201307'\n", stderr);
++  fputs ("  _OPENMP = '201511'\n", stderr);
+   fprintf (stderr, "  OMP_DYNAMIC = '%s'\n",
+ 	   gomp_global_icv.dyn_var ? "TRUE" : "FALSE");
+   fprintf (stderr, "  OMP_NESTED = '%s'\n",
+@@ -1142,6 +1158,8 @@ handle_omp_display_env (unsigned long st
+ 	   gomp_cancel_var ? "TRUE" : "FALSE");
+   fprintf (stderr, "  OMP_DEFAULT_DEVICE = '%d'\n",
+ 	   gomp_global_icv.default_device_var);
++  fprintf (stderr, "  OMP_MAX_TASK_PRIORITY = '%d'\n",
++	   gomp_max_task_priority_var);
+ 
+   if (verbose)
+     {
+@@ -1174,6 +1192,7 @@ initialize_env (void)
+   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
+   parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var);
+   parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
++  parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true);
+   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
+ 		       true);
+   if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false))
+@@ -1181,6 +1200,7 @@ initialize_env (void)
+       gomp_global_icv.thread_limit_var
+ 	= thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var;
+     }
++  parse_int ("GOMP_DEBUG", &gomp_debug_var, true);
+ #ifndef HAVE_SYNC_BUILTINS
+   gomp_mutex_init (&gomp_managed_threads_lock);
+ #endif
+@@ -1271,6 +1291,15 @@ initialize_env (void)
+     }
+ 
+   handle_omp_display_env (stacksize, wait_policy);
++
++  /* OpenACC.  */
++
++  if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true))
++    goacc_device_num = 0;
++
++  parse_acc_device_type ();
++
++  goacc_runtime_initialize ();
+ }
+ 
+ 
+@@ -1312,21 +1341,21 @@ omp_get_nested (void)
+ }
+ 
+ void
+-omp_set_schedule (omp_sched_t kind, int modifier)
++omp_set_schedule (omp_sched_t kind, int chunk_size)
+ {
+   struct gomp_task_icv *icv = gomp_icv (true);
+   switch (kind)
+     {
+     case omp_sched_static:
+-      if (modifier < 1)
+-	modifier = 0;
+-      icv->run_sched_modifier = modifier;
++      if (chunk_size < 1)
++	chunk_size = 0;
++      icv->run_sched_chunk_size = chunk_size;
+       break;
+     case omp_sched_dynamic:
+     case omp_sched_guided:
+-      if (modifier < 1)
+-	modifier = 1;
+-      icv->run_sched_modifier = modifier;
++      if (chunk_size < 1)
++	chunk_size = 1;
++      icv->run_sched_chunk_size = chunk_size;
+       break;
+     case omp_sched_auto:
+       break;
+@@ -1337,11 +1366,11 @@ omp_set_schedule (omp_sched_t kind, int
+ }
+ 
+ void
+-omp_get_schedule (omp_sched_t *kind, int *modifier)
++omp_get_schedule (omp_sched_t *kind, int *chunk_size)
+ {
+   struct gomp_task_icv *icv = gomp_icv (false);
+   *kind = icv->run_sched_var;
+-  *modifier = icv->run_sched_modifier;
++  *chunk_size = icv->run_sched_chunk_size;
+ }
+ 
+ int
+@@ -1377,6 +1406,12 @@ omp_get_cancellation (void)
+   return gomp_cancel_var;
+ }
+ 
++int
++omp_get_max_task_priority (void)
++{
++  return gomp_max_task_priority_var;
++}
++
+ omp_proc_bind_t
+ omp_get_proc_bind (void)
+ {
+@@ -1425,6 +1460,59 @@ omp_is_initial_device (void)
+   return 1;
+ }
+ 
++int
++omp_get_initial_device (void)
++{
++  return GOMP_DEVICE_HOST_FALLBACK;
++}
++
++int
++omp_get_num_places (void)
++{
++  return gomp_places_list_len;
++}
++
++int
++omp_get_place_num (void)
++{
++  if (gomp_places_list == NULL)
++    return -1;
++
++  struct gomp_thread *thr = gomp_thread ();
++  if (thr->place == 0)
++    gomp_init_affinity ();
++
++  return (int) thr->place - 1;
++}
++
++int
++omp_get_partition_num_places (void)
++{
++  if (gomp_places_list == NULL)
++    return 0;
++
++  struct gomp_thread *thr = gomp_thread ();
++  if (thr->place == 0)
++    gomp_init_affinity ();
++
++  return thr->ts.place_partition_len;
++}
++
++void
++omp_get_partition_place_nums (int *place_nums)
++{
++  /* Nothing is written when no places list exists.  */
++  if (gomp_places_list == NULL)
++    return;
++  struct gomp_thread *thr = gomp_thread ();
++  if (thr->place == 0)
++    gomp_init_affinity ();
++
++  unsigned int idx;
++  for (idx = 0; idx < thr->ts.place_partition_len; idx++)
++    place_nums[idx] = thr->ts.place_partition_off + idx;
++}
++
+ ialias (omp_set_dynamic)
+ ialias (omp_set_nested)
+ ialias (omp_set_num_threads)
+@@ -1444,3 +1532,9 @@ ialias (omp_get_num_devices)
+ ialias (omp_get_num_teams)
+ ialias (omp_get_team_num)
+ ialias (omp_is_initial_device)
++ialias (omp_get_initial_device)
++ialias (omp_get_max_task_priority)
++ialias (omp_get_num_places)
++ialias (omp_get_place_num)
++ialias (omp_get_partition_num_places)
++ialias (omp_get_partition_place_nums)
+--- libgomp/openacc.h.jj	2016-07-13 16:57:04.432535397 +0200
++++ libgomp/openacc.h	2016-07-13 16:57:04.432535397 +0200
+@@ -0,0 +1,131 @@
++/* OpenACC Runtime Library User-facing Declarations
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef _OPENACC_H
++#define _OPENACC_H 1
++
++/* The OpenACC standard is silent on whether <openacc.h> may, or must
++   not, include other header files.  We chose to include
++   some.  */
++#include <stddef.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#if __cplusplus >= 201103
++# define __GOACC_NOTHROW noexcept
++#elif __cplusplus
++# define __GOACC_NOTHROW throw ()
++#else /* Not C++ */
++# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
++#endif
++
++/* Types */
++typedef enum acc_device_t {
++  /* Keep in sync with include/gomp-constants.h.  */
++  acc_device_none = 0,
++  acc_device_default = 1,
++  acc_device_host = 2,
++  /* acc_device_host_nonshm = 3 removed.  */
++  acc_device_not_host = 4,
++  acc_device_nvidia = 5,
++  _ACC_device_hwm,
++  /* Ensure enumeration is layout compatible with int.  */
++  _ACC_highest = __INT_MAX__,
++  _ACC_neg = -1
++} acc_device_t;
++
++typedef enum acc_async_t {
++  /* Keep in sync with include/gomp-constants.h.  */
++  acc_async_noval = -1,
++  acc_async_sync  = -2
++} acc_async_t;
++
++int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
++void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
++acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
++void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
++int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
++int acc_async_test (int) __GOACC_NOTHROW;
++int acc_async_test_all (void) __GOACC_NOTHROW;
++void acc_wait (int) __GOACC_NOTHROW;
++void acc_wait_async (int, int) __GOACC_NOTHROW;
++void acc_wait_all (void) __GOACC_NOTHROW;
++void acc_wait_all_async (int) __GOACC_NOTHROW;
++void acc_init (acc_device_t) __GOACC_NOTHROW;
++void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
++#ifdef __cplusplus
++int acc_on_device (int __arg) __GOACC_NOTHROW;
++#else
++int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
++#endif
++void *acc_malloc (size_t) __GOACC_NOTHROW;
++void acc_free (void *) __GOACC_NOTHROW;
++/* Some of these would be more correct with const qualifiers, but
++   the standard specifies otherwise.  */
++void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
++void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
++void *acc_create (void *, size_t) __GOACC_NOTHROW;
++void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
++void acc_copyout (void *, size_t) __GOACC_NOTHROW;
++void acc_delete (void *, size_t) __GOACC_NOTHROW;
++void acc_update_device (void *, size_t) __GOACC_NOTHROW;
++void acc_update_self (void *, size_t) __GOACC_NOTHROW;
++void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
++void acc_unmap_data (void *) __GOACC_NOTHROW;
++void *acc_deviceptr (void *) __GOACC_NOTHROW;
++void *acc_hostptr (void *) __GOACC_NOTHROW;
++int acc_is_present (void *, size_t) __GOACC_NOTHROW;
++void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
++void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
++
++/* Old names.  OpenACC does not specify whether these can or must
++   not be macros, inlines or aliases for the new names.  */
++#define acc_pcreate acc_present_or_create
++#define acc_pcopyin acc_present_or_copyin
++
++/* CUDA-specific routines.  */
++void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
++void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
++void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
++int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
++
++#ifdef __cplusplus
++}
++
++/* Forwarding function with correctly typed arg.  */
++
++#pragma acc routine seq
++inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
++{
++  return acc_on_device ((int) __arg);
++}
++#endif
++
++#endif /* _OPENACC_H */
+--- libgomp/config/linux/doacross.h.jj	2016-07-13 16:57:18.902355979 +0200
++++ libgomp/config/linux/doacross.h	2016-07-13 16:57:18.902355979 +0200
+@@ -0,0 +1,57 @@
++/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This is a Linux specific implementation of doacross spinning.  */
++
++#ifndef GOMP_DOACROSS_H
++#define GOMP_DOACROSS_H 1
++
++#include "libgomp.h"
++#include <errno.h>
++#include "wait.h"
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility push(hidden)
++#endif
++
++static inline void doacross_spin (unsigned long *addr, unsigned long expected,
++				  unsigned long cur)
++{
++  /* Spin until the value loaded from ADDR exceeds EXPECTED.
++     FIXME: back off depending on how large expected - cur is.  */
++  for (;;)
++    {
++      cpu_relax ();
++      cur = __atomic_load_n (addr, MEMMODEL_RELAXED);
++      if (cur > expected)
++	break;
++    }
++}
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility pop
++#endif
++
++#endif /* GOMP_DOACROSS_H */
+--- libgomp/config/posix/doacross.h.jj	2016-07-13 16:57:18.903355966 +0200
++++ libgomp/config/posix/doacross.h	2016-07-13 16:57:18.903355966 +0200
+@@ -0,0 +1,62 @@
++/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This is a generic implementation of doacross spinning.  */
++
++#ifndef GOMP_DOACROSS_H
++#define GOMP_DOACROSS_H 1
++
++#include "libgomp.h"
++#include <errno.h>
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility push(hidden)
++#endif
++
++static inline void
++cpu_relax (void)
++{
++  __asm volatile ("" : : : "memory");
++}
++
++static inline void doacross_spin (unsigned long *addr, unsigned long expected,
++				  unsigned long cur)
++{
++  /* Spin until the value loaded from ADDR exceeds EXPECTED.
++     FIXME: back off depending on how large expected - cur is.  */
++  for (;;)
++    {
++      cpu_relax ();
++      cur = __atomic_load_n (addr, MEMMODEL_RELAXED);
++      if (cur > expected)
++	break;
++    }
++}
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility pop
++#endif
++
++#endif /* GOMP_DOACROSS_H */
+--- libgomp/splay-tree.c.jj	2016-07-13 16:57:18.919355768 +0200
++++ libgomp/splay-tree.c	2016-07-13 16:57:18.919355768 +0200
+@@ -0,0 +1,238 @@
++/* A splay-tree datatype.
++   Copyright (C) 1998-2016 Free Software Foundation, Inc.
++   Contributed by Mark Mitchell (mark@markmitchell.com).
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* The splay tree code copied from include/splay-tree.h and adjusted,
++   so that all the data lives directly in splay_tree_node_s structure
++   and no extra allocations are needed.  */
++
++/* For an easily readable description of splay-trees, see:
++
++     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
++     Algorithms.  Harper-Collins, Inc.  1991.
++
++   The major feature of splay trees is that all basic tree operations
++   are amortized O(log n) time for a tree with n nodes.  */
++
++#include "libgomp.h"
++
++/* Rotate the edge joining the left child N with its parent P.  PP is the
++   grandparents' pointer to P.  */
++
++static inline void
++rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
++{
++  splay_tree_node tmp;
++  tmp = n->right;
++  n->right = p;
++  p->left = tmp;
++  *pp = n;
++}
++
++/* Rotate the edge joining the right child N with its parent P.  PP is the
++   grandparents' pointer to P.  */
++
++static inline void
++rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
++{
++  splay_tree_node tmp;
++  tmp = n->left;
++  n->left = p;
++  p->right = tmp;
++  *pp = n;
++}
++
++/* Bottom up splay of KEY.  */
++
++static void
++splay_tree_splay (splay_tree sp, splay_tree_key key)
++{
++  if (sp->root == NULL)
++    return;
++
++  do {
++    int cmp1, cmp2;
++    splay_tree_node n, c;
++
++    n = sp->root;
++    cmp1 = splay_compare (key, &n->key);
++
++    /* Found.  */
++    if (cmp1 == 0)
++      return;
++
++    /* Left or right?  If no child, then we're done.  */
++    if (cmp1 < 0)
++      c = n->left;
++    else
++      c = n->right;
++    if (!c)
++      return;
++
++    /* Next one left or right?  If found or no child, we're done
++       after one rotation.  */
++    cmp2 = splay_compare (key, &c->key);
++    if (cmp2 == 0
++	|| (cmp2 < 0 && !c->left)
++	|| (cmp2 > 0 && !c->right))
++      {
++	if (cmp1 < 0)
++	  rotate_left (&sp->root, n, c);
++	else
++	  rotate_right (&sp->root, n, c);
++	return;
++      }
++
++    /* Now we have the four cases of double-rotation.  */
++    if (cmp1 < 0 && cmp2 < 0)
++      {
++	rotate_left (&n->left, c, c->left);
++	rotate_left (&sp->root, n, n->left);
++      }
++    else if (cmp1 > 0 && cmp2 > 0)
++      {
++	rotate_right (&n->right, c, c->right);
++	rotate_right (&sp->root, n, n->right);
++      }
++    else if (cmp1 < 0 && cmp2 > 0)
++      {
++	rotate_right (&n->left, c, c->right);
++	rotate_left (&sp->root, n, n->left);
++      }
++    else if (cmp1 > 0 && cmp2 < 0)
++      {
++	rotate_left (&n->right, c, c->left);
++	rotate_right (&sp->root, n, n->right);
++      }
++  } while (1);
++}
++
++/* Insert a new NODE into SP.  The NODE shouldn't exist in the tree.  */
++
++attribute_hidden void
++splay_tree_insert (splay_tree sp, splay_tree_node node)
++{
++  int comparison = 0;
++
++  splay_tree_splay (sp, &node->key);
++
++  if (sp->root)
++    comparison = splay_compare (&sp->root->key, &node->key);
++
++  if (sp->root && comparison == 0)
++    gomp_fatal ("Duplicate node");
++  else
++    {
++      /* Insert it at the root.  */
++      if (sp->root == NULL)
++	node->left = node->right = NULL;
++      else if (comparison < 0)
++	{
++	  node->left = sp->root;
++	  node->right = node->left->right;
++	  node->left->right = NULL;
++	}
++      else
++	{
++	  node->right = sp->root;
++	  node->left = node->right->left;
++	  node->right->left = NULL;
++	}
++
++      sp->root = node;
++    }
++}
++
++/* Remove node with KEY from SP.  It is not an error if it did not exist.  */
++
++attribute_hidden void
++splay_tree_remove (splay_tree sp, splay_tree_key key)
++{
++  splay_tree_splay (sp, key);
++
++  if (sp->root && splay_compare (&sp->root->key, key) == 0)
++    {
++      splay_tree_node left, right;
++
++      left = sp->root->left;
++      right = sp->root->right;
++
++      /* One of the children is now the root.  Doesn't matter much
++	 which, so long as we preserve the properties of the tree.  */
++      if (left)
++	{
++	  sp->root = left;
++
++	  /* If there was a right child as well, hang it off the
++	     right-most leaf of the left child.  */
++	  if (right)
++	    {
++	      while (left->right)
++		left = left->right;
++	      left->right = right;
++	    }
++	}
++      else
++	sp->root = right;
++    }
++}
++
++/* Lookup KEY in SP, returning a pointer to the matching node's key if
++   present, and NULL otherwise.  */
++
++attribute_hidden splay_tree_key
++splay_tree_lookup (splay_tree sp, splay_tree_key key)
++{
++  splay_tree_splay (sp, key);
++
++  if (sp->root && splay_compare (&sp->root->key, key) == 0)
++    return &sp->root->key;
++  else
++    return NULL;
++}
++
++/* Helper function for splay_tree_foreach.
++
++   Run FUNC, with DATA, on the key of every node in the subtree at NODE.  */
++
++static void
++splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func,
++			     void *data)
++{
++  if (!node)
++    return;
++  func (&node->key, data);
++  splay_tree_foreach_internal (node->left, func, data);
++  /* Tail call; the compiler is expected to turn it into iteration.  */
++  splay_tree_foreach_internal (node->right, func, data);
++}
++
++/* Run FUNC on each of the nodes in SP.  */
++
++attribute_hidden void
++splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data)
++{
++  splay_tree_foreach_internal (sp->root, func, data);
++}
+--- libgomp/libgomp-plugin.c.jj	2016-07-13 16:57:04.435535360 +0200
++++ libgomp/libgomp-plugin.c	2016-07-13 16:57:04.435535360 +0200
+@@ -0,0 +1,80 @@
++/* Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* Exported (non-hidden) functions exposing libgomp interface for plugins.  */
++
++#include <stdlib.h>
++
++#include "libgomp.h"
++#include "libgomp-plugin.h"
++
++void *
++GOMP_PLUGIN_malloc (size_t size)
++{
++  return gomp_malloc (size);
++}
++
++void *
++GOMP_PLUGIN_malloc_cleared (size_t size)
++{
++  return gomp_malloc_cleared (size);
++}
++
++void *
++GOMP_PLUGIN_realloc (void *ptr, size_t size)
++{
++  return gomp_realloc (ptr, size);
++}
++
++void
++GOMP_PLUGIN_debug (int kind, const char *msg, ...)
++{
++  va_list ap;
++
++  va_start (ap, msg);
++  gomp_vdebug (kind, msg, ap);
++  va_end (ap);
++}
++
++void
++GOMP_PLUGIN_error (const char *msg, ...)
++{
++  va_list ap;
++
++  va_start (ap, msg);
++  gomp_verror (msg, ap);
++  va_end (ap);
++}
++
++void
++GOMP_PLUGIN_fatal (const char *msg, ...)
++{
++  va_list ap;
++
++  va_start (ap, msg);
++  gomp_vfatal (msg, ap);
++  va_end (ap);
++}
+--- libgomp/libgomp-plugin.h.jj	2016-07-13 16:57:04.438535323 +0200
++++ libgomp/libgomp-plugin.h	2016-07-13 16:57:04.438535323 +0200
+@@ -0,0 +1,80 @@
++/* Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* An interface to various libgomp-internal functions for use by plugins.  */
++
++#ifndef LIBGOMP_PLUGIN_H
++#define LIBGOMP_PLUGIN_H 1
++
++#include <stddef.h>
++#include <stdint.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/* Capabilities of offloading devices.  */
++#define GOMP_OFFLOAD_CAP_SHARED_MEM	(1 << 0)
++#define GOMP_OFFLOAD_CAP_NATIVE_EXEC	(1 << 1)
++#define GOMP_OFFLOAD_CAP_OPENMP_400	(1 << 2)
++#define GOMP_OFFLOAD_CAP_OPENACC_200	(1 << 3)
++
++/* Type of offload target device.  Keep in sync with include/gomp-constants.h.  */
++enum offload_target_type
++{
++  OFFLOAD_TARGET_TYPE_HOST = 2,
++  /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed.  */
++  OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
++  OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
++  OFFLOAD_TARGET_TYPE_HSA = 7
++};
++
++/* Auxiliary struct, used for transferring pairs of addresses from plugin
++   to libgomp.  */
++struct addr_pair
++{
++  uintptr_t start;
++  uintptr_t end;
++};
++
++/* Miscellaneous functions.  */
++extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
++extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
++extern void *GOMP_PLUGIN_realloc (void *, size_t);
++void GOMP_PLUGIN_target_task_completion (void *);
++
++extern void GOMP_PLUGIN_debug (int, const char *, ...)
++	__attribute__ ((format (printf, 2, 3)));
++extern void GOMP_PLUGIN_error (const char *, ...)
++	__attribute__ ((format (printf, 1, 2)));
++extern void GOMP_PLUGIN_fatal (const char *, ...)
++	__attribute__ ((noreturn, format (printf, 1, 2)));
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+--- libgomp/oacc-async.c.jj	2016-07-13 16:57:13.488423109 +0200
++++ libgomp/oacc-async.c	2016-07-13 16:57:13.488423109 +0200
+@@ -0,0 +1,107 @@
++/* OpenACC Runtime Library Definitions.
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include <assert.h>
++#include "openacc.h"
++#include "libgomp.h"
++#include "oacc-int.h"
++
++int
++acc_async_test (int async)
++{
++  if (async < acc_async_sync)
++    gomp_fatal ("invalid async argument: %d", async);
++
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  return thr->dev->openacc.async_test_func (async);
++}
++
++int
++acc_async_test_all (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  return thr->dev->openacc.async_test_all_func ();
++}
++
++void
++acc_wait (int async)
++{
++  if (async < acc_async_sync)
++    gomp_fatal ("invalid async argument: %d", async);
++
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  thr->dev->openacc.async_wait_func (async);
++}
++
++void
++acc_wait_async (int async1, int async2)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  thr->dev->openacc.async_wait_async_func (async1, async2);
++}
++
++void
++acc_wait_all (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  thr->dev->openacc.async_wait_all_func ();
++}
++
++void
++acc_wait_all_async (int async)
++{
++  if (async < acc_async_sync)
++    gomp_fatal ("invalid async argument: %d", async);
++
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (!thr || !thr->dev)
++    gomp_fatal ("no device active");
++
++  thr->dev->openacc.async_wait_all_async_func (async);
++}
+--- libgomp/splay-tree.h.jj	2016-07-13 16:57:18.934355582 +0200
++++ libgomp/splay-tree.h	2016-07-13 16:57:18.934355582 +0200
+@@ -0,0 +1,130 @@
++/* A splay-tree datatype.
++   Copyright (C) 1998-2016 Free Software Foundation, Inc.
++   Contributed by Mark Mitchell (mark@markmitchell.com).
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* The splay tree code copied from include/splay-tree.h and adjusted,
++   so that all the data lives directly in splay_tree_node_s structure
++   and no extra allocations are needed.
++
++   Before including this header, a file should add:
++typedef struct splay_tree_node_s *splay_tree_node;
++typedef struct splay_tree_s *splay_tree;
++typedef struct splay_tree_key_s *splay_tree_key;
++   define splay_tree_key_s structure, and define
++   splay_compare inline function.
++
++   Alternatively, they can define splay_tree_prefix macro before
++   including this header and then all the above types, the
++   splay_compare function and the splay_tree_{lookup,insert,remove}
++   function will be prefixed by that prefix.  If splay_tree_prefix
++   macro is defined, this header must be included twice: once where
++   you need the header file definitions, and once where you need the
++   .c implementation routines.  In the latter case, you must also
++   define the macro splay_tree_c.  See the include of splay-tree.h in
++   priority_queue.[hc] for an example.  */
++
++/* For an easily readable description of splay-trees, see:
++
++     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
++     Algorithms.  Harper-Collins, Inc.  1991.
++
++   The major feature of splay trees is that all basic tree operations
++   are amortized O(log n) time for a tree with n nodes.  */
++
++#ifdef splay_tree_prefix
++# define splay_tree_name_1(prefix, name) prefix ## _ ## name
++# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name)
++# define splay_tree_node_s	\
++    splay_tree_name (splay_tree_prefix, splay_tree_node_s)
++# define splay_tree_s		\
++    splay_tree_name (splay_tree_prefix, splay_tree_s)
++# define splay_tree_key_s	\
++    splay_tree_name (splay_tree_prefix, splay_tree_key_s)
++# define splay_tree_node	\
++    splay_tree_name (splay_tree_prefix, splay_tree_node)
++# define splay_tree		\
++    splay_tree_name (splay_tree_prefix, splay_tree)
++# define splay_tree_key		\
++    splay_tree_name (splay_tree_prefix, splay_tree_key)
++# define splay_compare		\
++    splay_tree_name (splay_tree_prefix, splay_compare)
++# define splay_tree_lookup	\
++    splay_tree_name (splay_tree_prefix, splay_tree_lookup)
++# define splay_tree_insert	\
++    splay_tree_name (splay_tree_prefix, splay_tree_insert)
++# define splay_tree_remove	\
++    splay_tree_name (splay_tree_prefix, splay_tree_remove)
++# define splay_tree_foreach	\
++    splay_tree_name (splay_tree_prefix, splay_tree_foreach)
++# define splay_tree_callback	\
++    splay_tree_name (splay_tree_prefix, splay_tree_callback)
++#endif
++
++#ifndef splay_tree_c
++/* Header file definitions and prototypes.  */
++
++/* The nodes in the splay tree.  */
++struct splay_tree_node_s {
++  struct splay_tree_key_s key;
++  /* The left and right children, respectively.  */
++  splay_tree_node left;
++  splay_tree_node right;
++};
++
++/* The splay tree.  */
++struct splay_tree_s {
++  splay_tree_node root;
++};
++
++typedef void (*splay_tree_callback) (splay_tree_key, void *);
++
++extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key);
++extern void splay_tree_insert (splay_tree, splay_tree_node);
++extern void splay_tree_remove (splay_tree, splay_tree_key);
++extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *);
++#else  /* splay_tree_c */
++#  ifdef splay_tree_prefix
++#    include "splay-tree.c"
++#    undef splay_tree_name_1
++#    undef splay_tree_name
++#    undef splay_tree_node_s
++#    undef splay_tree_s
++#    undef splay_tree_key_s
++#    undef splay_tree_node
++#    undef splay_tree
++#    undef splay_tree_key
++#    undef splay_compare
++#    undef splay_tree_lookup
++#    undef splay_tree_insert
++#    undef splay_tree_remove
++#    undef splay_tree_foreach
++#    undef splay_tree_callback
++#    undef splay_tree_c
++#  endif
++#endif /* #ifndef splay_tree_c */
++
++#ifdef splay_tree_prefix
++#  undef splay_tree_prefix
++#endif
+--- libgomp/oacc-plugin.c.jj	2016-07-13 16:57:13.481423196 +0200
++++ libgomp/oacc-plugin.c	2016-07-14 15:40:21.653151873 +0200
+@@ -0,0 +1,44 @@
++/* Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* Initialize and register OpenACC dispatch table from libgomp plugin.  */
++
++#include "libgomp.h"
++#include "oacc-plugin.h"
++#include "oacc-int.h"
++
++void
++GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
++{
++}
++
++/* Return the target-specific part of the TLS data for the current thread.  */
++
++void *
++GOMP_PLUGIN_acc_thread (void)
++{
++  return NULL;
++}
+--- libgomp/oacc-init.c.jj	2016-07-13 16:57:04.423535509 +0200
++++ libgomp/oacc-init.c	2016-07-14 19:06:41.679575688 +0200
+@@ -0,0 +1,640 @@
++/* OpenACC Runtime initialization routines
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include "oacc-int.h"
++#include "openacc.h"
++#include <assert.h>
++#include <stdlib.h>
++#include <strings.h>
++#include <stdbool.h>
++#include <string.h>
++
++/* This lock is used to protect access to cached_base_dev, dispatchers and
++   the (abstract) initialisation state of attached offloading devices.  */
++
++static gomp_mutex_t acc_device_lock;
++
++/* A cached version of the dispatcher for the global "current" accelerator type,
++   e.g. used as the default when creating new host threads.  This is the
++   device-type equivalent of goacc_device_num (which specifies which device to
++   use out of potentially several of the same type).  If there are several
++   devices of a given type, this points at the first one.  */
++
++static struct gomp_device_descr *cached_base_dev = NULL;
++
++#if defined HAVE_TLS || defined USE_EMUTLS
++__thread struct goacc_thread *goacc_tls_data;
++#else
++pthread_key_t goacc_tls_key;
++#endif
++static pthread_key_t goacc_cleanup_key;
++
++static struct goacc_thread *goacc_threads;
++static gomp_mutex_t goacc_thread_lock;
++
++/* An array of dispatchers for device types, indexed by the type.  This array
++   only references "base" devices, and other instances of the same type are
++   found by simply indexing from each such device (which are stored linearly,
++   grouped by device in target.c:devices).  */
++static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
++
++attribute_hidden void
++goacc_register (struct gomp_device_descr *disp)
++{
++  /* Only register the 0th device here.  */
++  if (disp->target_id != 0)
++    return;
++
++  gomp_mutex_lock (&acc_device_lock);
++
++  assert (acc_device_type (disp->type) != acc_device_none
++	  && acc_device_type (disp->type) != acc_device_default
++	  && acc_device_type (disp->type) != acc_device_not_host);
++  assert (!dispatchers[disp->type]);
++  dispatchers[disp->type] = disp;
++
++  gomp_mutex_unlock (&acc_device_lock);
++}
++
++static const char *
++name_of_acc_device_t (enum acc_device_t type)
++{
++  switch (type)
++    {
++    case acc_device_none: return "none";
++    case acc_device_default: return "default";
++    case acc_device_host: return "host";
++    case acc_device_not_host: return "not_host";
++    case acc_device_nvidia: return "nvidia";
++    default: gomp_fatal ("unknown device type %u", (unsigned) type);
++    }
++}
++
++/* ACC_DEVICE_LOCK must be held before calling this function.  If FAIL_IS_ERROR
++   is true, this function raises an error if there are no devices of type D,
++   otherwise it returns NULL in that case.  */
++
++static struct gomp_device_descr *
++resolve_device (acc_device_t d, bool fail_is_error)
++{
++  acc_device_t d_arg = d;
++
++  switch (d)
++    {
++    case acc_device_default:
++      {
++	if (goacc_device_type)
++	  {
++	    /* Lookup the named device.  */
++	    if (!strcasecmp (goacc_device_type, "host"))
++	      {
++		d = acc_device_host;
++		goto found;
++	      }
++
++	    if (fail_is_error)
++	      {
++		gomp_mutex_unlock (&acc_device_lock);
++		gomp_fatal ("device type %s not supported", goacc_device_type);
++	      }
++	    else
++	      return NULL;
++	  }
++
++	/* No device type named: fall through to the not_host case, which
++	   resolves a default request to the host device.  */
++	d = acc_device_not_host;
++      }
++      /* FALLTHROUGH */
++
++    case acc_device_not_host:
++      if (d_arg == acc_device_default)
++	{
++	  d = acc_device_host;
++	  goto found;
++	}
++      if (fail_is_error)
++        {
++	  gomp_mutex_unlock (&acc_device_lock);
++	  gomp_fatal ("no device found");
++	}
++      else
++        return NULL;
++      break;
++
++    case acc_device_host:
++      break;
++
++    default:	/* D indexes DISPATCHERS, which has _ACC_device_hwm slots.  */
++      if (d >= _ACC_device_hwm)
++	{
++	  if (fail_is_error)
++	    goto unsupported_device;
++	  else
++	    return NULL;
++	}
++      break;
++    }
++ found:
++
++  assert (d != acc_device_none
++	  && d != acc_device_default
++	  && d != acc_device_not_host);
++
++  if (dispatchers[d] == NULL && fail_is_error)
++    {
++    unsupported_device:
++      gomp_mutex_unlock (&acc_device_lock);
++      gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
++    }
++
++  return dispatchers[d];
++}
++
++/* Emit a suitable error if no device of a particular type is available, or
++   the given device number is out-of-range.  */
++static void
++acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
++{
++  if (ndevs == 0)
++    gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
++  else
++    gomp_fatal ("device %u out of range", ord);
++}
++
++/* This is called when plugins have been initialized, and serves to call
++   (indirectly) the target's device_init hook.  Calling multiple times without
++   an intervening acc_shutdown_1 call is an error.  ACC_DEVICE_LOCK must be
++   held before calling this function.  */
++
++static struct gomp_device_descr *
++acc_init_1 (acc_device_t d)
++{
++  struct gomp_device_descr *base_dev, *acc_dev;
++  int ndevs;
++
++  base_dev = resolve_device (d, true);
++
++  ndevs = base_dev->get_num_devices_func ();
++
++  if (ndevs <= 0 || goacc_device_num >= ndevs)
++    acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
++
++  acc_dev = &base_dev[goacc_device_num];
++
++  gomp_mutex_lock (&acc_dev->lock);
++  if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
++    {
++      gomp_mutex_unlock (&acc_dev->lock);
++      gomp_fatal ("device already active");
++    }
++
++  gomp_init_device (acc_dev);
++  gomp_mutex_unlock (&acc_dev->lock);
++
++  return base_dev;
++}
++
++/* ACC_DEVICE_LOCK must be held before calling this function.  */
++
++static void
++acc_shutdown_1 (acc_device_t d)
++{
++  struct gomp_device_descr *base_dev;
++  struct goacc_thread *walk;
++  int ndevs, i;
++  bool devices_active = false;
++
++  /* Get the base device for this device type.  */
++  base_dev = resolve_device (d, true);
++
++  ndevs = base_dev->get_num_devices_func ();
++
++  gomp_mutex_lock (&goacc_thread_lock);
++
++  /* Free target-specific TLS data and close all devices.  */
++  for (walk = goacc_threads; walk != NULL; walk = walk->next)
++    {
++      if (walk->target_tls)
++	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
++
++      walk->target_tls = NULL;
++
++      /* Similarly, if this happens then user code has done something weird.  */
++      if (walk->saved_bound_dev)
++	{
++	  gomp_mutex_unlock (&goacc_thread_lock);
++	  gomp_fatal ("shutdown during host fallback");
++	}
++
++      if (walk->dev)
++	{
++	  gomp_mutex_lock (&walk->dev->lock);
++	  gomp_free_memmap (&walk->dev->mem_map);
++	  gomp_mutex_unlock (&walk->dev->lock);
++
++	  walk->dev = NULL;
++	  walk->base_dev = NULL;
++	}
++    }
++
++  gomp_mutex_unlock (&goacc_thread_lock);
++
++  /* Close all the devices of this type that have been opened.  */
++  bool ret = true;
++  for (i = 0; i < ndevs; i++)
++    {
++      struct gomp_device_descr *acc_dev = &base_dev[i];
++      gomp_mutex_lock (&acc_dev->lock);
++      if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
++        {
++	  devices_active = true;
++	  ret &= acc_dev->fini_device_func (acc_dev->target_id);
++	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
++	}
++      gomp_mutex_unlock (&acc_dev->lock);
++    }
++
++  if (!ret)
++    gomp_fatal ("device finalization failed");
++
++  if (!devices_active)
++    gomp_fatal ("no device initialized");
++}
++
++static struct goacc_thread *
++goacc_new_thread (void)
++{
++  struct goacc_thread *thr = gomp_malloc (sizeof *thr);
++  /* THR is sized from its own type; only NEXT is initialized here.  */
++#if defined HAVE_TLS || defined USE_EMUTLS
++  goacc_tls_data = thr;
++#else
++  pthread_setspecific (goacc_tls_key, thr);
++#endif
++
++  pthread_setspecific (goacc_cleanup_key, thr);
++
++  gomp_mutex_lock (&goacc_thread_lock);
++  thr->next = goacc_threads;
++  goacc_threads = thr;
++  gomp_mutex_unlock (&goacc_thread_lock);
++
++  return thr;
++}
++
++static void
++goacc_destroy_thread (void *data)
++{
++  struct goacc_thread *thr = data, *walk, *prev;
++
++  gomp_mutex_lock (&goacc_thread_lock);
++
++  if (thr)
++    {
++      struct gomp_device_descr *acc_dev = thr->dev;
++
++      if (acc_dev && thr->target_tls)
++	{
++	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
++	  thr->target_tls = NULL;
++	}
++
++      assert (!thr->mapped_data);
++
++      /* Remove from thread list.  */
++      for (prev = NULL, walk = goacc_threads; walk;
++	   prev = walk, walk = walk->next)
++	if (walk == thr)
++	  {
++	    if (prev == NULL)
++	      goacc_threads = walk->next;
++	    else
++	      prev->next = walk->next;
++
++	    free (thr);
++
++	    break;
++	  }
++
++      assert (walk);
++    }
++
++  gomp_mutex_unlock (&goacc_thread_lock);
++}
++
++/* Use the ORD'th device instance for the current host thread (or -1 for the
++   current global default).  The device (and the runtime) must be initialised
++   before calling this function.  */
++
++void
++goacc_attach_host_thread_to_device (int ord)
++{
++  struct goacc_thread *thr = goacc_thread ();
++  struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
++  int num_devices;
++  
++  if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
++    return;
++  
++  if (ord < 0)
++    ord = goacc_device_num;
++  
++  /* Decide which type of device to use.  If the current thread has a device
++     type already (e.g. set by acc_set_device_type), use that, else use the
++     global default.  */
++  if (thr && thr->base_dev)
++    base_dev = thr->base_dev;
++  else
++    {
++      assert (cached_base_dev);
++      base_dev = cached_base_dev;
++    }
++  
++  num_devices = base_dev->get_num_devices_func ();
++  if (num_devices <= 0 || ord >= num_devices)
++    acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
++			      num_devices);
++  
++  if (!thr)
++    thr = goacc_new_thread ();
++  
++  thr->base_dev = base_dev;
++  thr->dev = acc_dev = &base_dev[ord];
++  thr->saved_bound_dev = NULL;
++  
++  thr->target_tls
++    = acc_dev->openacc.create_thread_data_func (ord);
++  
++  acc_dev->openacc.async_set_async_func (acc_async_sync);
++}
++
++/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
++   init/shutdown is per-process or per-thread.  We choose per-process.  */
++
++void
++acc_init (acc_device_t d)
++{
++  gomp_mutex_lock (&acc_device_lock);
++
++  cached_base_dev = acc_init_1 (d);
++
++  gomp_mutex_unlock (&acc_device_lock);
++  
++  goacc_attach_host_thread_to_device (-1);
++}
++
++ialias (acc_init)
++
++void
++acc_shutdown (acc_device_t d)
++{
++  gomp_mutex_lock (&acc_device_lock);
++
++  acc_shutdown_1 (d);
++
++  gomp_mutex_unlock (&acc_device_lock);
++}
++
++ialias (acc_shutdown)
++
++int
++acc_get_num_devices (acc_device_t d)
++{
++  int n = 0;
++  struct gomp_device_descr *acc_dev;
++
++  if (d == acc_device_none)
++    return 0;
++
++  gomp_mutex_lock (&acc_device_lock);
++  acc_dev = resolve_device (d, false);
++  gomp_mutex_unlock (&acc_device_lock);
++
++  if (!acc_dev)
++    return 0;
++
++  n = acc_dev->get_num_devices_func ();
++  if (n < 0)
++    n = 0;
++
++  return n;
++}
++
++ialias (acc_get_num_devices)
++
++/* Set the device type for the current thread only (using the current global
++   default device number), initialising that device if necessary.  Also set the
++   default device type for new threads to D.  */
++
++void
++acc_set_device_type (acc_device_t d)
++{
++  struct gomp_device_descr *base_dev, *acc_dev;
++  struct goacc_thread *thr = goacc_thread ();
++
++  gomp_mutex_lock (&acc_device_lock);
++
++  cached_base_dev = base_dev = resolve_device (d, true);
++  acc_dev = &base_dev[goacc_device_num];
++
++  gomp_mutex_lock (&acc_dev->lock);
++  if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
++    gomp_init_device (acc_dev);
++  gomp_mutex_unlock (&acc_dev->lock);
++
++  gomp_mutex_unlock (&acc_device_lock);
++
++  /* We're changing device type: invalidate the current thread's dev and
++     base_dev pointers.  */
++  if (thr && thr->base_dev != base_dev)
++    {
++      thr->base_dev = thr->dev = NULL;
++    }
++
++  goacc_attach_host_thread_to_device (-1);
++}
++
++ialias (acc_set_device_type)
++
++acc_device_t
++acc_get_device_type (void)
++{
++  acc_device_t res = acc_device_none;
++  struct gomp_device_descr *dev;
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (thr && thr->base_dev)
++    res = acc_device_type (thr->base_dev->type);
++  else
++    {
++      gomp_mutex_lock (&acc_device_lock);
++      dev = resolve_device (acc_device_default, true);
++      gomp_mutex_unlock (&acc_device_lock);
++      res = acc_device_type (dev->type);
++    }
++
++  assert (res != acc_device_default
++	  && res != acc_device_not_host);
++
++  return res;
++}
++
++ialias (acc_get_device_type)
++
++int
++acc_get_device_num (acc_device_t d)
++{
++  const struct gomp_device_descr *dev;
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (d >= _ACC_device_hwm)
++    gomp_fatal ("unknown device type %u", (unsigned) d);
++
++  gomp_mutex_lock (&acc_device_lock);
++  dev = resolve_device (d, true);
++  gomp_mutex_unlock (&acc_device_lock);
++
++  if (thr && thr->base_dev == dev && thr->dev)
++    return thr->dev->target_id;
++
++  return goacc_device_num;
++}
++
++ialias (acc_get_device_num)
++
++void
++acc_set_device_num (int ord, acc_device_t d)
++{
++  struct gomp_device_descr *base_dev, *acc_dev;
++  int num_devices;
++
++  if (ord < 0)
++    ord = goacc_device_num;
++
++  if ((int) d == 0)
++    /* Set whatever device is being used by the current host thread to use
++       device instance ORD.  It's unclear if this is supposed to affect other
++       host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
++    goacc_attach_host_thread_to_device (ord);
++  else
++    {
++      gomp_mutex_lock (&acc_device_lock);
++
++      cached_base_dev = base_dev = resolve_device (d, true);
++
++      num_devices = base_dev->get_num_devices_func ();
++
++      if (num_devices <= 0 || ord >= num_devices)
++        acc_dev_num_out_of_range (d, ord, num_devices);
++
++      acc_dev = &base_dev[ord];
++
++      gomp_mutex_lock (&acc_dev->lock);
++      if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
++        gomp_init_device (acc_dev);
++      gomp_mutex_unlock (&acc_dev->lock);
++
++      gomp_mutex_unlock (&acc_device_lock);
++
++      goacc_attach_host_thread_to_device (ord);
++    }
++  
++  goacc_device_num = ord;
++}
++
++ialias (acc_set_device_num)
++
++int
++acc_on_device (acc_device_t dev)
++{
++  return dev == acc_device_host || dev == acc_device_none;
++}
++
++ialias (acc_on_device)
++
++attribute_hidden void
++goacc_runtime_initialize (void)
++{
++  gomp_mutex_init (&acc_device_lock);
++
++#if !(defined HAVE_TLS || defined USE_EMUTLS)
++  pthread_key_create (&goacc_tls_key, NULL);
++#endif
++
++  pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
++
++  cached_base_dev = NULL;
++
++  goacc_threads = NULL;
++  gomp_mutex_init (&goacc_thread_lock);
++
++  /* Initialize and register the 'host' device type.  */
++  goacc_host_init ();
++}
++
++/* Compiler helper functions */
++
++attribute_hidden void
++goacc_save_and_set_bind (acc_device_t d)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  assert (!thr->saved_bound_dev);
++
++  thr->saved_bound_dev = thr->dev;
++  thr->dev = dispatchers[d];
++}
++
++attribute_hidden void
++goacc_restore_bind (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  thr->dev = thr->saved_bound_dev;
++  thr->saved_bound_dev = NULL;
++}
++
++/* This is called from any OpenACC support function that may need to implicitly
++   initialize the libgomp runtime, either globally or from a new host thread. 
++   On exit "goacc_thread" will return a valid & populated thread block.  */
++
++attribute_hidden void
++goacc_lazy_initialize (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (thr && thr->dev)
++    return;
++
++  if (!cached_base_dev)
++    acc_init (acc_device_default);
++  else
++    goacc_attach_host_thread_to_device (-1);
++}
+--- libgomp/oacc-int.h.jj	2016-07-13 16:57:04.400535794 +0200
++++ libgomp/oacc-int.h	2016-07-13 16:57:04.400535794 +0200
+@@ -0,0 +1,106 @@
++/* OpenACC Runtime - internal declarations
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file contains data types and function declarations that are not
++   part of the official OpenACC user interface.  There are declarations
++   in here that are part of the GNU OpenACC ABI, in that the compiler is
++   required to know about them and use them.
++
++   The convention is that the all caps prefix "GOACC" is used to group items
++   that are part of the external ABI, and the lower case prefix "goacc"
++   is used to group items that are completely private to the library.  */
++
++#ifndef OACC_INT_H
++#define OACC_INT_H 1
++
++#include "openacc.h"
++#include "config.h"
++#include <stddef.h>
++#include <stdbool.h>
++#include <stdarg.h>
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility push(hidden)
++#endif
++
++static inline enum acc_device_t
++acc_device_type (enum offload_target_type type)
++{
++  return (enum acc_device_t) type;
++}
++
++struct goacc_thread
++{
++  /* The base device for the current thread.  */
++  struct gomp_device_descr *base_dev;
++
++  /* The device for the current thread.  */
++  struct gomp_device_descr *dev;
++
++  struct gomp_device_descr *saved_bound_dev;
++
++  /* This is a linked list of data mapped by the "acc data" pragma, following
++     strictly push/pop semantics according to lexical scope.  */
++  struct target_mem_desc *mapped_data;
++
++  /* These structures form a list: this is the next thread in that list.  */
++  struct goacc_thread *next;
++
++  /* Target-specific data (used by plugin).  */
++  void *target_tls;
++};
++
++#if defined HAVE_TLS || defined USE_EMUTLS
++extern __thread struct goacc_thread *goacc_tls_data;
++static inline struct goacc_thread *
++goacc_thread (void)
++{
++  return goacc_tls_data;
++}
++#else
++extern pthread_key_t goacc_tls_key;
++static inline struct goacc_thread *
++goacc_thread (void)
++{
++  return pthread_getspecific (goacc_tls_key);
++}
++#endif
++
++void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW;
++void goacc_attach_host_thread_to_device (int);
++void goacc_runtime_initialize (void);
++void goacc_save_and_set_bind (acc_device_t);
++void goacc_restore_bind (void);
++void goacc_lazy_initialize (void);
++void goacc_host_init (void);
++
++#ifdef HAVE_ATTRIBUTE_VISIBILITY
++# pragma GCC visibility pop
++#endif
++
++#endif
+--- libgomp/oacc-host.c.jj	2016-07-13 16:57:13.489423096 +0200
++++ libgomp/oacc-host.c	2016-07-13 16:57:13.489423096 +0200
+@@ -0,0 +1,266 @@
++/* OpenACC Runtime Library: acc_device_host.
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "libgomp.h"
++#include "oacc-int.h"
++#include "gomp-constants.h"
++
++#include <stdbool.h>
++#include <stddef.h>
++#include <stdint.h>
++
++static struct gomp_device_descr host_dispatch;
++
++static const char *
++host_get_name (void)
++{
++  return host_dispatch.name;
++}
++
++static unsigned int
++host_get_caps (void)
++{
++  return host_dispatch.capabilities;
++}
++
++static int
++host_get_type (void)
++{
++  return host_dispatch.type;
++}
++
++static int
++host_get_num_devices (void)
++{
++  return 1;
++}
++
++static bool
++host_init_device (int n __attribute__ ((unused)))
++{
++  return true;
++}
++
++static bool
++host_fini_device (int n __attribute__ ((unused)))
++{
++  return true;
++}
++
++static unsigned
++host_version (void)
++{
++  return GOMP_VERSION;
++}
++
++static int
++host_load_image (int n __attribute__ ((unused)),
++		 unsigned v __attribute__ ((unused)),
++		 const void *t __attribute__ ((unused)),
++		 struct addr_pair **r __attribute__ ((unused)))
++{
++  return 0;
++}
++
++static bool
++host_unload_image (int n __attribute__ ((unused)),
++		   unsigned v __attribute__ ((unused)),
++		   const void *t __attribute__ ((unused)))
++{
++  return true;
++}
++
++static void *
++host_alloc (int n __attribute__ ((unused)), size_t s)
++{
++  return gomp_malloc (s);
++}
++
++static bool
++host_free (int n __attribute__ ((unused)), void *p)
++{
++  free (p);
++  return true;
++}
++
++static bool
++host_dev2host (int n __attribute__ ((unused)),
++	       void *h __attribute__ ((unused)),
++	       const void *d __attribute__ ((unused)),
++	       size_t s __attribute__ ((unused)))
++{
++  return true;
++}
++
++static bool
++host_host2dev (int n __attribute__ ((unused)),
++	       void *d __attribute__ ((unused)),
++	       const void *h __attribute__ ((unused)),
++	       size_t s __attribute__ ((unused)))
++{
++  return true;
++}
++
++static void
++host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
++	  void **args __attribute__((unused)))
++{
++  void (*fn)(void *) = (void (*)(void *)) fn_ptr;
++
++  fn (vars);
++}
++
++/* Run FN directly on the host, passing HOSTADDRS; nothing is offloaded.  */
++static void
++host_openacc_exec (void (*fn) (void *),
++		   size_t mapnum __attribute__ ((unused)), void **hostaddrs,
++		   void **devaddrs __attribute__ ((unused)),
++		   int async __attribute__ ((unused)),
++		   unsigned *dims __attribute__ ((unused)),
++		   void *targ_mem_desc __attribute__ ((unused)))
++{
++  fn (hostaddrs);
++}
++
++static void
++host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)),
++				     int async __attribute__ ((unused)))
++{
++}
++
++static int
++host_openacc_async_test (int async __attribute__ ((unused)))
++{
++  return 1;
++}
++
++static int
++host_openacc_async_test_all (void)
++{
++  return 1;
++}
++
++static void
++host_openacc_async_wait (int async __attribute__ ((unused)))
++{
++}
++
++static void
++host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
++			       int async2 __attribute__ ((unused)))
++{
++}
++
++static void
++host_openacc_async_wait_all (void)
++{
++}
++
++static void
++host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
++{
++}
++
++static void
++host_openacc_async_set_async (int async __attribute__ ((unused)))
++{
++}
++
++static void *
++host_openacc_create_thread_data (int ord __attribute__ ((unused)))
++{
++  return NULL;
++}
++
++static void
++host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused)))
++{
++}
++
++static struct gomp_device_descr host_dispatch =
++  {
++    .name = "host",
++    .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM
++		     | GOMP_OFFLOAD_CAP_NATIVE_EXEC
++		     | GOMP_OFFLOAD_CAP_OPENACC_200),
++    .target_id = 0,
++    .type = OFFLOAD_TARGET_TYPE_HOST,
++
++    .get_name_func = host_get_name,
++    .get_caps_func = host_get_caps,
++    .get_type_func = host_get_type,
++    .get_num_devices_func = host_get_num_devices,
++    .init_device_func = host_init_device,
++    .fini_device_func = host_fini_device,
++    .version_func = host_version,
++    .load_image_func = host_load_image,
++    .unload_image_func = host_unload_image,
++    .alloc_func = host_alloc,
++    .free_func = host_free,
++    .dev2host_func = host_dev2host,
++    .host2dev_func = host_host2dev,
++    .run_func = host_run,
++
++    .mem_map = { NULL },
++    /* .lock is initialized at run time by goacc_host_init.  */
++    .state = GOMP_DEVICE_UNINITIALIZED,
++
++    .openacc = {
++      .data_environ = NULL,
++
++      .exec_func = host_openacc_exec,
++
++      .register_async_cleanup_func = host_openacc_register_async_cleanup,
++
++      .async_test_func = host_openacc_async_test,
++      .async_test_all_func = host_openacc_async_test_all,
++      .async_wait_func = host_openacc_async_wait,
++      .async_wait_async_func = host_openacc_async_wait_async,
++      .async_wait_all_func = host_openacc_async_wait_all,
++      .async_wait_all_async_func = host_openacc_async_wait_all_async,
++      .async_set_async_func = host_openacc_async_set_async,
++
++      .create_thread_data_func = host_openacc_create_thread_data,
++      .destroy_thread_data_func = host_openacc_destroy_thread_data,
++
++      .cuda = {
++	.get_current_device_func = NULL,
++	.get_current_context_func = NULL,
++	.get_stream_func = NULL,
++	.set_stream_func = NULL,
++      }
++    }
++  };
++
++/* Initialize and register this device type.  */
++void
++goacc_host_init (void)
++{
++  gomp_mutex_init (&host_dispatch.lock);
++  goacc_register (&host_dispatch);
++}
+--- libgomp/oacc-parallel.c.jj	2016-07-13 16:57:04.399535807 +0200
++++ libgomp/oacc-parallel.c	2016-07-14 18:53:06.694996381 +0200
+@@ -0,0 +1,241 @@
++/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file handles OpenACC constructs.  */
++
++#include "openacc.h"
++#include "libgomp.h"
++#include "libgomp_g.h"
++#include "gomp-constants.h"
++#include "oacc-int.h"
++#ifdef HAVE_INTTYPES_H
++# include <inttypes.h>  /* For PRIu64.  */
++#endif
++#include <string.h>
++#include <stdarg.h>
++#include <assert.h>
++
++static void goacc_wait (int async, int num_waits, va_list *ap);
++
++
++/* Launch a possibly offloaded function on DEVICE.  FN is the host fn
++   address.  MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
++   blocks to be copied to/from the device.  Variadic arguments are
++   keyed optional parameters terminated with a zero.  */
++
++void
++GOACC_parallel_keyed (int device, void (*fn) (void *),
++		      size_t mapnum, void **hostaddrs, size_t *sizes,
++		      unsigned short *kinds, ...)
++{
++  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
++  struct goacc_thread *thr;
++  struct gomp_device_descr *acc_dev;
++
++#ifdef HAVE_INTTYPES_H
++  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p\n",
++	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
++#else
++  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
++	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
++#endif
++  goacc_lazy_initialize ();
++
++  thr = goacc_thread ();
++  acc_dev = thr->dev;
++
++  /* Host fallback if "if" clause is false or if the current device is set to
++     the host.  */
++  if (host_fallback)
++    {
++      goacc_save_and_set_bind (acc_device_host);
++      fn (hostaddrs);
++      goacc_restore_bind ();
++      return;
++    }
++  else if (acc_device_type (acc_dev->type) == acc_device_host)
++    {
++      fn (hostaddrs);
++      return;
++    }
++
++  /* acc_device_host is the only supported device type.  */
++}
++
++/* Legacy entry point: always run FN on the host with HOSTADDRS.  */
++
++void
++GOACC_parallel (int device, void (*fn) (void *),
++		size_t mapnum, void **hostaddrs, size_t *sizes,
++		unsigned short *kinds, int num_gangs, int num_workers,
++		int vector_length, int async, int num_waits, ...)
++{
++  goacc_lazy_initialize ();  /* goacc_thread () may still be NULL here.  */
++  goacc_save_and_set_bind (acc_device_host);
++  fn (hostaddrs);
++  goacc_restore_bind ();
++}
++
++void
++GOACC_data_start (int device, size_t mapnum,
++		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
++{
++  goacc_lazy_initialize ();
++}
++
++void
++GOACC_data_end (void)
++{
++  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
++  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
++}
++
++void
++GOACC_enter_exit_data (int device, size_t mapnum,
++		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
++		       int async, int num_waits, ...)
++{
++  goacc_lazy_initialize ();
++}
++
++static void
++goacc_wait (int async, int num_waits, va_list *ap)
++{
++  struct goacc_thread *thr = goacc_thread ();
++  struct gomp_device_descr *acc_dev = thr->dev;
++
++  while (num_waits--)
++    {
++      int qid = va_arg (*ap, int);
++      
++      if (acc_async_test (qid))
++	continue;
++
++      if (async == acc_async_sync)
++	acc_wait (qid);
++      else if (qid == async)
++	;/* If we're waiting on the same asynchronous queue as we're
++	    launching on, the queue itself will order work as
++	    required, so there's no need to wait explicitly.  */
++      else
++	acc_dev->openacc.async_wait_async_func (qid, async);
++    }
++}
++
++void
++GOACC_update (int device, size_t mapnum,
++	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
++	      int async, int num_waits, ...)
++{
++  goacc_lazy_initialize ();
++}
++
++void
++GOACC_wait (int async, int num_waits, ...)
++{
++  if (num_waits)
++    {
++      va_list ap;
++
++      va_start (ap, num_waits);
++      goacc_wait (async, num_waits, &ap);
++      va_end (ap);
++    }
++  else if (async == acc_async_sync)
++    acc_wait_all ();
++  else if (async == acc_async_noval)
++    goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
++}
++
++int
++GOACC_get_num_threads (void)
++{
++  return 1;
++}
++
++int
++GOACC_get_thread_num (void)
++{
++  return 0;
++}
++
++/* Process MAPNUM mappings one at a time: HOSTADDRS[i], SIZES[i] and KINDS[i]
++   describe mapping I; the low byte of KINDS[i] selects the action.  */
++void
++GOACC_declare (int device, size_t mapnum,
++	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
++{
++  size_t i;
++
++  for (i = 0; i < mapnum; i++)
++    {
++      unsigned char kind = kinds[i] & 0xff;
++
++      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
++	continue;
++
++      switch (kind)
++	{
++	  case GOMP_MAP_FORCE_ALLOC:
++	  case GOMP_MAP_FORCE_FROM:
++	  case GOMP_MAP_FORCE_TO:
++	  case GOMP_MAP_DELETE:
++	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], 0, 0);
++	    break;
++
++	  case GOMP_MAP_FORCE_DEVICEPTR:
++	    break;
++
++	  case GOMP_MAP_ALLOC:
++	    if (!acc_is_present (hostaddrs[i], sizes[i]))
++	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
++				     &kinds[i], 0, 0);
++	    break;
++
++	  case GOMP_MAP_TO:
++	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], 0, 0);
++	    break;
++
++	  case GOMP_MAP_FROM:
++	    kinds[i] = GOMP_MAP_FORCE_FROM;  /* Rewrites the caller's KINDS.  */
++	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
++				   &kinds[i], 0, 0);
++	    break;
++
++	  case GOMP_MAP_FORCE_PRESENT:
++	    if (!acc_is_present (hostaddrs[i], sizes[i]))
++	      gomp_fatal ("[%p,%lu] is not mapped", hostaddrs[i],
++			  (unsigned long) sizes[i]);
++	    break;
++
++	  default:
++	    assert (0);
++	    break;
++	}
++    }
++}
+--- libgomp/oacc-cuda.c.jj	2016-07-13 16:57:04.432535397 +0200
++++ libgomp/oacc-cuda.c	2016-07-13 16:57:04.432535397 +0200
+@@ -0,0 +1,86 @@
++/* OpenACC Runtime Library: CUDA support glue.
++
++   Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "openacc.h"
++#include "config.h"
++#include "libgomp.h"
++#include "oacc-int.h"
++
++void *
++acc_get_current_cuda_device (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func)
++    return thr->dev->openacc.cuda.get_current_device_func ();
++
++  return NULL;
++}
++
++void *
++acc_get_current_cuda_context (void)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func)
++    return thr->dev->openacc.cuda.get_current_context_func ();
++ 
++  return NULL;
++}
++
++void *
++acc_get_cuda_stream (int async)
++{
++  struct goacc_thread *thr = goacc_thread ();
++
++  if (async < 0)
++    return NULL;
++
++  if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
++    return thr->dev->openacc.cuda.get_stream_func (async);
++ 
++  return NULL;
++}
++
++int
++acc_set_cuda_stream (int async, void *stream)
++{
++  struct goacc_thread *thr;
++
++  if (async < 0 || stream == NULL)
++    return 0;
++
++  goacc_lazy_initialize ();
++
++  thr = goacc_thread ();
++
++  if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
++    return thr->dev->openacc.cuda.set_stream_func (async, stream);
++
++  return -1;
++}
+--- libgomp/openacc_lib.h.jj	2016-07-13 16:57:13.486423134 +0200
++++ libgomp/openacc_lib.h	2016-07-13 16:57:13.486423134 +0200
+@@ -0,0 +1,382 @@
++!  OpenACC Runtime Library Definitions.			-*- mode: fortran -*-
++
++!  Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++!  Contributed by Tobias Burnus <burnus@net-b.de>
++!              and Mentor Embedded.
++
++!  This file is part of the GNU Offloading and Multi Processing Library
++!  (libgomp).
++
++!  Libgomp is free software; you can redistribute it and/or modify it
++!  under the terms of the GNU General Public License as published by
++!  the Free Software Foundation; either version 3, or (at your option)
++!  any later version.
++
++!  Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++!  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++!  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++!  more details.
++
++!  Under Section 7 of GPL version 3, you are granted additional
++!  permissions described in the GCC Runtime Library Exception, version
++!  3.1, as published by the Free Software Foundation.
++
++!  You should have received a copy of the GNU General Public License and
++!  a copy of the GCC Runtime Library Exception along with this program;
++!  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++!  <http://www.gnu.org/licenses/>.
++
++! NOTE: Due to the use of dimension (..), the code only works when compiled
++! with -std=f2008ts/gnu/legacy but not with other standard settings.
++! Alternatively, the user can use the module version, which permits
++! compilation with -std=f95.
++
++      integer, parameter :: acc_device_kind = 4
++
++!     Keep in sync with include/gomp-constants.h.
++      integer (acc_device_kind), parameter :: acc_device_none = 0
++      integer (acc_device_kind), parameter :: acc_device_default = 1
++      integer (acc_device_kind), parameter :: acc_device_host = 2
++!     integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3
++!     removed.
++      integer (acc_device_kind), parameter :: acc_device_not_host = 4
++      integer (acc_device_kind), parameter :: acc_device_nvidia = 5
++
++      integer, parameter :: acc_handle_kind = 4
++
++!     Keep in sync with include/gomp-constants.h.
++      integer (acc_handle_kind), parameter :: acc_async_noval = -1
++      integer (acc_handle_kind), parameter :: acc_async_sync = -2
++
++      integer, parameter :: openacc_version = 201306
++
++      interface acc_get_num_devices
++        function acc_get_num_devices_h (d)
++          import acc_device_kind
++          integer acc_get_num_devices_h
++          integer (acc_device_kind) d
++        end function
++      end interface
++
++      interface acc_set_device_type
++        subroutine acc_set_device_type_h (d)
++          import acc_device_kind
++          integer (acc_device_kind) d
++        end subroutine
++      end interface
++
++      interface acc_get_device_type
++        function acc_get_device_type_h ()
++          import acc_device_kind
++          integer (acc_device_kind) acc_get_device_type_h
++        end function
++      end interface
++
++      interface acc_set_device_num
++        subroutine acc_set_device_num_h (n, d)
++          import acc_device_kind
++          integer n
++          integer (acc_device_kind) d
++        end subroutine
++      end interface
++
++      interface acc_get_device_num
++        function acc_get_device_num_h (d)
++          import acc_device_kind
++          integer acc_get_device_num_h
++          integer (acc_device_kind) d
++        end function
++      end interface
++
++      interface acc_async_test
++        function acc_async_test_h (a)
++          logical acc_async_test_h
++          integer a
++        end function
++      end interface
++
++      interface acc_async_test_all
++        function acc_async_test_all_h ()
++          logical acc_async_test_all_h
++        end function
++      end interface
++
++      interface acc_wait
++        subroutine acc_wait_h (a)
++          integer a
++        end subroutine
++      end interface
++
++      interface acc_wait_async
++        subroutine acc_wait_async_h (a1, a2)
++          integer a1, a2
++        end subroutine
++      end interface
++
++      interface acc_wait_all
++        subroutine acc_wait_all_h ()
++        end subroutine
++      end interface
++
++      interface acc_wait_all_async
++        subroutine acc_wait_all_async_h (a)
++          integer a
++        end subroutine
++      end interface
++
++      interface acc_init
++        subroutine acc_init_h (devicetype)
++          import acc_device_kind
++          integer (acc_device_kind) devicetype
++        end subroutine
++      end interface
++
++      interface acc_shutdown
++        subroutine acc_shutdown_h (devicetype)
++          import acc_device_kind
++          integer (acc_device_kind) devicetype
++        end subroutine
++      end interface
++
++      interface acc_on_device
++        function acc_on_device_h (devicetype)
++          import acc_device_kind
++          logical acc_on_device_h
++          integer (acc_device_kind) devicetype
++        end function
++      end interface
++
++      ! acc_malloc: Only available in C/C++
++      ! acc_free: Only available in C/C++
++
++      interface acc_copyin
++        subroutine acc_copyin_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_copyin_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_copyin_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_present_or_copyin
++        subroutine acc_present_or_copyin_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_present_or_copyin_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_present_or_copyin_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_pcopyin
++        subroutine acc_pcopyin_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_pcopyin_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_pcopyin_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_create
++        subroutine acc_create_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_create_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_create_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_present_or_create
++        subroutine acc_present_or_create_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_present_or_create_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_present_or_create_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_pcreate
++        subroutine acc_pcreate_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_pcreate_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_pcreate_array_h (a)
++          type (*), dimension (..), contiguous :: a
++          end subroutine
++      end interface
++
++      interface acc_copyout
++        subroutine acc_copyout_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_copyout_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_copyout_array_h (a)
++          type (*), dimension (..), contiguous :: a
++        end subroutine
++      end interface
++
++      interface acc_delete
++        subroutine acc_delete_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_delete_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_delete_array_h (a)
++          type (*), dimension (..), contiguous :: a
++        end subroutine
++      end interface
++
++      interface acc_update_device
++        subroutine acc_update_device_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_update_device_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_update_device_array_h (a)
++          type (*), dimension (..), contiguous :: a
++        end subroutine
++      end interface
++
++      interface acc_update_self
++        subroutine acc_update_self_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end subroutine
++
++        subroutine acc_update_self_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end subroutine
++
++        subroutine acc_update_self_array_h (a)
++          type (*), dimension (..), contiguous :: a
++        end subroutine
++      end interface
++
++      ! acc_map_data: Only available in C/C++
++      ! acc_unmap_data: Only available in C/C++
++      ! acc_deviceptr: Only available in C/C++
++      ! acc_hostptr: Only available in C/C++
++
++      interface acc_is_present
++        function acc_is_present_32_h (a, len)
++          use iso_c_binding, only: c_int32_t
++          logical acc_is_present_32_h
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int32_t) len
++        end function
++
++        function acc_is_present_64_h (a, len)
++          use iso_c_binding, only: c_int64_t
++          logical acc_is_present_64_h
++          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
++          type (*), dimension (*) :: a
++          integer (c_int64_t) len
++        end function
++
++        function acc_is_present_array_h (a)
++          logical acc_is_present_array_h
++          type (*), dimension (..), contiguous :: a
++        end function
++      end interface
++
++      ! acc_memcpy_to_device: Only available in C/C++
++      ! acc_memcpy_from_device: Only available in C/C++
+--- libgomp/gomp-constants.h.jj	2016-07-14 16:02:47.212545826 +0200
++++ libgomp/gomp-constants.h	2016-05-26 21:04:40.000000000 +0200
+@@ -0,0 +1,259 @@
++/* Communication between GCC and libgomp.
++
++   Copyright (C) 2014-2015 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef GOMP_CONSTANTS_H
++#define GOMP_CONSTANTS_H 1
++
++/* Memory mapping types.  */
++
++/* One byte.  */
++#define GOMP_MAP_LAST			(1 << 8)
++
++#define GOMP_MAP_FLAG_TO		(1 << 0)
++#define GOMP_MAP_FLAG_FROM		(1 << 1)
++/* Special map kinds, enumerated starting here.  */
++#define GOMP_MAP_FLAG_SPECIAL_0		(1 << 2)
++#define GOMP_MAP_FLAG_SPECIAL_1		(1 << 3)
++#define GOMP_MAP_FLAG_SPECIAL_2		(1 << 4)
++#define GOMP_MAP_FLAG_SPECIAL		(GOMP_MAP_FLAG_SPECIAL_1 \
++					 | GOMP_MAP_FLAG_SPECIAL_0)
++/* Flag to force a specific behavior (or else, trigger a run-time error).  */
++#define GOMP_MAP_FLAG_FORCE		(1 << 7)
++
++enum gomp_map_kind
++  {
++    /* If not already present, allocate.  */
++    GOMP_MAP_ALLOC =			0,
++    /* ..., and copy to device.  */
++    GOMP_MAP_TO =			(GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO),
++    /* ..., and copy from device.  */
++    GOMP_MAP_FROM =			(GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM),
++    /* ..., and copy to and from device.  */
++    GOMP_MAP_TOFROM =			(GOMP_MAP_TO | GOMP_MAP_FROM),
++    /* The following kind is an internal only map kind, used for pointer based
++       array sections.  OMP_CLAUSE_SIZE for these is not the pointer size,
++       which is implicitly POINTER_SIZE_UNITS, but the bias.  */
++    GOMP_MAP_POINTER =			(GOMP_MAP_FLAG_SPECIAL_0 | 0),
++    /* Also internal, behaves like GOMP_MAP_TO, but additionally any
++       GOMP_MAP_POINTER records consecutive after it which have addresses
++       falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't
++       mapped already.  */
++    GOMP_MAP_TO_PSET =			(GOMP_MAP_FLAG_SPECIAL_0 | 1),
++    /* Must already be present.  */
++    GOMP_MAP_FORCE_PRESENT =		(GOMP_MAP_FLAG_SPECIAL_0 | 2),
++    /* Deallocate a mapping, without copying from device.  */
++    GOMP_MAP_DELETE =			(GOMP_MAP_FLAG_SPECIAL_0 | 3),
++    /* Is a device pointer.  OMP_CLAUSE_SIZE for these is unused; is implicitly
++       POINTER_SIZE_UNITS.  */
++    GOMP_MAP_FORCE_DEVICEPTR =		(GOMP_MAP_FLAG_SPECIAL_1 | 0),
++    /* OpenACC device_resident.  */
++    GOMP_MAP_DEVICE_RESIDENT =		(GOMP_MAP_FLAG_SPECIAL_1 | 1),
++    /* OpenACC link.  */
++    GOMP_MAP_LINK =			(GOMP_MAP_FLAG_SPECIAL_1 | 2),
++    /* Do not map, copy bits for firstprivate
++       instead.  */
++    GOMP_MAP_FIRSTPRIVATE =		(GOMP_MAP_FLAG_SPECIAL | 0),
++    /* Similarly, but store the value in the pointer rather than
++       pointed by the pointer.  */
++    GOMP_MAP_FIRSTPRIVATE_INT =		(GOMP_MAP_FLAG_SPECIAL | 1),
++    /* Pointer translate host address into device address and copy that
++       back to host.  */
++    GOMP_MAP_USE_DEVICE_PTR =		(GOMP_MAP_FLAG_SPECIAL | 2),
++    /* Allocate a zero length array section.  Prefer next non-zero length
++       mapping over previous non-zero length mapping over zero length mapping
++       at the address.  If not already mapped, do nothing (and pointer translate
++       to NULL).  */
++    GOMP_MAP_ZERO_LEN_ARRAY_SECTION = 	(GOMP_MAP_FLAG_SPECIAL | 3),
++    /* Allocate.  */
++    GOMP_MAP_FORCE_ALLOC =		(GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC),
++    /* ..., and copy to device.  */
++    GOMP_MAP_FORCE_TO =			(GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO),
++    /* ..., and copy from device.  */
++    GOMP_MAP_FORCE_FROM =		(GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM),
++    /* ..., and copy to and from device.  */
++    GOMP_MAP_FORCE_TOFROM =		(GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM),
++    /* If not already present, allocate.  And unconditionally copy to
++       device.  */
++    GOMP_MAP_ALWAYS_TO =		(GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_TO),
++    /* If not already present, allocate.  And unconditionally copy from
++       device.  */
++    GOMP_MAP_ALWAYS_FROM =		(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_FROM),
++    /* If not already present, allocate.  And unconditionally copy to and from
++       device.  */
++    GOMP_MAP_ALWAYS_TOFROM =		(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_TOFROM),
++    /* Map a sparse struct; the address is the base of the structure, alignment
++       its required alignment, and size is the number of adjacent entries
++       that belong to the struct.  The adjacent entries should be sorted by
++       increasing address, so it is easy to determine lowest needed address
++       (address of the first adjacent entry) and highest needed address
++       (address of the last adjacent entry plus its size).  */
++    GOMP_MAP_STRUCT =			(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_FLAG_SPECIAL | 0),
++    /* On a location of a pointer/reference that is assumed to be already mapped
++       earlier, store the translated address of the preceding mapping.
++       No refcount is bumped by this, and the store is done unconditionally.  */
++    GOMP_MAP_ALWAYS_POINTER =		(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_FLAG_SPECIAL | 1),
++    /* Forced deallocation of zero length array section.  */
++    GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
++      =					(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_FLAG_SPECIAL | 3),
++    /* Decrement usage count and deallocate if zero.  */
++    GOMP_MAP_RELEASE =			(GOMP_MAP_FLAG_SPECIAL_2
++					 | GOMP_MAP_DELETE),
++
++    /* Internal to GCC, not used in libgomp.  */
++    /* Do not map, but pointer assign a pointer instead.  */
++    GOMP_MAP_FIRSTPRIVATE_POINTER =	(GOMP_MAP_LAST | 1),
++    /* Do not map, but pointer assign a reference instead.  */
++    GOMP_MAP_FIRSTPRIVATE_REFERENCE =	(GOMP_MAP_LAST | 2)
++  };
++
++#define GOMP_MAP_COPY_TO_P(X) \
++  (!((X) & GOMP_MAP_FLAG_SPECIAL) \
++   && ((X) & GOMP_MAP_FLAG_TO))
++
++#define GOMP_MAP_COPY_FROM_P(X) \
++  (!((X) & GOMP_MAP_FLAG_SPECIAL) \
++   && ((X) & GOMP_MAP_FLAG_FROM))
++
++#define GOMP_MAP_POINTER_P(X) \
++  ((X) == GOMP_MAP_POINTER)
++
++#define GOMP_MAP_ALWAYS_TO_P(X) \
++  (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
++
++#define GOMP_MAP_ALWAYS_FROM_P(X) \
++  (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
++
++#define GOMP_MAP_ALWAYS_P(X) \
++  (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM))
++
++
++/* Asynchronous behavior.  Keep in sync with
++   libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
++
++#define GOMP_ASYNC_NOVAL		-1
++#define GOMP_ASYNC_SYNC			-2
++
++
++/* Device codes.  Keep in sync with
++   libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as
++   libgomp/libgomp-plugin.h.  */
++#define GOMP_DEVICE_NONE		0
++#define GOMP_DEVICE_DEFAULT		1
++#define GOMP_DEVICE_HOST		2
++/* #define GOMP_DEVICE_HOST_NONSHM	3 removed.  */
++#define GOMP_DEVICE_NOT_HOST		4
++#define GOMP_DEVICE_NVIDIA_PTX		5
++#define GOMP_DEVICE_INTEL_MIC		6
++#define GOMP_DEVICE_HSA			7
++
++#define GOMP_DEVICE_ICV			-1
++#define GOMP_DEVICE_HOST_FALLBACK	-2
++
++/* GOMP_task/GOMP_taskloop* flags argument.  */
++#define GOMP_TASK_FLAG_UNTIED		(1 << 0)
++#define GOMP_TASK_FLAG_FINAL		(1 << 1)
++#define GOMP_TASK_FLAG_MERGEABLE	(1 << 2)
++#define GOMP_TASK_FLAG_DEPEND		(1 << 3)
++#define GOMP_TASK_FLAG_PRIORITY		(1 << 4)
++#define GOMP_TASK_FLAG_UP		(1 << 8)
++#define GOMP_TASK_FLAG_GRAINSIZE	(1 << 9)
++#define GOMP_TASK_FLAG_IF		(1 << 10)
++#define GOMP_TASK_FLAG_NOGROUP		(1 << 11)
++
++/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument.  */
++#define GOMP_TARGET_FLAG_NOWAIT		(1 << 0)
++#define GOMP_TARGET_FLAG_EXIT_DATA	(1 << 1)
++/* Internal to libgomp.  */
++#define GOMP_TARGET_FLAG_UPDATE		(1U << 31)
++
++/* Versions of libgomp and device-specific plugins.  GOMP_VERSION
++   should be incremented whenever an ABI-incompatible change is introduced
++   to the plugin interface defined in libgomp/libgomp.h.  */
++#define GOMP_VERSION	1
++#define GOMP_VERSION_NVIDIA_PTX 1
++#define GOMP_VERSION_INTEL_MIC 0
++#define GOMP_VERSION_HSA 0
++
++#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
++#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
++#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)
++
++#define GOMP_DIM_GANG	0
++#define GOMP_DIM_WORKER	1
++#define GOMP_DIM_VECTOR	2
++#define GOMP_DIM_MAX	3
++#define GOMP_DIM_MASK(X) (1u << (X))
++
++/* Variadic launch arguments.  End of list is marked by a zero.  */
++#define GOMP_LAUNCH_DIM		1  /* Launch dimensions, op = mask */
++#define GOMP_LAUNCH_ASYNC	2  /* Async, op = cst val if not MAX  */
++#define GOMP_LAUNCH_WAIT	3  /* Waits, op = num waits.  */
++#define GOMP_LAUNCH_CODE_SHIFT	28
++#define GOMP_LAUNCH_DEVICE_SHIFT 16
++#define GOMP_LAUNCH_OP_SHIFT 0
++#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP)	\
++  (((CODE) << GOMP_LAUNCH_CODE_SHIFT)		\
++   | ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT)	\
++   | ((OP) << GOMP_LAUNCH_OP_SHIFT))
++#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf)
++#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff)
++#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
++#define GOMP_LAUNCH_OP_MAX 0xffff
++
++/* Bitmask to apply in order to find out the intended device of a target
++   argument.  */
++#define GOMP_TARGET_ARG_DEVICE_MASK		((1 << 7) - 1)
++/* The target argument is significant for all devices.  */
++#define GOMP_TARGET_ARG_DEVICE_ALL		0
++
++/* Flag set when the value of the argument is passed in the subsequent
++   element of the argument array instead of being embedded in this one.  */
++#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM	(1 << 7)
++
++/* Bitmask to apply to a target argument to find out the value identifier.  */
++#define GOMP_TARGET_ARG_ID_MASK			(((1 << 8) - 1) << 8)
++/* Target argument index of NUM_TEAMS.  */
++#define GOMP_TARGET_ARG_NUM_TEAMS		(1 << 8)
++/* Target argument index of THREAD_LIMIT.  */
++#define GOMP_TARGET_ARG_THREAD_LIMIT		(2 << 8)
++
++/* If the value is directly embedded in target argument, it should be at most
++   16 bits wide and shifted by this many bits.  */
++#define GOMP_TARGET_ARG_VALUE_SHIFT		16
++
++/* HSA specific data structures.  */
++
++/* Identifiers of device-specific target arguments.  */
++#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES	(1 << 8)
++
++#endif
+--- libgomp/oacc-mem.c.jj	2016-07-13 16:57:04.433535385 +0200
++++ libgomp/oacc-mem.c	2016-07-14 15:39:44.644631308 +0200
+@@ -0,0 +1,204 @@
++/* OpenACC Runtime initialization routines
++
++   Copyright (C) 2013-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#include "openacc.h"
++#include "config.h"
++#include "libgomp.h"
++#include "gomp-constants.h"
++#include "oacc-int.h"
++#include <stdint.h>
++#include <string.h>
++#include <assert.h>
++
++/* OpenACC is silent on how memory exhaustion is indicated.  We return
++   NULL.  */
++
++void *
++acc_malloc (size_t s)
++{
++  if (!s)
++    return NULL;
++
++  goacc_lazy_initialize ();
++  return malloc (s);
++}
++
++/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
++   the device address is mapped.  Nothing is looked up or unmapped here;
++   D is simply released with free (a NULL D is therefore a no-op).  */
++void
++acc_free (void *d)
++{
++  free (d);
++}
++
++void
++acc_memcpy_to_device (void *d, void *h, size_t s)
++{
++  memmove (d, h, s);
++}
++
++void
++acc_memcpy_from_device (void *h, void *d, size_t s)
++{
++  memmove (h, d, s);
++}
++
++/* Return the device pointer that corresponds to host data H.  This
++   implementation always returns H itself.  */
++
++void *
++acc_deviceptr (void *h)
++{
++  goacc_lazy_initialize ();
++  return h;
++}
++
++/* Return the host pointer that corresponds to device data D.  This
++   implementation always returns D itself.  */
++
++void *
++acc_hostptr (void *d)
++{
++  goacc_lazy_initialize ();
++  return d;
++}
++
++/* Return 1 if host data [H,+S] is present; here: H non-NULL and S nonzero.  */
++
++int
++acc_is_present (void *h, size_t s)
++{
++  if (!s || !h)
++    return 0;
++
++  goacc_lazy_initialize ();
++  return h != NULL;
++}
++
++/* Create a mapping for host [H,+S] -> device [D,+S]; D must equal H.  */
++
++void
++acc_map_data (void *h, void *d, size_t s)
++{
++  goacc_lazy_initialize ();
++
++  if (d != h)
++    gomp_fatal ("cannot map data on shared-memory system");
++}
++
++void
++acc_unmap_data (void *h)
++{
++}
++
++#define FLAG_PRESENT (1 << 0)
++#define FLAG_CREATE (1 << 1)
++#define FLAG_COPY (1 << 2)
++
++static void *
++present_create_copy (unsigned f, void *h, size_t s)
++{
++  if (!h || !s)
++    gomp_fatal ("[%p,+%zu] is a bad range", (void *)h, s);
++  /* F (the FLAG_* bits) is not consulted; the host pointer is returned.  */
++  goacc_lazy_initialize ();
++  return h;
++}
++
++void *
++acc_create (void *h, size_t s)
++{
++  return present_create_copy (FLAG_CREATE, h, s);
++}
++
++void *
++acc_copyin (void *h, size_t s)
++{
++  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
++}
++
++void *
++acc_present_or_create (void *h, size_t s)
++{
++  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
++}
++
++void *
++acc_present_or_copyin (void *h, size_t s)
++{
++  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
++}
++
++#define FLAG_COPYOUT (1 << 0)
++
++static void
++delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
++{
++}
++
++void
++acc_delete (void *h , size_t s)
++{
++  delete_copyout (0, h, s, __FUNCTION__);
++}
++
++void
++acc_copyout (void *h, size_t s)
++{
++  delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
++}
++
++static void
++update_dev_host (int is_dev, void *h, size_t s)
++{
++  goacc_lazy_initialize ();
++}
++
++void
++acc_update_device (void *h, size_t s)
++{
++  update_dev_host (1, h, s);
++}
++
++void
++acc_update_self (void *h, size_t s)
++{
++  update_dev_host (0, h, s);
++}
++
++void
++gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
++			 void *kinds)
++{
++}
++
++void
++gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
++{
++}
+--- libgomp/oacc-plugin.h.jj	2016-07-13 16:57:13.487423121 +0200
++++ libgomp/oacc-plugin.h	2016-07-13 16:57:13.487423121 +0200
+@@ -0,0 +1,33 @@
++/* Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++   Contributed by Mentor Embedded.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef OACC_PLUGIN_H
++#define OACC_PLUGIN_H 1
++
++extern void GOMP_PLUGIN_async_unmap_vars (void *, int);
++extern void *GOMP_PLUGIN_acc_thread (void);
++
++#endif
+--- libgomp/taskloop.c.jj	2016-07-13 16:57:18.935355570 +0200
++++ libgomp/taskloop.c	2016-07-13 16:57:18.935355570 +0200
+@@ -0,0 +1,340 @@
++/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
++   Contributed by Jakub Jelinek <jakub@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file handles the taskloop construct.  It is included twice, once
++   for the long and once for unsigned long long variant.  */
++
++/* Called when encountering a taskloop directive.  If GOMP_TASK_FLAG_IF is
++   not set in FLAGS, the tasks are run immediately by the calling thread.
++   Unless GOMP_TASK_FLAG_NOGROUP is set, they run in their own taskgroup.  */
++
++void
++GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
++	       long arg_size, long arg_align, unsigned flags,
++	       unsigned long num_tasks, int priority,
++	       TYPE start, TYPE end, TYPE step)
++{
++  struct gomp_thread *thr = gomp_thread ();
++  struct gomp_team *team = thr->ts.team;
++
++#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
++  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
++     tied to one thread all the time.  This means UNTIED tasks must be
++     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
++     might be running on different thread than FN.  */
++  if (cpyfn)
++    flags &= ~GOMP_TASK_FLAG_IF;
++  flags &= ~GOMP_TASK_FLAG_UNTIED;
++#endif
++
++  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
++  if (team && gomp_team_barrier_cancelled (&team->barrier))
++    return;
++
++#ifdef TYPE_is_long
++  TYPE s = step;
++  if (step > 0)
++    {
++      if (start >= end)
++	return;
++      s--;
++    }
++  else
++    {
++      if (start <= end)
++	return;
++      s++;
++    }
++  UTYPE n = (end - start + s) / step;
++#else
++  UTYPE n;
++  if (flags & GOMP_TASK_FLAG_UP)
++    {
++      if (start >= end)
++	return;
++      n = (end - start + step - 1) / step;
++    }
++  else
++    {
++      if (start <= end)
++	return;
++      n = (start - end - step - 1) / -step;
++    }
++#endif
++
++  TYPE task_step = step;
++  unsigned long nfirst = n;
++  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
++    {
++      unsigned long grainsize = num_tasks;
++#ifdef TYPE_is_long
++      num_tasks = n / grainsize;
++#else
++      UTYPE ndiv = n / grainsize;
++      num_tasks = ndiv;
++      if (num_tasks != ndiv)
++	num_tasks = ~0UL;
++#endif
++      if (num_tasks <= 1)
++	{
++	  num_tasks = 1;
++	  task_step = end - start;
++	}
++      else if (num_tasks >= grainsize
++#ifndef TYPE_is_long
++	       && num_tasks != ~0UL
++#endif
++	      )
++	{
++	  UTYPE mul = num_tasks * grainsize;
++	  task_step = (TYPE) grainsize * step;
++	  if (mul != n)
++	    {
++	      task_step += step;
++	      nfirst = n - mul - 1;
++	    }
++	}
++      else
++	{
++	  UTYPE div = n / num_tasks;
++	  UTYPE mod = n % num_tasks;
++	  task_step = (TYPE) div * step;
++	  if (mod)
++	    {
++	      task_step += step;
++	      nfirst = mod - 1;
++	    }
++	}
++    }
++  else
++    {
++      if (num_tasks == 0)
++	num_tasks = team ? team->nthreads : 1;
++      if (num_tasks >= n)
++	num_tasks = n;
++      else
++	{
++	  UTYPE div = n / num_tasks;
++	  UTYPE mod = n % num_tasks;
++	  task_step = (TYPE) div * step;
++	  if (mod)
++	    {
++	      task_step += step;
++	      nfirst = mod - 1;
++	    }
++	}
++    }
++
++  if (flags & GOMP_TASK_FLAG_NOGROUP)
++    {
++      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
++	return;
++    }
++  else
++    ialias_call (GOMP_taskgroup_start) ();
++
++  if (priority > gomp_max_task_priority_var)
++    priority = gomp_max_task_priority_var;
++
++  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
++      || (thr->task && thr->task->final_task)
++      || team->task_count + num_tasks > 64 * team->nthreads)
++    {
++      unsigned long i;
++      if (__builtin_expect (cpyfn != NULL, 0))
++	{
++	  struct gomp_task task[num_tasks];
++	  struct gomp_task *parent = thr->task;
++	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
++	  char buf[num_tasks * arg_size + arg_align - 1];
++	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
++				& ~(uintptr_t) (arg_align - 1));
++	  char *orig_arg = arg;
++	  for (i = 0; i < num_tasks; i++)
++	    {
++	      gomp_init_task (&task[i], parent, gomp_icv (false));
++	      task[i].priority = priority;
++	      task[i].kind = GOMP_TASK_UNDEFERRED;
++	      task[i].final_task = (thr->task && thr->task->final_task)
++				   || (flags & GOMP_TASK_FLAG_FINAL);
++	      if (thr->task)
++		{
++		  task[i].in_tied_task = thr->task->in_tied_task;
++		  task[i].taskgroup = thr->task->taskgroup;
++		}
++	      thr->task = &task[i];
++	      cpyfn (arg, data);
++	      arg += arg_size;
++	    }
++	  arg = orig_arg;
++	  for (i = 0; i < num_tasks; i++)
++	    {
++	      thr->task = &task[i];
++	      ((TYPE *)arg)[0] = start;
++	      start += task_step;
++	      ((TYPE *)arg)[1] = start;
++	      if (i == nfirst)
++		task_step -= step;
++	      fn (arg);
++	      arg += arg_size;
++	      if (!priority_queue_empty_p (&task[i].children_queue,
++					   MEMMODEL_RELAXED))
++		{
++		  gomp_mutex_lock (&team->task_lock);
++		  gomp_clear_parent (&task[i].children_queue);
++		  gomp_mutex_unlock (&team->task_lock);
++		}
++	      gomp_end_task ();
++	    }
++	}
++      else
++	for (i = 0; i < num_tasks; i++)
++	  {
++	    struct gomp_task task;
++
++	    gomp_init_task (&task, thr->task, gomp_icv (false));
++	    task.priority = priority;
++	    task.kind = GOMP_TASK_UNDEFERRED;
++	    task.final_task = (thr->task && thr->task->final_task)
++			      || (flags & GOMP_TASK_FLAG_FINAL);
++	    if (thr->task)
++	      {
++		task.in_tied_task = thr->task->in_tied_task;
++		task.taskgroup = thr->task->taskgroup;
++	      }
++	    thr->task = &task;
++	    ((TYPE *)data)[0] = start;
++	    start += task_step;
++	    ((TYPE *)data)[1] = start;
++	    if (i == nfirst)
++	      task_step -= step;
++	    fn (data);
++	    if (!priority_queue_empty_p (&task.children_queue,
++					 MEMMODEL_RELAXED))
++	      {
++		gomp_mutex_lock (&team->task_lock);
++		gomp_clear_parent (&task.children_queue);
++		gomp_mutex_unlock (&team->task_lock);
++	      }
++	    gomp_end_task ();
++	  }
++    }
++  else
++    {
++      struct gomp_task *tasks[num_tasks];
++      struct gomp_task *parent = thr->task;
++      struct gomp_taskgroup *taskgroup = parent->taskgroup;
++      char *arg;
++      int do_wake;
++      unsigned long i;
++
++      for (i = 0; i < num_tasks; i++)
++	{
++	  struct gomp_task *task
++	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
++	  tasks[i] = task;
++	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
++			  & ~(uintptr_t) (arg_align - 1));
++	  gomp_init_task (task, parent, gomp_icv (false));
++	  task->priority = priority;
++	  task->kind = GOMP_TASK_UNDEFERRED;
++	  task->in_tied_task = parent->in_tied_task;
++	  task->taskgroup = taskgroup;
++	  thr->task = task;
++	  if (cpyfn)
++	    {
++	      cpyfn (arg, data);
++	      task->copy_ctors_done = true;
++	    }
++	  else
++	    memcpy (arg, data, arg_size);
++	  ((TYPE *)arg)[0] = start;
++	  start += task_step;
++	  ((TYPE *)arg)[1] = start;
++	  if (i == nfirst)
++	    task_step -= step;
++	  thr->task = parent;
++	  task->kind = GOMP_TASK_WAITING;
++	  task->fn = fn;
++	  task->fn_data = arg;
++	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
++	}
++      gomp_mutex_lock (&team->task_lock);
++      /* If parallel or taskgroup has been cancelled, don't start new
++	 tasks.  */
++      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
++			     || (taskgroup && taskgroup->cancelled))
++			    && cpyfn == NULL, 0))
++	{
++	  gomp_mutex_unlock (&team->task_lock);
++	  for (i = 0; i < num_tasks; i++)
++	    {
++	      gomp_finish_task (tasks[i]);
++	      free (tasks[i]);
++	    }
++	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
++	    ialias_call (GOMP_taskgroup_end) ();
++	  return;
++	}
++      if (taskgroup)
++	taskgroup->num_children += num_tasks;
++      for (i = 0; i < num_tasks; i++)
++	{
++	  struct gomp_task *task = tasks[i];
++	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
++				 task, priority,
++				 PRIORITY_INSERT_BEGIN,
++				 /*last_parent_depends_on=*/false,
++				 task->parent_depends_on);
++	  if (taskgroup)
++	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
++				   task, priority, PRIORITY_INSERT_BEGIN,
++				   /*last_parent_depends_on=*/false,
++				   task->parent_depends_on);
++	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
++				 PRIORITY_INSERT_END,
++				 /*last_parent_depends_on=*/false,
++				 task->parent_depends_on);
++	  ++team->task_count;
++	  ++team->task_queued_count;
++	}
++      gomp_team_barrier_set_task_pending (&team->barrier);
++      if (team->task_running_count + !parent->in_tied_task
++	  < team->nthreads)
++	{
++	  do_wake = team->nthreads - team->task_running_count
++		    - !parent->in_tied_task;
++	  if ((unsigned long) do_wake > num_tasks)
++	    do_wake = num_tasks;
++	}
++      else
++	do_wake = 0;
++      gomp_mutex_unlock (&team->task_lock);
++      if (do_wake)
++	gomp_team_barrier_wake (&team->barrier, do_wake);
++    }
++  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
++    ialias_call (GOMP_taskgroup_end) ();
++}
+--- libgomp/priority_queue.h.jj	2016-07-13 16:57:04.438535323 +0200
++++ libgomp/priority_queue.h	2016-07-13 16:57:04.438535323 +0200
+@@ -0,0 +1,485 @@
++/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
++   Contributed by Aldy Hernandez <aldyh@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* Header file for a priority queue of GOMP tasks.  */
++
++/* ?? Perhaps all the priority_tree_* functions are complex and rare
++   enough to go out-of-line and be moved to priority_queue.c.  ??  */
++
++#ifndef _PRIORITY_QUEUE_H_
++#define _PRIORITY_QUEUE_H_
++
++/* One task.  */
++
++struct priority_node
++{
++  /* Next and previous chains in a circular doubly linked list for
++     tasks within this task's priority.  */
++  struct priority_node *next, *prev;
++};
++
++/* All tasks within the same priority.  */
++
++struct priority_list
++{
++  /* Priority of the tasks in this set.  */
++  int priority;
++
++  /* Head of the circular list of tasks; NULL when the list is empty.  */
++  struct priority_node *tasks;
++
++  /* This points to the last of the higher priority WAITING tasks.
++     Remember that for the children queue, we have:
++
++	parent_depends_on WAITING tasks.
++	!parent_depends_on WAITING tasks.
++	TIED tasks.
++
++     This is a pointer to the last of the parent_depends_on WAITING
++     tasks, which are essentially higher priority items within their
++     priority.  */
++  struct priority_node *last_parent_depends_on;
++};
++
++/* Another splay tree instantiation, for priority_list's.  */
++typedef struct prio_splay_tree_node_s *prio_splay_tree_node;
++typedef struct prio_splay_tree_s *prio_splay_tree;
++typedef struct prio_splay_tree_key_s *prio_splay_tree_key;
++struct prio_splay_tree_key_s {
++  /* This structure must contain only a priority_list, as we cast
++     prio_splay_tree_key to priority_list throughout.  */
++  struct priority_list l;
++};
++#define splay_tree_prefix prio
++#include "splay-tree.h"
++
++/* The entry point into a priority queue of tasks.
++
++   There are two alternate implementations with which to store tasks:
++   as a balanced tree of sorts, or as a simple list of tasks.  If
++   there are only priority-0 items (ROOT is NULL), we use the simple
++   list, otherwise (ROOT is non-NULL) we use the tree.  */
++
++struct priority_queue
++{
++  /* If t.root != NULL, this is a splay tree of priority_lists to hold
++     all tasks.  This is only used if multiple priorities are in play,
++     otherwise we use the priority_list `l' below to hold all
++     (priority-0) tasks.  */
++  struct prio_splay_tree_s t;
++
++  /* If t.root above is NULL, only priority-0 items exist, so keep
++     them in a simple list.  */
++  struct priority_list l;
++};
++
++enum priority_insert_type {
++  /* Insert at the beginning of a priority list.  */
++  PRIORITY_INSERT_BEGIN,
++  /* Insert at the end of a priority list.  */
++  PRIORITY_INSERT_END
++};
++
++/* Used to determine which queue a given priority node belongs in.
++   See pnode field of gomp_task.  */
++
++enum priority_queue_type
++{
++  PQ_TEAM,	    /* Node belongs in gomp_team's task_queue.  */
++  PQ_CHILDREN,	    /* Node belongs in parent's children_queue.  */
++  PQ_TASKGROUP,	    /* Node belongs in taskgroup->taskgroup_queue.  */
++  PQ_IGNORED = 999
++};
++
++/* Priority queue implementation prototypes.  */
++
++extern bool priority_queue_task_in_queue_p (enum priority_queue_type,
++					    struct priority_queue *,
++					    struct gomp_task *);
++extern void priority_queue_dump (enum priority_queue_type,
++				 struct priority_queue *);
++extern void priority_queue_verify (enum priority_queue_type,
++				   struct priority_queue *, bool);
++extern void priority_tree_remove (enum priority_queue_type,
++				  struct priority_queue *,
++				  struct priority_node *);
++extern struct gomp_task *priority_tree_next_task (enum priority_queue_type,
++						  struct priority_queue *,
++						  enum priority_queue_type,
++						  struct priority_queue *,
++						  bool *);
++
++/* Return TRUE if there is more than one priority in HEAD.  This is
++   used throughout to choose between the fast path (priority 0 only
++   items) and a world with multiple priorities.  */
++
++static inline bool
++priority_queue_multi_p (struct priority_queue *head)
++{
++  return __builtin_expect (head->t.root != NULL, 0);
++}
++
++/* Initialize a priority queue.  */
++
++static inline void
++priority_queue_init (struct priority_queue *head)
++{
++  head->t.root = NULL;
++  /* To save a few microseconds, we don't initialize head->l.priority
++     to 0 here.  It is implied that priority will be 0 if head->t.root
++     == NULL.
++
++     priority_tree_insert() will fix this when we encounter multiple
++     priorities.  */
++  head->l.tasks = NULL;
++  head->l.last_parent_depends_on = NULL;
++}
++
++static inline void
++priority_queue_free (struct priority_queue *head)
++{
++  /* There's nothing to do, as tasks were freed as they were removed
++     in priority_queue_remove.  */
++}
++
++/* Forward declarations.  */
++static inline size_t priority_queue_offset (enum priority_queue_type);
++static inline struct gomp_task *priority_node_to_task
++				(enum priority_queue_type,
++				 struct priority_node *);
++static inline struct priority_node *task_to_priority_node
++				    (enum priority_queue_type,
++				     struct gomp_task *);
++
++/* Return TRUE if priority queue HEAD is empty.
++
++   MODEL is MEMMODEL_ACQUIRE if we should use an acquire atomic to
++   read from the root of the queue, otherwise MEMMODEL_RELAXED if we
++   should use a plain load.  */
++
++static inline _Bool
++priority_queue_empty_p (struct priority_queue *head, enum memmodel model)
++{
++  /* Note: The acquire barriers on the loads here synchronize with
++     the write of a NULL in gomp_task_run_post_remove_parent.  It is
++     not necessary that we synchronize with other non-NULL writes at
++     this point, but we must ensure that all writes to memory by a
++     child thread task work function are seen before we exit from
++     GOMP_taskwait.  */
++  if (priority_queue_multi_p (head))
++    {
++      if (model == MEMMODEL_ACQUIRE)
++	return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL;
++      return head->t.root == NULL;
++    }
++  if (model == MEMMODEL_ACQUIRE)
++    return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL;
++  return head->l.tasks == NULL;
++}
++
++/* Look for a given PRIORITY in HEAD.  Return it if found, otherwise
++   return NULL.  This only applies to the tree variant in HEAD.  There
++   is no point in searching for priorities in HEAD->L.  */
++
++static inline struct priority_list *
++priority_queue_lookup_priority (struct priority_queue *head, int priority)
++{
++  if (head->t.root == NULL)
++    return NULL;
++  struct prio_splay_tree_key_s k;
++  k.l.priority = priority;
++  return (struct priority_list *)
++    prio_splay_tree_lookup (&head->t, &k);
++}
++
++/* Insert TASK, with PRIORITY, in the priority list in LIST.
++   LIST contains items of type TYPE.
++
++   If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the
++   top of its respective priority.  If POS is PRIORITY_INSERT_END, the
++   task is inserted at the end of its priority.
++
++   If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and
++   we must keep track of higher and lower priority WAITING tasks by
++   keeping the queue's last_parent_depends_on field accurate.  This
++   only applies to the children queue, and the caller must ensure LIST
++   is a children queue in this case.
++
++   If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON is
++   set to the task's parent_depends_on field.  If
++   ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant.
++
++   Nothing is returned; TASK's priority_node is linked into LIST.  */
++
++static inline void
++priority_list_insert (enum priority_queue_type type,
++		      struct priority_list *list,
++		      struct gomp_task *task,
++		      int priority,
++		      enum priority_insert_type pos,
++		      bool adjust_parent_depends_on,
++		      bool task_is_parent_depends_on)
++{
++  struct priority_node *node = task_to_priority_node (type, task);
++  if (list->tasks)
++    {
++      /* If we are keeping track of higher/lower priority items,
++	 but this is a lower priority WAITING task
++	 (!parent_depends_on), link it right after the last
++	 parent_depends_on task.  See the comment in
++	 priority_queue_upgrade_task for a visual on how tasks
++	 should be organized.  */
++      if (adjust_parent_depends_on
++	  && pos == PRIORITY_INSERT_BEGIN
++	  && list->last_parent_depends_on
++	  && !task_is_parent_depends_on)
++	{
++	  struct priority_node *last_parent_depends_on
++	    = list->last_parent_depends_on;
++	  node->next = last_parent_depends_on->next;
++	  node->prev = last_parent_depends_on;
++	}
++      /* Otherwise, link it before the head; INSERT_BEGIN makes it the head.  */
++      else
++	{
++	  node->next = list->tasks;
++	  node->prev = list->tasks->prev;
++	  if (pos == PRIORITY_INSERT_BEGIN)
++	    list->tasks = node;
++	}
++      node->next->prev = node;
++      node->prev->next = node;
++    }
++  else
++    {
++      node->next = node;
++      node->prev = node;
++      list->tasks = node;
++    }
++  if (adjust_parent_depends_on
++      && list->last_parent_depends_on == NULL
++      && task_is_parent_depends_on)
++    list->last_parent_depends_on = node;
++}
++
++/* Tree version of priority_list_insert.  */
++
++static inline void
++priority_tree_insert (enum priority_queue_type type,
++		      struct priority_queue *head,
++		      struct gomp_task *task,
++		      int priority,
++		      enum priority_insert_type pos,
++		      bool adjust_parent_depends_on,
++		      bool task_is_parent_depends_on)
++{
++  if (__builtin_expect (head->t.root == NULL, 0))
++    {
++      /* The first time around, transfer any priority 0 items to the
++	 tree.  */
++      if (head->l.tasks != NULL)
++	{
++	  prio_splay_tree_node k = gomp_malloc (sizeof (*k));
++	  k->left = NULL;
++	  k->right = NULL;
++	  k->key.l.priority = 0;
++	  k->key.l.tasks = head->l.tasks;
++	  k->key.l.last_parent_depends_on = head->l.last_parent_depends_on;
++	  prio_splay_tree_insert (&head->t, k);
++	  head->l.tasks = NULL;
++	}
++    }
++  struct priority_list *list
++    = priority_queue_lookup_priority (head, priority);
++  if (!list)
++    {
++      prio_splay_tree_node k = gomp_malloc (sizeof (*k));
++      k->left = NULL;
++      k->right = NULL;
++      k->key.l.priority = priority;
++      k->key.l.tasks = NULL;
++      k->key.l.last_parent_depends_on = NULL;
++      prio_splay_tree_insert (&head->t, k);
++      list = &k->key.l;
++    }
++  priority_list_insert (type, list, task, priority, pos,
++			adjust_parent_depends_on,
++			task_is_parent_depends_on);
++}
++
++/* Generic version of priority_*_insert.  */
++
++static inline void
++priority_queue_insert (enum priority_queue_type type,
++		       struct priority_queue *head,
++		       struct gomp_task *task,
++		       int priority,
++		       enum priority_insert_type pos,
++		       bool adjust_parent_depends_on,
++		       bool task_is_parent_depends_on)
++{
++#if _LIBGOMP_CHECKING_
++  if (priority_queue_task_in_queue_p (type, head, task))
++    gomp_fatal ("Attempt to insert existing task %p", task);
++#endif
++  if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0))
++    priority_tree_insert (type, head, task, priority, pos,
++			  adjust_parent_depends_on,
++			  task_is_parent_depends_on);
++  else
++    priority_list_insert (type, &head->l, task, priority, pos,
++			  adjust_parent_depends_on,
++			  task_is_parent_depends_on);
++}
++
++/* If multiple priorities are in play, return the highest priority
++   task from within Q1 and Q2, while giving preference to tasks from
++   Q1.  If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to
++   TRUE, otherwise it is set to FALSE.
++
++   If multiple priorities are not in play (only 0 priorities are
++   available), the next task is chosen exclusively from Q1.
++
++   As a special case, Q2 can be NULL, in which case, we just choose
++   the highest priority WAITING task in Q1.  This is an optimization
++   to speed up looking through only one queue.
++
++   We assume Q1 has at least one item.  */
++
++static inline struct gomp_task *
++priority_queue_next_task (enum priority_queue_type t1,
++			  struct priority_queue *q1,
++			  enum priority_queue_type t2,
++			  struct priority_queue *q2,
++			  bool *q1_chosen_p)
++{
++#if _LIBGOMP_CHECKING_
++  if (priority_queue_empty_p (q1, MEMMODEL_RELAXED))
++    gomp_fatal ("priority_queue_next_task: Q1 is empty");
++#endif
++  if (priority_queue_multi_p (q1))
++    {
++      struct gomp_task *t
++	= priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p);
++      /* If T is NULL, there are no WAITING tasks in Q1.  In which
++	 case, return any old (non-waiting) task which will cause the
++	 caller to do the right thing when checking T->KIND ==
++	 GOMP_TASK_WAITING.  */
++      if (!t)
++	{
++#if _LIBGOMP_CHECKING_
++	  if (*q1_chosen_p == false)
++	    gomp_fatal ("priority_queue_next_task inconsistency");
++#endif
++	  return priority_node_to_task (t1, q1->t.root->key.l.tasks);
++	}
++      return t;
++    }
++  else
++    {
++      *q1_chosen_p = true;
++      return priority_node_to_task (t1, q1->l.tasks);
++    }
++}
++
++/* Remove NODE from LIST.
++
++   If we are removing the one and only item in the list, and MODEL is
++   MEMMODEL_RELEASE, use an atomic release to clear the list.
++
++   If the list becomes empty after the remove, return TRUE.  */
++
++static inline bool
++priority_list_remove (struct priority_list *list,
++		      struct priority_node *node,
++		      enum memmodel model)
++{
++  bool empty = false;
++  node->prev->next = node->next;
++  node->next->prev = node->prev;
++  if (list->tasks == node)
++    {
++      if (node->next != node)
++	list->tasks = node->next;
++      else
++	{
++	  /* We access task->children in GOMP_taskwait outside of
++	     the task lock mutex region, so need a release barrier
++	     here to ensure memory written by child_task->fn above
++	     is flushed before the NULL is written.  */
++	  if (model == MEMMODEL_RELEASE)
++	    __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE);
++	  else
++	    list->tasks = NULL;
++	  empty = true;
++	  goto remove_out;
++	}
++    }
++remove_out:
++#if _LIBGOMP_CHECKING_
++  memset (node, 0xaf, sizeof (*node));
++#endif
++  return empty;
++}
++
++/* This is the generic version of priority_list_remove.
++
++   Remove NODE from priority queue HEAD.  HEAD contains tasks of type TYPE.
++
++   If we are removing the one and only item in the priority queue and
++   MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue.
++
++   If the queue becomes empty after the remove, return TRUE.  */
++
++static inline bool
++priority_queue_remove (enum priority_queue_type type,
++		       struct priority_queue *head,
++		       struct gomp_task *task,
++		       enum memmodel model)
++{
++#if _LIBGOMP_CHECKING_
++  if (!priority_queue_task_in_queue_p (type, head, task))
++    gomp_fatal ("Attempt to remove missing task %p", task);
++#endif
++  if (priority_queue_multi_p (head))
++    {
++      priority_tree_remove (type, head, task_to_priority_node (type, task));
++      if (head->t.root == NULL)
++	{
++	  if (model == MEMMODEL_RELEASE)
++	    /* Errr, we store NULL twice, the alternative would be to
++	       use an atomic release directly in the splay tree
++	       routines.  Worth it?  */
++	    __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE);
++	  return true;
++	}
++      return false;
++    }
++  else
++    return priority_list_remove (&head->l,
++				 task_to_priority_node (type, task), model);
++}
++
++#endif /* _PRIORITY_QUEUE_H_ */
+--- libgomp/priority_queue.c.jj	2016-07-13 16:57:04.435535360 +0200
++++ libgomp/priority_queue.c	2016-07-13 16:57:04.435535360 +0200
+@@ -0,0 +1,300 @@
++/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
++   Contributed by Aldy Hernandez <aldyh@redhat.com>.
++
++   This file is part of the GNU Offloading and Multi Processing Library
++   (libgomp).
++
++   Libgomp is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++   more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* Priority queue implementation of GOMP tasks.  */
++
++#include "libgomp.h"
++
++#if _LIBGOMP_CHECKING_
++#include <stdio.h>
++
++/* Sanity check to verify whether a TASK is in LIST.  Return TRUE if
++   found, FALSE otherwise.
++
++   TYPE is the type of priority queue this task resides in.  */
++
++static inline bool
++priority_queue_task_in_list_p (enum priority_queue_type type,
++			       struct priority_list *list,
++			       struct gomp_task *task)
++{
++  struct priority_node *p = list->tasks;
++  do
++    {
++      if (priority_node_to_task (type, p) == task)
++	return true;
++      p = p->next;
++    }
++  while (p != list->tasks);
++  return false;
++}
++
++/* Tree version of priority_queue_task_in_list_p.  */
++
++static inline bool
++priority_queue_task_in_tree_p (enum priority_queue_type type,
++			       struct priority_queue *head,
++			       struct gomp_task *task)
++{
++  struct priority_list *list
++    = priority_queue_lookup_priority (head, task->priority);
++  if (!list)
++    return false;
++  return priority_queue_task_in_list_p (type, list, task);
++}
++
++/* Generic version of priority_queue_task_in_list_p that works for
++   trees or lists.  */
++
++bool
++priority_queue_task_in_queue_p (enum priority_queue_type type,
++				struct priority_queue *head,
++				struct gomp_task *task)
++{
++  if (priority_queue_empty_p (head, MEMMODEL_RELAXED))
++    return false;
++  if (priority_queue_multi_p (head))
++    return priority_queue_task_in_tree_p (type, head, task);
++  else
++    return priority_queue_task_in_list_p (type, &head->l, task);
++}
++
++/* Sanity check LIST to make sure the tasks therein are in the right
++   order.  LIST is a priority list of type TYPE.
++
++   The expected order is that GOMP_TASK_WAITING tasks come before
++   GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones.
++
++   If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING
++   tasks come before !parent_depends_on WAITING tasks.  This is only
++   applicable to the children queue, and the caller is expected to
++   ensure that we are verifying the children queue.  */
++
++static void
++priority_list_verify (enum priority_queue_type type,
++		      struct priority_list *list, bool check_deps)
++{
++  bool seen_tied = false;
++  bool seen_plain_waiting = false;
++  struct priority_node *p = list->tasks;
++  while (1)
++    {
++      struct gomp_task *t = priority_node_to_task (type, p);
++      if (seen_tied && t->kind == GOMP_TASK_WAITING)
++	gomp_fatal ("priority_queue_verify: WAITING task after TIED");
++      if (t->kind >= GOMP_TASK_TIED)
++	seen_tied = true;
++      else if (check_deps && t->kind == GOMP_TASK_WAITING)
++	{
++	  if (t->parent_depends_on)
++	    {
++	      if (seen_plain_waiting)
++		gomp_fatal ("priority_queue_verify: "
++			    "parent_depends_on after !parent_depends_on");
++	    }
++	  else
++	    seen_plain_waiting = true;
++	}
++      p = p->next;
++      if (p == list->tasks)
++	break;
++    }
++}
++
++/* Callback data for priority_tree_verify_callback.  */
++struct cbtype
++{
++  enum priority_queue_type type;
++  bool check_deps;
++};
++
++/* Verify every task in KEY.
++
++   Callback for prio_splay_tree_foreach.  */
++
++static void
++priority_tree_verify_callback (prio_splay_tree_key key, void *data)
++{
++  struct cbtype *cb = (struct cbtype *) data;
++  priority_list_verify (cb->type, &key->l, cb->check_deps);
++}
++
++/* Generic version of priority_list_verify.
++
++   Sanity check HEAD to make sure the tasks therein are in the right
++   order.  The priority_queue holds tasks of type TYPE.
++
++   If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING
++   tasks come before !parent_depends_on WAITING tasks.  This is only
++   applicable to the children queue, and the caller is expected to
++   ensure that we are verifying the children queue.  */
++
++void
++priority_queue_verify (enum priority_queue_type type,
++		       struct priority_queue *head, bool check_deps)
++{
++  if (priority_queue_empty_p (head, MEMMODEL_RELAXED))
++    return;
++  if (priority_queue_multi_p (head))
++    {
++      struct cbtype cb = { type, check_deps };
++      prio_splay_tree_foreach (&head->t,
++			       priority_tree_verify_callback, &cb);
++    }
++  else
++    priority_list_verify (type, &head->l, check_deps);
++}
++#endif /* _LIBGOMP_CHECKING_ */
++
++/* Remove NODE from priority queue HEAD, wherever it may be inside the
++   tree.  HEAD contains tasks of type TYPE.  */
++
++void
++priority_tree_remove (enum priority_queue_type type,
++		      struct priority_queue *head,
++		      struct priority_node *node)
++{
++  /* ?? The only reason this function is not inlined is because we
++     need to find the priority within gomp_task (which has not been
++     completely defined in the header file).  If the lack of inlining
++     is a concern, we could pass the priority number as a
++     parameter, or we could move this to libgomp.h.  */
++  int priority = priority_node_to_task (type, node)->priority;
++
++  /* ?? We could avoid this lookup by keeping a pointer to the key in
++     the priority_node.  */
++  struct priority_list *list
++    = priority_queue_lookup_priority (head, priority);
++#if _LIBGOMP_CHECKING_
++  if (!list)
++    gomp_fatal ("Unable to find priority %d", priority);
++#endif
++  /* If NODE was the last in its priority, clean up the priority.  */
++  if (priority_list_remove (list, node, MEMMODEL_RELAXED))
++    {
++      prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list);
++      list->tasks = NULL;
++#if _LIBGOMP_CHECKING_
++      memset (list, 0xaf, sizeof (*list));
++#endif
++      free (list);
++    }
++}
++
++/* Return the highest priority WAITING task in a splay tree NODE.  If
++   there are no WAITING tasks available, return NULL.
++
++   NODE is a priority list containing tasks of type TYPE.
++
++   The right most node in a tree contains the highest priority.
++   Recurse down to find such a node.  If the task at that max node is
++   not WAITING, bubble back up and look at the remaining tasks
++   in-order.  */
++
++static struct gomp_task *
++priority_tree_next_task_1 (enum priority_queue_type type,
++			   prio_splay_tree_node node)
++{
++ again:
++  if (!node)
++    return NULL;
++  struct gomp_task *ret = priority_tree_next_task_1 (type, node->right);
++  if (ret)
++    return ret;
++  ret = priority_node_to_task (type, node->key.l.tasks);
++  if (ret->kind == GOMP_TASK_WAITING)
++    return ret;
++  node = node->left;
++  goto again;
++}
++
++/* Return the highest priority WAITING task from within Q1 and Q2,
++   while giving preference to tasks from Q1.  Q1 is a queue containing
++   items of type TYPE1.  Q2 is a queue containing items of type TYPE2.
++
++   Since we are mostly interested in Q1, if there are no WAITING tasks
++   in Q1, we don't bother checking Q2, and just return NULL.
++
++   As a special case, Q2 can be NULL, in which case, we just choose
++   the highest priority WAITING task in Q1.  This is an optimization
++   to speed up looking through only one queue.
++
++   If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to
++   TRUE, otherwise it is set to FALSE.  */
++
++struct gomp_task *
++priority_tree_next_task (enum priority_queue_type type1,
++			 struct priority_queue *q1,
++			 enum priority_queue_type type2,
++			 struct priority_queue *q2,
++			 bool *q1_chosen_p)
++{
++  struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root);
++  if (!t1
++      /* Special optimization when only searching through one queue.  */
++      || !q2)
++    {
++      *q1_chosen_p = true;
++      return t1;
++    }
++  struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root);
++  if (!t2 || t1->priority > t2->priority)
++    {
++      *q1_chosen_p = true;
++      return t1;
++    }
++  if (t2->priority > t1->priority)
++    {
++      *q1_chosen_p = false;
++      return t2;
++    }
++  /* If we get here, the priorities are the same, so we must look at
++     parent_depends_on to make our decision.  */
++#if _LIBGOMP_CHECKING_
++  if (t1 != t2)
++    gomp_fatal ("priority_tree_next_task: t1 != t2");
++#endif
++  if (t2->parent_depends_on && !t1->parent_depends_on)
++    {
++      *q1_chosen_p = false;
++      return t2;
++    }
++  *q1_chosen_p = true;
++  return t1;
++}
++
++/* Priority splay trees comparison function.  */
++static inline int
++prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y)
++{
++  if (x->l.priority == y->l.priority)
++    return 0;
++  return x->l.priority < y->l.priority ? -1 : 1;
++}
++
++/* Define another splay tree instantiation, for priority_list's.  */
++#define splay_tree_prefix prio
++#define splay_tree_c
++#include "splay-tree.h"
+--- libgomp/openacc.f90.jj	2016-07-13 16:57:04.434535373 +0200
++++ libgomp/openacc.f90	2016-07-14 19:01:54.901230875 +0200
+@@ -0,0 +1,911 @@
++!  OpenACC Runtime Library Definitions.
++
++!  Copyright (C) 2014-2016 Free Software Foundation, Inc.
++
++!  Contributed by Tobias Burnus <burnus@net-b.de>
++!              and Mentor Embedded.
++
++!  This file is part of the GNU Offloading and Multi Processing Library
++!  (libgomp).
++
++!  Libgomp is free software; you can redistribute it and/or modify it
++!  under the terms of the GNU General Public License as published by
++!  the Free Software Foundation; either version 3, or (at your option)
++!  any later version.
++
++!  Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
++!  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++!  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++!  more details.
++
++!  Under Section 7 of GPL version 3, you are granted additional
++!  permissions described in the GCC Runtime Library Exception, version
++!  3.1, as published by the Free Software Foundation.
++
++!  You should have received a copy of the GNU General Public License and
++!  a copy of the GCC Runtime Library Exception along with this program;
++!  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++!  <http://www.gnu.org/licenses/>.
++
++module openacc_kinds
++  use iso_fortran_env, only: int32
++  implicit none
++
++  private :: int32
++  public :: acc_device_kind
++
++  integer, parameter :: acc_device_kind = int32
++
++  public :: acc_device_none, acc_device_default, acc_device_host
++  public :: acc_device_not_host, acc_device_nvidia
++
++  ! Keep in sync with include/gomp-constants.h.
++  integer (acc_device_kind), parameter :: acc_device_none = 0
++  integer (acc_device_kind), parameter :: acc_device_default = 1
++  integer (acc_device_kind), parameter :: acc_device_host = 2
++  ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
++  integer (acc_device_kind), parameter :: acc_device_not_host = 4
++  integer (acc_device_kind), parameter :: acc_device_nvidia = 5
++
++  public :: acc_handle_kind
++
++  integer, parameter :: acc_handle_kind = int32
++
++  public :: acc_async_noval, acc_async_sync
++
++  ! Keep in sync with include/gomp-constants.h.
++  integer (acc_handle_kind), parameter :: acc_async_noval = -1
++  integer (acc_handle_kind), parameter :: acc_async_sync = -2
++
++end module
++
++module openacc_internal
++  use openacc_kinds
++  implicit none
++
++  interface
++    function acc_get_num_devices_h (d)
++      import
++      integer acc_get_num_devices_h
++      integer (acc_device_kind) d
++    end function
++
++    subroutine acc_set_device_type_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_get_device_type_h ()
++      import
++      integer (acc_device_kind) acc_get_device_type_h
++    end function
++
++    subroutine acc_set_device_num_h (n, d)
++      import
++      integer n
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_get_device_num_h (d)
++      import
++      integer acc_get_device_num_h
++      integer (acc_device_kind) d
++    end function
++
++    function acc_async_test_h (a)
++      logical acc_async_test_h
++      integer a
++    end function
++
++    function acc_async_test_all_h ()
++      logical acc_async_test_all_h
++    end function
++
++    subroutine acc_wait_h (a)
++      integer a
++    end subroutine
++
++    subroutine acc_wait_async_h (a1, a2)
++      integer a1, a2
++    end subroutine
++
++    subroutine acc_wait_all_h ()
++    end subroutine
++
++    subroutine acc_wait_all_async_h (a)
++      integer a
++    end subroutine
++
++    subroutine acc_init_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    subroutine acc_shutdown_h (d)
++      import
++      integer (acc_device_kind) d
++    end subroutine
++
++    function acc_on_device_h (d)
++      import
++      integer (acc_device_kind) d
++      logical acc_on_device_h
++    end function
++
++    subroutine acc_copyin_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_copyin_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_copyin_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_present_or_copyin_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_present_or_copyin_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_present_or_copyin_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_create_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_create_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_create_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_present_or_create_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_present_or_create_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_present_or_create_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_copyout_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_copyout_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_copyout_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_delete_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_delete_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_delete_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_update_device_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_update_device_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_update_device_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    subroutine acc_update_self_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end subroutine
++
++    subroutine acc_update_self_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end subroutine
++
++    subroutine acc_update_self_array_h (a)
++      type (*), dimension (..), contiguous :: a
++    end subroutine
++
++    function acc_is_present_32_h (a, len)
++      use iso_c_binding, only: c_int32_t
++      logical acc_is_present_32_h
++      type (*), dimension (*) :: a
++      integer (c_int32_t) len
++    end function
++
++    function acc_is_present_64_h (a, len)
++      use iso_c_binding, only: c_int64_t
++      logical acc_is_present_64_h
++      type (*), dimension (*) :: a
++      integer (c_int64_t) len
++    end function
++
++    function acc_is_present_array_h (a)
++      logical acc_is_present_array_h
++      type (*), dimension (..), contiguous :: a
++    end function
++  end interface
++
++  interface
++    function acc_get_num_devices_l (d) &
++        bind (C, name = "acc_get_num_devices")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_num_devices_l
++      integer (c_int), value :: d
++    end function
++
++    subroutine acc_set_device_type_l (d) &
++        bind (C, name = "acc_set_device_type")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    function acc_get_device_type_l () &
++        bind (C, name = "acc_get_device_type")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_device_type_l
++    end function
++
++    subroutine acc_set_device_num_l (n, d) &
++        bind (C, name = "acc_set_device_num")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: n, d
++    end subroutine
++
++    function acc_get_device_num_l (d) &
++        bind (C, name = "acc_get_device_num")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_get_device_num_l
++      integer (c_int), value :: d
++    end function
++
++    function acc_async_test_l (a) &
++        bind (C, name = "acc_async_test")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_async_test_l
++      integer (c_int), value :: a
++    end function
++
++    function acc_async_test_all_l () &
++        bind (C, name = "acc_async_test_all")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_async_test_all_l
++    end function
++
++    subroutine acc_wait_l (a) &
++        bind (C, name = "acc_wait")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a
++    end subroutine
++
++    subroutine acc_wait_async_l (a1, a2) &
++        bind (C, name = "acc_wait_async")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a1, a2
++    end subroutine
++
++    subroutine acc_wait_all_l () &
++        bind (C, name = "acc_wait_all")
++      use iso_c_binding, only: c_int
++    end subroutine
++
++    subroutine acc_wait_all_async_l (a) &
++        bind (C, name = "acc_wait_all_async")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: a
++    end subroutine
++
++    subroutine acc_init_l (d) &
++        bind (C, name = "acc_init")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    subroutine acc_shutdown_l (d) &
++        bind (C, name = "acc_shutdown")
++      use iso_c_binding, only: c_int
++      integer (c_int), value :: d
++    end subroutine
++
++    function acc_on_device_l (d) &
++        bind (C, name = "acc_on_device")
++      use iso_c_binding, only: c_int
++      integer (c_int) :: acc_on_device_l
++      integer (c_int), value :: d
++    end function
++
++    subroutine acc_copyin_l (a, len) &
++        bind (C, name = "acc_copyin")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_present_or_copyin_l (a, len) &
++        bind (C, name = "acc_present_or_copyin")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_create_l (a, len) &
++        bind (C, name = "acc_create")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_present_or_create_l (a, len) &
++        bind (C, name = "acc_present_or_create")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_copyout_l (a, len) &
++        bind (C, name = "acc_copyout")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_delete_l (a, len) &
++        bind (C, name = "acc_delete")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_update_device_l (a, len) &
++        bind (C, name = "acc_update_device")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    subroutine acc_update_self_l (a, len) &
++        bind (C, name = "acc_update_self")
++      use iso_c_binding, only: c_size_t
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len
++    end subroutine
++
++    function acc_is_present_l (a, len) &
++        bind (C, name = "acc_is_present")  ! binds directly to the C entry point
++      use iso_c_binding, only: c_int, c_size_t
++      integer (c_int) :: acc_is_present_l  ! C int result, same kind as sibling bindings
++      type (*), dimension (*) :: a
++      integer (c_size_t), value :: len  ! passed by value as size_t
++    end function
++  end interface
++end module
++
++module openacc
++  use openacc_kinds
++  use openacc_internal
++  implicit none
++
++  public :: openacc_version
++
++  public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
++  public :: acc_set_device_num, acc_get_device_num, acc_async_test
++  public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all
++  public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device
++  public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create
++  public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete
++  public :: acc_update_device, acc_update_self, acc_is_present
++
++  integer, parameter :: openacc_version = 201306
++
++  interface acc_get_num_devices
++    procedure :: acc_get_num_devices_h
++  end interface
++
++  interface acc_set_device_type
++    procedure :: acc_set_device_type_h
++  end interface
++
++  interface acc_get_device_type
++    procedure :: acc_get_device_type_h
++  end interface
++
++  interface acc_set_device_num
++    procedure :: acc_set_device_num_h
++  end interface
++
++  interface acc_get_device_num
++    procedure :: acc_get_device_num_h
++  end interface
++
++  interface acc_async_test
++    procedure :: acc_async_test_h
++  end interface
++
++  interface acc_async_test_all
++    procedure :: acc_async_test_all_h
++  end interface
++
++  interface acc_wait
++    procedure :: acc_wait_h
++  end interface
++
++  interface acc_wait_async
++    procedure :: acc_wait_async_h
++  end interface
++
++  interface acc_wait_all
++    procedure :: acc_wait_all_h
++  end interface
++
++  interface acc_wait_all_async
++    procedure :: acc_wait_all_async_h
++  end interface
++
++  interface acc_init
++    procedure :: acc_init_h
++  end interface
++
++  interface acc_shutdown
++    procedure :: acc_shutdown_h
++  end interface
++
++  interface acc_on_device
++    procedure :: acc_on_device_h
++  end interface
++
++  ! acc_malloc: Only available in C/C++
++  ! acc_free: Only available in C/C++
++
++  ! As vendor extension, the following code supports both 32bit and 64bit
++  ! arguments for "size"; the OpenACC standard only permits default-kind
++  ! integers, which are of kind 4 (i.e. 32 bits).
++  ! Additionally, the two-argument version also takes arrays as argument,
++  ! and the one-argument version also takes scalars. Note that the code
++  ! assumes that the arrays are contiguous.
++
++  interface acc_copyin
++    procedure :: acc_copyin_32_h
++    procedure :: acc_copyin_64_h
++    procedure :: acc_copyin_array_h
++  end interface
++
++  interface acc_present_or_copyin
++    procedure :: acc_present_or_copyin_32_h
++    procedure :: acc_present_or_copyin_64_h
++    procedure :: acc_present_or_copyin_array_h
++  end interface
++
++  interface acc_pcopyin
++    procedure :: acc_present_or_copyin_32_h
++    procedure :: acc_present_or_copyin_64_h
++    procedure :: acc_present_or_copyin_array_h
++  end interface
++
++  interface acc_create
++    procedure :: acc_create_32_h
++    procedure :: acc_create_64_h
++    procedure :: acc_create_array_h
++  end interface
++
++  interface acc_present_or_create
++    procedure :: acc_present_or_create_32_h
++    procedure :: acc_present_or_create_64_h
++    procedure :: acc_present_or_create_array_h
++  end interface
++
++  interface acc_pcreate
++    procedure :: acc_present_or_create_32_h
++    procedure :: acc_present_or_create_64_h
++    procedure :: acc_present_or_create_array_h
++  end interface
++
++  interface acc_copyout
++    procedure :: acc_copyout_32_h
++    procedure :: acc_copyout_64_h
++    procedure :: acc_copyout_array_h
++  end interface
++
++  interface acc_delete
++    procedure :: acc_delete_32_h
++    procedure :: acc_delete_64_h
++    procedure :: acc_delete_array_h
++  end interface
++
++  interface acc_update_device
++    procedure :: acc_update_device_32_h
++    procedure :: acc_update_device_64_h
++    procedure :: acc_update_device_array_h
++  end interface
++
++  interface acc_update_self
++    procedure :: acc_update_self_32_h
++    procedure :: acc_update_self_64_h
++    procedure :: acc_update_self_array_h
++  end interface
++
++  ! acc_map_data: Only available in C/C++
++  ! acc_unmap_data: Only available in C/C++
++  ! acc_deviceptr: Only available in C/C++
++  ! acc_hostptr: Only available in C/C++
++
++  interface acc_is_present
++    procedure :: acc_is_present_32_h
++    procedure :: acc_is_present_64_h
++    procedure :: acc_is_present_array_h
++  end interface
++
++  ! acc_memcpy_to_device: Only available in C/C++
++  ! acc_memcpy_from_device: Only available in C/C++
++
++end module
++
++function acc_get_num_devices_h (d)
++  use openacc_internal, only: acc_get_num_devices_l
++  use openacc_kinds
++  integer acc_get_num_devices_h
++  integer (acc_device_kind) d
++  acc_get_num_devices_h = acc_get_num_devices_l (d)
++end function
++
++subroutine acc_set_device_type_h (d)
++  use openacc_internal, only: acc_set_device_type_l
++  use openacc_kinds
++  integer (acc_device_kind) d
++  call acc_set_device_type_l (d)
++end subroutine
++
++function acc_get_device_type_h ()
++  use openacc_internal, only: acc_get_device_type_l
++  use openacc_kinds
++  integer (acc_device_kind) acc_get_device_type_h
++  acc_get_device_type_h = acc_get_device_type_l ()
++end function
++
++subroutine acc_set_device_num_h (n, d)
++  use openacc_internal, only: acc_set_device_num_l
++  use openacc_kinds
++  integer n
++  integer (acc_device_kind) d
++  call acc_set_device_num_l (n, d)
++end subroutine
++
++function acc_get_device_num_h (d)
++  use openacc_internal, only: acc_get_device_num_l
++  use openacc_kinds
++  integer acc_get_device_num_h
++  integer (acc_device_kind) d
++  acc_get_device_num_h = acc_get_device_num_l (d)
++end function
++
++function acc_async_test_h (a)
++  use openacc_internal, only: acc_async_test_l
++  logical acc_async_test_h
++  integer a
++  if (acc_async_test_l (a) .eq. 1) then
++    acc_async_test_h = .TRUE.
++  else
++    acc_async_test_h = .FALSE.
++  end if
++end function
++
++function acc_async_test_all_h ()
++  use openacc_internal, only: acc_async_test_all_l
++  logical acc_async_test_all_h
++  if (acc_async_test_all_l () .eq. 1) then
++    acc_async_test_all_h = .TRUE.
++  else
++    acc_async_test_all_h = .FALSE.
++  end if
++end function
++
++subroutine acc_wait_h (a)
++  use openacc_internal, only: acc_wait_l
++  integer a
++  call acc_wait_l (a)
++end subroutine
++
++subroutine acc_wait_async_h (a1, a2)
++  use openacc_internal, only: acc_wait_async_l
++  integer a1, a2
++  call acc_wait_async_l (a1, a2)
++end subroutine
++
++subroutine acc_wait_all_h ()
++  use openacc_internal, only: acc_wait_all_l
++  call acc_wait_all_l ()
++end subroutine
++
++subroutine acc_wait_all_async_h (a)
++  use openacc_internal, only: acc_wait_all_async_l
++  integer a
++  call acc_wait_all_async_l (a)
++end subroutine
++
++subroutine acc_init_h (d)
++  use openacc_internal, only: acc_init_l
++  use openacc_kinds
++  integer (acc_device_kind) d
++  call acc_init_l (d)
++end subroutine
++
++subroutine acc_shutdown_h (d)
++  use openacc_internal, only: acc_shutdown_l
++  use openacc_kinds
++  integer (acc_device_kind) d
++  call acc_shutdown_l (d)
++end subroutine
++
++function acc_on_device_h (d)
++  use openacc_internal, only: acc_on_device_l
++  use openacc_kinds
++  integer (acc_device_kind) d
++  logical acc_on_device_h
++  if (acc_on_device_l (d) .eq. 1) then
++    acc_on_device_h = .TRUE.
++  else
++    acc_on_device_h = .FALSE.
++  end if
++end function
++
++subroutine acc_copyin_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_copyin_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyin_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_copyin_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyin_array_h (a)
++  use openacc_internal, only: acc_copyin_l
++  type (*), dimension (..), contiguous :: a
++  call acc_copyin_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_present_or_copyin_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_present_or_copyin_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_present_or_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_copyin_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_present_or_copyin_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_present_or_copyin_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_copyin_array_h (a)
++  use openacc_internal, only: acc_present_or_copyin_l
++  type (*), dimension (..), contiguous :: a
++  call acc_present_or_copyin_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_create_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_create_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_create_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_create_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_create_array_h (a)
++  use openacc_internal, only: acc_create_l
++  type (*), dimension (..), contiguous :: a
++  call acc_create_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_present_or_create_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_present_or_create_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_present_or_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_create_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_present_or_create_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_present_or_create_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_present_or_create_array_h (a)
++  use openacc_internal, only: acc_present_or_create_l
++  type (*), dimension (..), contiguous :: a
++  call acc_present_or_create_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_copyout_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_copyout_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_copyout_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_copyout_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_copyout_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_copyout_array_h (a)
++  use openacc_internal, only: acc_copyout_l
++  type (*), dimension (..), contiguous :: a
++  call acc_copyout_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_delete_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_delete_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_delete_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_delete_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_delete_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_delete_array_h (a)
++  use openacc_internal, only: acc_delete_l
++  type (*), dimension (..), contiguous :: a
++  call acc_delete_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_update_device_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_update_device_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_update_device_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_device_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_update_device_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_update_device_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_device_array_h (a)
++  use openacc_internal, only: acc_update_device_l
++  type (*), dimension (..), contiguous :: a
++  call acc_update_device_l (a, sizeof (a))
++end subroutine
++
++subroutine acc_update_self_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_update_self_l
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  call acc_update_self_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_self_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_update_self_l
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  call acc_update_self_l (a, int (len, kind = c_size_t))
++end subroutine
++
++subroutine acc_update_self_array_h (a)
++  use openacc_internal, only: acc_update_self_l
++  type (*), dimension (..), contiguous :: a
++  call acc_update_self_l (a, sizeof (a))
++end subroutine
++
++function acc_is_present_32_h (a, len)
++  use iso_c_binding, only: c_int32_t, c_size_t
++  use openacc_internal, only: acc_is_present_l
++  logical acc_is_present_32_h
++  type (*), dimension (*) :: a
++  integer (c_int32_t) len
++  if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then
++    acc_is_present_32_h = .TRUE.
++  else
++    acc_is_present_32_h = .FALSE.
++  end if
++end function
++
++function acc_is_present_64_h (a, len)
++  use iso_c_binding, only: c_int64_t, c_size_t
++  use openacc_internal, only: acc_is_present_l
++  logical acc_is_present_64_h
++  type (*), dimension (*) :: a
++  integer (c_int64_t) len
++  if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then
++    acc_is_present_64_h = .TRUE.
++  else
++    acc_is_present_64_h = .FALSE.
++  end if
++end function
++
++function acc_is_present_array_h (a)
++  use openacc_internal, only: acc_is_present_l
++  logical acc_is_present_array_h
++  type (*), dimension (..), contiguous :: a
++  acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1
++end function
diff --git a/SOURCES/gcc48-pr52714.patch b/SOURCES/gcc48-pr52714.patch
new file mode 100644
index 0000000..2ea553d
--- /dev/null
+++ b/SOURCES/gcc48-pr52714.patch
@@ -0,0 +1,76 @@
+2014-02-27  Jeff Law  <law@redhat.com>
+
+	PR rtl-optimization/52714
+	* combine.c (try_combine): When splitting an unrecognized PARALLEL
+	into two independent simple sets, if I3 is a jump, ensure the
+	pattern we place into I3 is a (set (pc) ...)
+
+	* gcc.c-torture/compile/pr52714.c: New test.
+
+2016-06-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* gcc.c-torture/compile/20160615-1.c: New test.
+
+--- gcc/combine.c	(revision 208203)
++++ gcc/combine.c	(revision 208204)
+@@ -3706,6 +3706,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
+ #ifdef HAVE_cc0
+ 	  && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 0))
+ #endif
++	  /* If I3 is a jump, ensure that set0 is a jump so that
++	     we do not create invalid RTL.  */
++	  && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 0)) == pc_rtx)
+ 	 )
+ 	{
+ 	  newi2pat = XVECEXP (newpat, 0, 1);
+@@ -3716,6 +3719,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
+ #ifdef HAVE_cc0
+ 	       && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 1))
+ #endif
++	       /* If I3 is a jump, ensure that set1 is a jump so that
++		  we do not create invalid RTL.  */
++	       && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 1)) == pc_rtx)
+ 	      )
+ 	{
+ 	  newi2pat = XVECEXP (newpat, 0, 0);
+--- gcc/testsuite/gcc.c-torture/compile/pr52714.c	(revision 0)
++++ gcc/testsuite/gcc.c-torture/compile/pr52714.c	(revision 208204)
+@@ -0,0 +1,25 @@
++
++int __re_compile_fastmap(unsigned char *p)
++{
++    unsigned char **stack;
++    unsigned size;
++    unsigned avail;
++
++    stack = __builtin_alloca(5 * sizeof(unsigned char*));
++    if (stack == 0)
++	return -2;
++    size = 5;
++    avail = 0;
++
++    for (;;) {
++	switch (*p++) {
++	case 0:
++	    if (avail == size)
++		return -2;
++	    stack[avail++] = p;
++	}
++    }
++
++    return 0;
++}
++
+--- gcc/testsuite/gcc.c-torture/compile/20160615-1.c.jj	2016-06-15 11:17:54.690689056 +0200
++++ gcc/testsuite/gcc.c-torture/compile/20160615-1.c	2016-06-15 11:17:48.811765657 +0200
+@@ -0,0 +1,10 @@
++int a;
++void bar (int, unsigned, unsigned);
++
++void
++foo (unsigned x)
++{
++  unsigned b = a ? x : 0;
++  if (x || b)
++    bar (0, x, b);
++}
diff --git a/SOURCES/gcc48-pr53477.patch b/SOURCES/gcc48-pr53477.patch
new file mode 100644
index 0000000..70d5d56
--- /dev/null
+++ b/SOURCES/gcc48-pr53477.patch
@@ -0,0 +1,131 @@
+2013-08-20  Phil Muldoon  <pmuldoon@redhat.com>
+
+	PR libstdc++/53477
+	http://sourceware.org/bugzilla/show_bug.cgi?id=15195
+
+	* python/libstdcxx/v6/printers.py (Printer.__call__): If a value
+	is a reference, fetch referenced value.
+	(RxPrinter.invoke): Ditto.
+	* testsuite/libstdc++-prettyprinters/cxx11.cc (main): Add -O0
+	flag. Add referenced value tests.
+
+--- libstdc++-v3/python/libstdcxx/v6/printers.py	(revision 201887)
++++ libstdc++-v3/python/libstdcxx/v6/printers.py	(revision 201888)
+@@ -786,6 +786,11 @@ class RxPrinter(object):
+     def invoke(self, value):
+         if not self.enabled:
+             return None
++
++        if value.type.code == gdb.TYPE_CODE_REF:
++            if hasattr(gdb.Value,"referenced_value"):
++                value = value.referenced_value()
++
+         return self.function(self.name, value)
+ 
+ # A pretty-printer that conforms to the "PrettyPrinter" protocol from
+@@ -841,6 +846,11 @@ class Printer(object):
+             return None
+ 
+         basename = match.group(1)
++
++        if val.type.code == gdb.TYPE_CODE_REF:
++            if hasattr(gdb.Value,"referenced_value"):
++                val = val.referenced_value()
++
+         if basename in self.lookup:
+             return self.lookup[basename].invoke(val)
+ 
+--- libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc	(revision 201887)
++++ libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc	(revision 201888)
+@@ -1,5 +1,5 @@
+ // { dg-do run }
+-// { dg-options "-std=gnu++11 -g" }
++// { dg-options "-std=gnu++11 -g -O0" }
+ 
+ // Copyright (C) 2011-2013 Free Software Foundation, Inc.
+ //
+@@ -24,6 +24,8 @@
+ #include <string>
+ #include <iostream>
+ 
++typedef std::tuple<int, int> ExTuple;
++
+ template<class T>
+ void
+ placeholder(const T &s)
+@@ -62,43 +64,75 @@ main()
+   std::forward_list<int> efl;
+ // { dg-final { note-test efl "empty std::forward_list" } }
+ 
++  std::forward_list<int> &refl = efl;
++// { dg-final { note-test refl "empty std::forward_list" } }
++
+   std::forward_list<int> fl;
+   fl.push_front(2);
+   fl.push_front(1);
+ // { dg-final { note-test fl {std::forward_list = {[0] = 1, [1] = 2}} } }
+ 
++  std::forward_list<int> &rfl = fl;
++// { dg-final { note-test rfl {std::forward_list = {[0] = 1, [1] = 2}} } }
++
+   std::unordered_map<int, std::string> eum;
+ // { dg-final { note-test eum "std::unordered_map with 0 elements" } }
++  std::unordered_map<int, std::string> &reum = eum;
++// { dg-final { note-test reum "std::unordered_map with 0 elements" } }
++
+   std::unordered_multimap<int, std::string> eumm;
+ // { dg-final { note-test eumm "std::unordered_multimap with 0 elements" } }
++  std::unordered_multimap<int, std::string> &reumm = eumm;
++// { dg-final { note-test reumm "std::unordered_multimap with 0 elements" } }
++
+   std::unordered_set<int> eus;
+ // { dg-final { note-test eus "std::unordered_set with 0 elements" } }
++  std::unordered_set<int> &reus = eus;
++// { dg-final { note-test reus "std::unordered_set with 0 elements" } }
++
+   std::unordered_multiset<int> eums;
+ // { dg-final { note-test eums "std::unordered_multiset with 0 elements" } }
++  std::unordered_multiset<int> &reums = eums;
++// { dg-final { note-test reums "std::unordered_multiset with 0 elements" } }
+ 
+   std::unordered_map<int, std::string> uom;
+   uom[5] = "three";
+   uom[3] = "seven";
+ // { dg-final { note-test uom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } }
+ 
++  std::unordered_map<int, std::string> &ruom = uom;
++// { dg-final { note-test ruom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } }
++
+   std::unordered_multimap<int, std::string> uomm;
+   uomm.insert(std::pair<int, std::string> (5, "three"));
+   uomm.insert(std::pair<int, std::string> (5, "seven"));
+ // { dg-final { note-test uomm {std::unordered_multimap with 2 elements = {[5] = "seven", [5] = "three"}} } }
++  std::unordered_multimap<int, std::string> &ruomm = uomm;
++// { dg-final { note-test ruomm {std::unordered_multimap with 2 elements = {[5] = "seven", [5] = "three"}} } }
+ 
+   std::unordered_set<int> uos;
+   uos.insert(5);
+ // { dg-final { note-test uos {std::unordered_set with 1 elements = {[0] = 5}} } }
++  std::unordered_set<int> &ruos = uos;
++// { dg-final { note-test ruos {std::unordered_set with 1 elements = {[0] = 5}} } }
+ 
+   std::unordered_multiset<int> uoms;
+   uoms.insert(5);
+ // { dg-final { note-test uoms {std::unordered_multiset with 1 elements = {[0] = 5}} } }
++  std::unordered_multiset<int> &ruoms = uoms;
++// { dg-final { note-test ruoms {std::unordered_multiset with 1 elements = {[0] = 5}} } }
+ 
+   std::unique_ptr<datum> uptr (new datum);
+   uptr->s = "hi bob";
+   uptr->i = 23;
+ // { dg-final { regexp-test uptr {std::unique_ptr.datum. containing 0x.*} } }
++  std::unique_ptr<datum> &ruptr = uptr;
++// { dg-final { regexp-test ruptr {std::unique_ptr.datum. containing 0x.*} } }
+ 
++  ExTuple tpl(6,7);
++// { dg-final { note-test tpl {std::tuple containing = {[1] = 6, [2] = 7}} } }  
++  ExTuple &rtpl = tpl;
++// { dg-final { note-test rtpl {std::tuple containing = {[1] = 6, [2] = 7}} } }   
+   placeholder(""); // Mark SPOT
+   use(efl);
+   use(fl);
diff --git a/SOURCES/gcc48-pr63293.patch b/SOURCES/gcc48-pr63293.patch
new file mode 100644
index 0000000..4b11a8c
--- /dev/null
+++ b/SOURCES/gcc48-pr63293.patch
@@ -0,0 +1,60 @@
+2014-11-04  Jiong Wang  <jiong.wang@arm.com>
+	    Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	PR target/63293
+	* config/aarch64/aarch64.c (aarch64_expand_epilogue): Add barriers before
+	stack adjustment.
+
+--- gcc/config/aarch64/aarch64.c	(revision 217090)
++++ gcc/config/aarch64/aarch64.c	(revision 217091)
+@@ -1989,6 +1989,9 @@ aarch64_expand_epilogue (bool for_sibcal
+   rtx insn;
+   rtx cfa_reg;
+   rtx cfi_ops = NULL;
++  /* We need to add memory barrier to prevent read from deallocated stack.  */
++  bool need_barrier_p = (get_frame_size () != 0
++			 || cfun->machine->saved_varargs_size);
+ 
+   aarch64_layout_frame ();
+   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
+@@ -2030,6 +2033,9 @@ aarch64_expand_epilogue (bool for_sibcal
+   if (frame_pointer_needed
+       && (crtl->outgoing_args_size || cfun->calls_alloca))
+     {
++      if (cfun->calls_alloca)
++	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++
+       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+ 				       hard_frame_pointer_rtx,
+ 				       GEN_INT (- fp_offset)));
+@@ -2048,6 +2054,9 @@ aarch64_expand_epilogue (bool for_sibcal
+   /* Restore the frame pointer and lr if the frame pointer is needed.  */
+   if (offset > 0)
+     {
++      if (need_barrier_p && (!frame_pointer_needed || !fp_offset))
++	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++
+       if (frame_pointer_needed)
+ 	{
+ 	  rtx mem_fp, mem_lr;
+@@ -2067,6 +2076,10 @@ aarch64_expand_epilogue (bool for_sibcal
+ 						     + UNITS_PER_WORD));
+ 	      emit_insn (gen_load_pairdi (reg_fp, mem_fp, reg_lr, mem_lr));
+ 
++	      if (need_barrier_p)
++		emit_insn (gen_stack_tie (stack_pointer_rtx,
++					  stack_pointer_rtx));
++
+ 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+ 					       GEN_INT (offset)));
+ 	    }
+@@ -2128,6 +2141,9 @@ aarch64_expand_epilogue (bool for_sibcal
+ 
+   if (frame_size > -1)
+     {
++      if (need_barrier_p)
++	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
++
+       if (frame_size >= 0x1000000)
+ 	{
+ 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
diff --git a/SOURCES/gcc48-pr65142.patch b/SOURCES/gcc48-pr65142.patch
new file mode 100644
index 0000000..367ec16
--- /dev/null
+++ b/SOURCES/gcc48-pr65142.patch
@@ -0,0 +1,23 @@
+2016-06-01  Jakub Jelinek  <jakub@redhat.com>
+
+	Backported from mainline
+	2015-10-02  Jonathan Wakely  <jwakely@redhat.com>
+
+	PR libstdc++/65142
+	* src/c++11/random.cc (random_device::_M_getval()): Check read result.
+
+--- libstdc++-v3/src/c++11/random.cc	(revision 228423)
++++ libstdc++-v3/src/c++11/random.cc	(revision 228424)
+@@ -126,8 +126,10 @@ namespace std _GLIBCXX_VISIBILITY(defaul
+ #endif
+ 
+     result_type __ret;
+-    std::fread(reinterpret_cast<void*>(&__ret), sizeof(result_type),
+-	       1, _M_file);
++    const size_t e = std::fread(reinterpret_cast<void*>(&__ret),
++				sizeof(result_type), 1, _M_file);
++    if (e != 1)
++      std::__throw_runtime_error(__N("random_device could not be read"));
+     return __ret;
+   }
+ 
diff --git a/SOURCES/gcc48-pr67281.patch b/SOURCES/gcc48-pr67281.patch
new file mode 100644
index 0000000..9637e08
--- /dev/null
+++ b/SOURCES/gcc48-pr67281.patch
@@ -0,0 +1,348 @@
+2015-10-14  Peter Bergner  <bergner@vnet.ibm.com>
+	    Torvald Riegel  <triegel@redhat.com>
+
+	PR target/67281
+	* config/rs6000/htm.md (UNSPEC_HTM_FENCE): New.
+	(tabort, tabort<wd>c, tabort<wd>ci, tbegin, tcheck, tend,
+	trechkpt, treclaim, tsr, ttest): Rename define_insns from this...
+	(*tabort, *tabort<wd>c, *tabort<wd>ci, *tbegin, *tcheck, *tend,
+	*trechkpt, *treclaim, *tsr, *ttest): ...to this.  Add memory barrier.
+	(tabort, tabort<wd>c, tabort<wd>ci, tbegin, tcheck, tend,
+	trechkpt, treclaim, tsr, ttest): New define_expands.
+	* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
+	__TM_FENCE__ for htm.
+	* doc/extend.texi: Update documentation for htm builtins.
+
+2015-08-03  Peter Bergner  <bergner@vnet.ibm.com>
+
+	* config/rs6000/htm.md (tabort.): Restrict the source operand to
+	using a base register.
+
+	* gcc.target/powerpc/htm-tabort-no-r0.c: New test.
+
+--- gcc/doc/extend.texi	(revision 228826)
++++ gcc/doc/extend.texi	(revision 228827)
+@@ -16092,6 +16092,28 @@ unsigned int __builtin_tresume (void)
+ unsigned int __builtin_tsuspend (void)
+ @end smallexample
+ 
++Note that the semantics of the above HTM builtins are required to mimic
++the locking semantics used for critical sections.  Builtins that are used
++to create a new transaction or restart a suspended transaction must have
++lock acquisition like semantics while those builtins that end or suspend a
++transaction must have lock release like semantics.  Specifically, this must
++mimic lock semantics as specified by C++11, for example: Lock acquisition is
++as-if an execution of __atomic_exchange_n(&globallock,1,__ATOMIC_ACQUIRE)
++that returns 0, and lock release is as-if an execution of
++__atomic_store(&globallock,0,__ATOMIC_RELEASE), with globallock being an
++implicit implementation-defined lock used for all transactions.  The HTM
++instructions associated with the builtins inherently provide the
++correct acquisition and release hardware barriers required.  However,
++the compiler must also be prohibited from moving loads and stores across
++the builtins in a way that would violate their semantics.  This has been
++accomplished by adding memory barriers to the associated HTM instructions
++(which is a conservative approach to provide acquire and release semantics).
++Earlier versions of the compiler did not treat the HTM instructions as
++memory barriers.  A @code{__TM_FENCE__} macro has been added, which can
++be used to determine whether the current compiler treats HTM instructions
++as memory barriers or not.  This allows the user to explicitly add memory
++barriers to their code when using an older version of the compiler.
++
+ The following set of built-in functions are available to gain access
+ to the HTM specific special purpose registers.
+ 
+--- gcc/config/rs6000/htm.md	(revision 226531)
++++ gcc/config/rs6000/htm.md	(revision 228827)
+@@ -27,6 +27,14 @@ (define_constants
+   ])
+ 
+ ;;
++;; UNSPEC usage
++;;
++
++(define_c_enum "unspec"
++  [UNSPEC_HTM_FENCE
++  ])
++
++;;
+ ;; UNSPEC_VOLATILE usage
+ ;;
+ 
+@@ -45,96 +53,223 @@ (define_c_enum "unspecv"
+    UNSPECV_HTM_MTSPR
+   ])
+ 
++(define_expand "tabort"
++  [(parallel
++     [(set (match_operand:CC 1 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")]
++			       UNSPECV_HTM_TABORT))
++      (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[2]) = 1;
++})
+ 
+-(define_insn "tabort"
++(define_insn "*tabort"
+   [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+-	(unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
+-			    UNSPECV_HTM_TABORT))]
++	(unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")]
++			    UNSPECV_HTM_TABORT))
++   (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tabort. %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tabort<wd>c"
++(define_expand "tabort<wd>c"
++  [(parallel
++     [(set (match_operand:CC 3 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
++				(match_operand:GPR 1 "gpc_reg_operand" "r")
++				(match_operand:GPR 2 "gpc_reg_operand" "r")]
++			       UNSPECV_HTM_TABORTXC))
++      (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[4]) = 1;
++})
++
++(define_insn "*tabort<wd>c"
+   [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ 			     (match_operand:GPR 1 "gpc_reg_operand" "r")
+ 			     (match_operand:GPR 2 "gpc_reg_operand" "r")]
+-			    UNSPECV_HTM_TABORTXC))]
++			    UNSPECV_HTM_TABORTXC))
++   (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tabort<wd>c. %0,%1,%2"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tabort<wd>ci"
++(define_expand "tabort<wd>ci"
++  [(parallel
++     [(set (match_operand:CC 3 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
++				(match_operand:GPR 1 "gpc_reg_operand" "r")
++				(match_operand 2 "s5bit_cint_operand" "n")]
++			       UNSPECV_HTM_TABORTXCI))
++      (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[4]) = 1;
++})
++
++(define_insn "*tabort<wd>ci"
+   [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ 			     (match_operand:GPR 1 "gpc_reg_operand" "r")
+ 			     (match_operand 2 "s5bit_cint_operand" "n")]
+-			    UNSPECV_HTM_TABORTXCI))]
++			    UNSPECV_HTM_TABORTXCI))
++   (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tabort<wd>ci. %0,%1,%2"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tbegin"
++(define_expand "tbegin"
++  [(parallel
++     [(set (match_operand:CC 1 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
++			       UNSPECV_HTM_TBEGIN))
++      (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[2]) = 1;
++})
++
++(define_insn "*tbegin"
+   [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+-			    UNSPECV_HTM_TBEGIN))]
++			    UNSPECV_HTM_TBEGIN))
++   (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tbegin. %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tcheck"
++(define_expand "tcheck"
++  [(parallel
++     [(set (match_operand:CC 0 "cc_reg_operand" "=y")
++	   (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK))
++      (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[1]) = 1;
++})
++
++(define_insn "*tcheck"
+   [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+-	(unspec_volatile:CC [(const_int 0)]
+-			    UNSPECV_HTM_TCHECK))]
++	(unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK))
++   (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tcheck %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tend"
++(define_expand "tend"
++  [(parallel
++     [(set (match_operand:CC 1 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
++			       UNSPECV_HTM_TEND))
++      (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[2]) = 1;
++})
++
++(define_insn "*tend"
+   [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+-			    UNSPECV_HTM_TEND))]
++			    UNSPECV_HTM_TEND))
++   (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tend. %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "trechkpt"
++(define_expand "trechkpt"
++  [(parallel
++     [(set (match_operand:CC 0 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT))
++      (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[1]) = 1;
++})
++
++(define_insn "*trechkpt"
+   [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+-	(unspec_volatile:CC [(const_int 0)]
+-			    UNSPECV_HTM_TRECHKPT))]
++	(unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT))
++   (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "trechkpt."
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "treclaim"
++(define_expand "treclaim"
++  [(parallel
++     [(set (match_operand:CC 1 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
++			       UNSPECV_HTM_TRECLAIM))
++      (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[2]) = 1;
++})
++
++(define_insn "*treclaim"
+   [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
+-			    UNSPECV_HTM_TRECLAIM))]
++			    UNSPECV_HTM_TRECLAIM))
++   (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "treclaim. %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "tsr"
++(define_expand "tsr"
++  [(parallel
++     [(set (match_operand:CC 1 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
++			       UNSPECV_HTM_TSR))
++      (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[2]) = 1;
++})
++
++(define_insn "*tsr"
+   [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ 	(unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+-			    UNSPECV_HTM_TSR))]
++			    UNSPECV_HTM_TSR))
++   (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tsr. %0"
+   [(set_attr "type" "htm")
+    (set_attr "length" "4")])
+ 
+-(define_insn "ttest"
++(define_expand "ttest"
++  [(parallel
++     [(set (match_operand:CC 0 "cc_reg_operand" "=x")
++	   (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST))
++      (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])]
++  "TARGET_HTM"
++{
++  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
++  MEM_VOLATILE_P (operands[1]) = 1;
++})
++
++(define_insn "*ttest"
+   [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+-	(unspec_volatile:CC [(const_int 0)]
+-			    UNSPECV_HTM_TTEST))]
++	(unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST))
++   (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))]
+   "TARGET_HTM"
+   "tabortwci. 0,1,0"
+   [(set_attr "type" "htm")
+--- gcc/config/rs6000/rs6000-c.c	(revision 228826)
++++ gcc/config/rs6000/rs6000-c.c	(revision 228827)
+@@ -372,7 +372,11 @@ rs6000_target_modify_macros (bool define
+   if ((flags & OPTION_MASK_VSX) != 0)
+     rs6000_define_or_undefine_macro (define_p, "__VSX__");
+   if ((flags & OPTION_MASK_HTM) != 0)
+-    rs6000_define_or_undefine_macro (define_p, "__HTM__");
++    {
++      rs6000_define_or_undefine_macro (define_p, "__HTM__");
++      /* Tell the user that our HTM insn patterns act as memory barriers.  */
++      rs6000_define_or_undefine_macro (define_p, "__TM_FENCE__");
++    }
+   if ((flags & OPTION_MASK_P8_VECTOR) != 0)
+     rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+   if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+--- gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c	(revision 0)
++++ gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c	(revision 226532)
+@@ -0,0 +1,12 @@
++/* { dg-do compile { target { powerpc*-*-* } } } */
++/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
++/* { dg-require-effective-target powerpc_htm_ok } */
++/* { dg-options "-O2 -mhtm -ffixed-r3 -ffixed-r4 -ffixed-r5 -ffixed-r6 -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12" } */
++
++/* { dg-final { scan-assembler-not "tabort\\.\[ \t\]0" } } */
++
++int
++foo (void)
++{
++  return __builtin_tabort (10);
++}
diff --git a/SOURCES/gcc48-pr68680.patch b/SOURCES/gcc48-pr68680.patch
new file mode 100644
index 0000000..59f6ffe
--- /dev/null
+++ b/SOURCES/gcc48-pr68680.patch
@@ -0,0 +1,46 @@
+2015-12-04  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/68680
+	* calls.c (special_function_p): Return ECF_MAY_BE_ALLOCA for
+	BUILT_IN_ALLOCA{,_WITH_ALIGN}.
+
+	* gcc.target/i386/pr68680.c: New test.
+
+--- gcc/calls.c	(revision 231278)
++++ gcc/calls.c	(revision 231279)
+@@ -564,6 +564,17 @@ special_function_p (const_tree fndecl, i
+ 	flags |= ECF_NORETURN;
+     }
+ 
++  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
++    switch (DECL_FUNCTION_CODE (fndecl))
++      {
++      case BUILT_IN_ALLOCA:
++      case BUILT_IN_ALLOCA_WITH_ALIGN:
++	flags |= ECF_MAY_BE_ALLOCA;
++	break;
++      default:
++	break;
++      }
++
+   return flags;
+ }
+ 
+--- gcc/testsuite/gcc.target/i386/pr68680.c	(revision 0)
++++ gcc/testsuite/gcc.target/i386/pr68680.c	(revision 231279)
+@@ -0,0 +1,15 @@
++/* PR tree-optimization/68680 */
++/* { dg-do compile } */
++/* { dg-options "-O2 -fstack-protector-strong" } */
++
++int foo (char *);
++
++int
++bar (unsigned long x)
++{
++  char a[x];
++  return foo (a);
++}
++
++/* Verify that this function is stack protected.  */
++/* { dg-final { scan-assembler "stack_chk_fail" } } */
diff --git a/SOURCES/gcc48-rh1180633.patch b/SOURCES/gcc48-rh1180633.patch
new file mode 100644
index 0000000..f3898f9
--- /dev/null
+++ b/SOURCES/gcc48-rh1180633.patch
@@ -0,0 +1,338 @@
+2016-01-22  Torvald Riegel  <triegel@redhat.com>
+
+	* beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline
+	boundary.
+	(htm_fastpath): Remove.
+	(gtm_thread::begin_transaction): Fix HTM fastpath.
+	(_ITM_commitTransaction): Adapt.
+	(_ITM_commitTransactionEH): Adapt.
+	* libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member
+	and accessors.
+	* libitm/config/posix/rwlock.h (gtm_rwlock): Likewise.
+	* libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt.
+	* libitm/libitm_i.h (htm_fastpath): Remove declaration.
+	* libitm/method-serial.cc (htm_mg): Adapt.
+	(gtm_thread::serialirr_mode): Adapt.
+	* libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt.
+
+--- libitm/beginend.cc
++++ libitm/beginend.cc
+@@ -32,7 +32,11 @@ using namespace GTM;
+ extern __thread gtm_thread_tls _gtm_thr_tls;
+ #endif
+ 
+-gtm_rwlock GTM::gtm_thread::serial_lock;
++// Put this at the start of a cacheline so that serial_lock's writers and
++// htm_fastpath fields are on the same cacheline, so that HW transactions
++// only have to pay one cacheline capacity to monitor both.
++gtm_rwlock GTM::gtm_thread::serial_lock
++  __attribute__((aligned(HW_CACHELINE_SIZE)));
+ gtm_thread *GTM::gtm_thread::list_of_threads = 0;
+ unsigned GTM::gtm_thread::number_of_threads = 0;
+ 
+@@ -54,9 +58,6 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER;
+ static pthread_key_t thr_release_key;
+ static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT;
+ 
+-// See gtm_thread::begin_transaction.
+-uint32_t GTM::htm_fastpath = 0;
+-
+ /* Allocate a transaction structure.  */
+ void *
+ GTM::gtm_thread::operator new (size_t s)
+@@ -174,9 +175,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
+   // lock's writer flag and thus abort if another thread is or becomes a
+   // serial transaction.  Therefore, if the fastpath is enabled, then a
+   // transaction is not executing as a HW transaction iff the serial lock is
+-  // write-locked.  This allows us to use htm_fastpath and the serial lock's
+-  // writer flag to reliable determine whether the current thread runs a HW
+-  // transaction, and thus we do not need to maintain this information in
++  // write-locked.  Also, HW transactions monitor the fastpath control
++  // variable, so that they will only execute if dispatch_htm is still the
++  // current method group.  This allows us to use htm_fastpath and the serial
++  // lock's writers flag to reliably determine whether the current thread runs
++  // a HW transaction, and thus we do not need to maintain this information in
+   // per-thread state.
+   // If an uninstrumented code path is not available, we can still run
+   // instrumented code from a HW transaction because the HTM fastpath kicks
+@@ -187,9 +190,14 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
+   // indeed in serial mode, and HW transactions should never need serial mode
+   // for any internal changes (e.g., they never abort visibly to the STM code
+   // and thus do not trigger the standard retry handling).
+-  if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
++  if (likely(serial_lock.get_htm_fastpath() && (prop & pr_hasNoAbort)))
+     {
+-      for (uint32_t t = htm_fastpath; t; t--)
++      // Note that the snapshot of htm_fastpath that we take here could be
++      // outdated, and a different method group than dispatch_htm may have
++      // been chosen in the meantime.  Therefore, take care not to touch
++      // anything besides the serial lock, which is independent of method
++      // groups.
++      for (uint32_t t = serial_lock.get_htm_fastpath(); t; t--)
+ 	{
+ 	  uint32_t ret = htm_begin();
+ 	  if (htm_begin_success(ret))
+@@ -197,9 +205,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
+ 	      // We are executing a transaction now.
+ 	      // Monitor the writer flag in the serial-mode lock, and abort
+ 	      // if there is an active or waiting serial-mode transaction.
++	      // Also checks that htm_fastpath is still nonzero and thus
++	      // HW transactions are allowed to run.
+ 	      // Note that this can also happen due to an enclosing
+ 	      // serial-mode transaction; we handle this case below.
+-	      if (unlikely(serial_lock.is_write_locked()))
++	      if (unlikely(serial_lock.htm_fastpath_disabled()))
+ 		htm_abort();
+ 	      else
+ 		// We do not need to set a_saveLiveVariables because of HTM.
+@@ -210,9 +220,12 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
+ 	  // retrying the transaction will be successful.
+ 	  if (!htm_abort_should_retry(ret))
+ 	    break;
++	  // Check whether the HTM fastpath has been disabled.
++	  if (!serial_lock.get_htm_fastpath())
++	    break;
+ 	  // Wait until any concurrent serial-mode transactions have finished.
+ 	  // This is an empty critical section, but won't be elided.
+-	  if (serial_lock.is_write_locked())
++	  if (serial_lock.htm_fastpath_disabled())
+ 	    {
+ 	      tx = gtm_thr();
+ 	      if (unlikely(tx == NULL))
+@@ -618,7 +631,7 @@ _ITM_commitTransaction(void)
+   // a serial-mode transaction.  If we are, then there will be no other
+   // concurrent serial-mode transaction.
+   // See gtm_thread::begin_transaction.
+-  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
++  if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
+     {
+       htm_commit();
+       return;
+@@ -634,7 +647,7 @@ _ITM_commitTransactionEH(void *exc_ptr)
+ {
+ #if defined(USE_HTM_FASTPATH)
+   // See _ITM_commitTransaction.
+-  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
++  if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
+     {
+       htm_commit();
+       return;
+--- libitm/config/linux/rwlock.h
++++ libitm/config/linux/rwlock.h
+@@ -39,16 +39,29 @@ struct gtm_thread;
+ //
+ // In this implementation, writers are given highest priority access but
+ // read-to-write upgrades do not have a higher priority than writers.
++//
++// Do not change the layout of this class; it must remain a POD type with
++// standard layout, and the writers field must be first (i.e., so the
++// assembler code can assume that its address is equal to the address of the
++// respective instance of the class), and htm_fastpath must be second.
+ 
+ class gtm_rwlock
+ {
+-  // TODO Put futexes on different cachelines?
+   std::atomic<int> writers;       // Writers' futex.
++  // We put the HTM fastpath control variable here so that HTM fastpath
++  // transactions can check efficiently whether they are allowed to run.
++  // This must be accessed atomically because threads can load this value
++  // when they are neither a registered reader nor writer (i.e., when they
++  // attempt to execute the HTM fastpath).
++  std::atomic<uint32_t> htm_fastpath;
++  // TODO Put these futexes on different cachelines?  (writers and htm_fastpath
++  // should remain on the same cacheline.)
+   std::atomic<int> writer_readers;// A confirmed writer waits here for readers.
+   std::atomic<int> readers;       // Readers wait here for writers (iff true).
+ 
+  public:
+-  gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};
++  gtm_rwlock() : writers(0), htm_fastpath(0), writer_readers(0), readers(0)
++  { }
+ 
+   void read_lock (gtm_thread *tx);
+   void read_unlock (gtm_thread *tx);
+@@ -59,12 +72,28 @@ class gtm_rwlock
+   bool write_upgrade (gtm_thread *tx);
+   void write_upgrade_finish (gtm_thread *tx);
+ 
+-  // Returns true iff there is a concurrent active or waiting writer.
+-  // This is primarily useful for simple HyTM approaches, and the value being
+-  // checked is loaded with memory_order_relaxed.
+-  bool is_write_locked()
++  // Returns true iff there is a concurrent active or waiting writer, or
++  // htm_fastpath is zero. This is primarily useful for simple HyTM
++  // approaches, and the values being checked are loaded with
++  // memory_order_relaxed.
++  bool htm_fastpath_disabled ()
++  {
++    return writers.load (memory_order_relaxed) != 0
++	|| htm_fastpath.load (memory_order_relaxed) == 0;
++  }
++
++  // This does not need to return an exact value, hence relaxed MO is
++  // sufficient.
++  uint32_t get_htm_fastpath ()
++  {
++    return htm_fastpath.load (memory_order_relaxed);
++  }
++  // This must only be called while having acquired the write lock, and other
++  // threads do not need to load an exact value; hence relaxed MO is
++  // sufficient.
++  void set_htm_fastpath (uint32_t val)
+   {
+-    return writers.load (memory_order_relaxed) != 0;
++    htm_fastpath.store (val, memory_order_relaxed);
+   }
+ 
+  protected:
+--- libitm/config/posix/rwlock.h
++++ libitm/config/posix/rwlock.h
+@@ -44,19 +44,32 @@ struct gtm_thread;
+ //
+ // In this implementation, writers are given highest priority access but
+ // read-to-write upgrades do not have a higher priority than writers.
++//
++// Do not change the layout of this class; it must remain a POD type with
++// standard layout, and the summary field must be first (i.e., so the
++// assembler code can assume that its address is equal to the address of the
++// respective instance of the class), and htm_fastpath must be second.
+ 
+ class gtm_rwlock
+ {
+-  pthread_mutex_t mutex;	        // Held if manipulating any field.
+-  pthread_cond_t c_readers;	        // Readers wait here
+-  pthread_cond_t c_writers;	        // Writers wait here for writers
+-  pthread_cond_t c_confirmed_writers;	// Writers wait here for readers
+-
+   static const unsigned a_writer  = 1;	// An active writer.
+   static const unsigned w_writer  = 2;	// The w_writers field != 0
+   static const unsigned w_reader  = 4;  // The w_readers field != 0
+ 
+   std::atomic<unsigned int> summary;	// Bitmask of the above.
++
++  // We put the HTM fastpath control variable here so that HTM fastpath
++  // transactions can check efficiently whether they are allowed to run.
++  // This must be accessed atomically because threads can load this value
++  // when they are neither a registered reader nor writer (i.e., when they
++  // attempt to execute the HTM fastpath).
++  std::atomic<uint32_t> htm_fastpath;
++
++  pthread_mutex_t mutex;	        // Held if manipulating any field.
++  pthread_cond_t c_readers;	        // Readers wait here
++  pthread_cond_t c_writers;	        // Writers wait here for writers
++  pthread_cond_t c_confirmed_writers;	// Writers wait here for readers
++
+   unsigned int a_readers;	// Nr active readers as observed by a writer
+   unsigned int w_readers;	// Nr waiting readers
+   unsigned int w_writers;	// Nr waiting writers
+@@ -74,12 +87,28 @@ class gtm_rwlock
+   bool write_upgrade (gtm_thread *tx);
+   void write_upgrade_finish (gtm_thread *tx);
+ 
+-  // Returns true iff there is a concurrent active or waiting writer.
+-  // This is primarily useful for simple HyTM approaches, and the value being
+-  // checked is loaded with memory_order_relaxed.
+-  bool is_write_locked()
++  // Returns true iff there is a concurrent active or waiting writer, or
++  // htm_fastpath is zero. This is primarily useful for simple HyTM
++  // approaches, and the values being checked are loaded with
++  // memory_order_relaxed.
++  bool htm_fastpath_disabled ()
++  {
++    return (summary.load (memory_order_relaxed) & (a_writer | w_writer))
++	|| htm_fastpath.load (memory_order_relaxed) == 0;
++  }
++
++  // This does not need to return an exact value, hence relaxed MO is
++  // sufficient.
++  uint32_t get_htm_fastpath ()
++  {
++    return htm_fastpath.load (memory_order_relaxed);
++  }
++  // This must only be called while having acquired the write lock, and other
++  // threads do not need to load an exact value; hence relaxed MO is
++  // sufficient.
++  void set_htm_fastpath (uint32_t val)
+   {
+-    return summary.load (memory_order_relaxed) & (a_writer | w_writer);
++    htm_fastpath.store (val, memory_order_relaxed);
+   }
+ 
+  protected:
+--- libitm/config/posix/rwlock.cc
++++ libitm/config/posix/rwlock.cc
+@@ -30,11 +30,12 @@ namespace GTM HIDDEN {
+ // ??? Move this back to the header file when constexpr is implemented.
+ 
+ gtm_rwlock::gtm_rwlock()
+-  : mutex (PTHREAD_MUTEX_INITIALIZER),
++  : summary (0),
++    htm_fastpath (0),
++    mutex (PTHREAD_MUTEX_INITIALIZER),
+     c_readers (PTHREAD_COND_INITIALIZER),
+     c_writers (PTHREAD_COND_INITIALIZER),
+     c_confirmed_writers (PTHREAD_COND_INITIALIZER),
+-    summary (0),
+     a_readers (0),
+     w_readers (0),
+     w_writers (0)
+--- libitm/libitm_i.h
++++ libitm/libitm_i.h
+@@ -336,10 +336,6 @@ extern abi_dispatch *dispatch_htm();
+ 
+ extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
+ 
+-// Control variable for the HTM fastpath that uses serial mode as fallback.
+-// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction.
+-extern uint32_t htm_fastpath;
+-
+ } // namespace GTM
+ 
+ #endif // LIBITM_I_H
+--- libitm/method-serial.cc
++++ libitm/method-serial.cc
+@@ -222,13 +222,13 @@ struct htm_mg : public method_group
+     // Enable the HTM fastpath if the HW is available.  The fastpath is
+     // initially disabled.
+ #ifdef USE_HTM_FASTPATH
+-    htm_fastpath = htm_init();
++    gtm_thread::serial_lock.set_htm_fastpath(htm_init());
+ #endif
+   }
+   virtual void fini()
+   {
+     // Disable the HTM fastpath.
+-    htm_fastpath = 0;
++    gtm_thread::serial_lock.set_htm_fastpath(0);
+   }
+ };
+ 
+@@ -288,7 +288,7 @@ GTM::gtm_thread::serialirr_mode ()
+ #if defined(USE_HTM_FASTPATH)
+   // HTM fastpath.  If we are executing a HW transaction, don't go serial but
+   // continue.  See gtm_thread::begin_transaction.
+-  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
++  if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
+     return;
+ #endif
+ 
+--- libitm/query.cc
++++ libitm/query.cc
+@@ -49,7 +49,7 @@ _ITM_inTransaction (void)
+   // a transaction and thus we can't deduce this by looking at just the serial
+   // lock.  This function isn't used in practice currently, so the easiest
+   // way to handle it is to just abort.
+-  if (htm_fastpath && htm_transaction_active())
++  if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active())
+     htm_abort();
+ #endif
+   struct gtm_thread *tx = gtm_thr();
+@@ -69,7 +69,7 @@ _ITM_getTransactionId (void)
+ {
+ #if defined(USE_HTM_FASTPATH)
+   // See ITM_inTransaction.
+-  if (htm_fastpath && htm_transaction_active())
++  if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active())
+     htm_abort();
+ #endif
+   struct gtm_thread *tx = gtm_thr();
diff --git a/SOURCES/gcc48-rh1278872.patch b/SOURCES/gcc48-rh1278872.patch
new file mode 100644
index 0000000..ce82c3d
--- /dev/null
+++ b/SOURCES/gcc48-rh1278872.patch
@@ -0,0 +1,78 @@
+2013-07-25  Sterling Augustine  <saugustine@google.com>
+
+	* dwarf2out.c (size_of_pubnames): Move code to...
+	(include_pubname_in_output): ...here.  New.
+	(output_pubnames): Call include_pubname_in_output.  Move assertion.
+
+--- gcc/dwarf2out.c	(revision 201254)
++++ gcc/dwarf2out.c	(revision 201255)
+@@ -7806,6 +7806,30 @@ unmark_all_dies (dw_die_ref die)
+       unmark_all_dies (AT_ref (a));
+ }
+ 
++/* Calculate if the entry should appear in the final output file.  It may be
++   from a pruned type.  */
++
++static bool
++include_pubname_in_output (vec<pubname_entry, va_gc> *table, pubname_entry *p)
++{
++  if (table == pubname_table)
++    {
++      /* Enumerator names are part of the pubname table, but the parent
++	 DW_TAG_enumeration_type die may have been pruned.  Don't output
++	 them if that is the case.  */
++      if (p->die->die_tag == DW_TAG_enumerator && !p->die->die_mark)
++	return false;
++
++      /* Everything else in the pubname table is included.  */
++      return true;
++    }
++
++  /* The pubtypes table shouldn't include types that have been
++     pruned.  */
++  return (p->die->die_offset != 0
++	  || !flag_eliminate_unused_debug_types);
++}
++
+ /* Return the size of the .debug_pubnames or .debug_pubtypes table
+    generated for the compilation unit.  */
+ 
+@@ -7818,9 +7842,7 @@ size_of_pubnames (vec<pubname_entry, va_
+ 
+   size = DWARF_PUBNAMES_HEADER_SIZE;
+   FOR_EACH_VEC_ELT (*names, i, p)
+-    if (names != pubtype_table
+-	|| p->die->die_offset != 0
+-	|| !flag_eliminate_unused_debug_types)
++    if (include_pubname_in_output (names, p))
+       size += strlen (p->name) + DWARF_OFFSET_SIZE + 1;
+ 
+   size += DWARF_OFFSET_SIZE;
+@@ -8999,22 +9021,14 @@ output_pubnames (vec<pubname_entry, va_g
+ 
+   FOR_EACH_VEC_ELT (*names, i, pub)
+     {
+-      /* Enumerator names are part of the pubname table, but the parent
+-         DW_TAG_enumeration_type die may have been pruned.  Don't output
+-         them if that is the case.  */
+-      if (pub->die->die_tag == DW_TAG_enumerator && !pub->die->die_mark)
+-        continue;
+-
+-      /* We shouldn't see pubnames for DIEs outside of the main CU.  */
+-      if (names == pubname_table)
+-	gcc_assert (pub->die->die_mark);
+-
+-      if (names != pubtype_table
+-	  || pub->die->die_offset != 0
+-	  || !flag_eliminate_unused_debug_types)
++      if (include_pubname_in_output (names, pub))
+ 	{
+ 	  dw_offset die_offset = pub->die->die_offset;
+ 
++	  /* We shouldn't see pubnames for DIEs outside of the main CU.  */
++	  if (names == pubname_table)
++	    gcc_assert (pub->die->die_mark);
++
+ 	  /* If we're putting types in their own .debug_types sections,
+ 	     the .debug_pubtypes table will still point to the compile
+ 	     unit (not the type unit), so we want to use the offset of
diff --git a/SOURCES/gcc48-rh1296211.patch b/SOURCES/gcc48-rh1296211.patch
new file mode 100644
index 0000000..f1b084a
--- /dev/null
+++ b/SOURCES/gcc48-rh1296211.patch
@@ -0,0 +1,14 @@
+2015-09-02  Alan Modra  <amodra@gmail.com>
+
+	* config/rs6000/sysv4.h (LINK_SPEC): Delete link_target.
+
+--- gcc/config/rs6000/sysv4.h	(revision 227396)
++++ gcc/config/rs6000/sysv4.h	(revision 227397)
+@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF
+ %{R*} \
+ %(link_shlib) \
+ %{!T*: %(link_start) } \
+-%(link_target) \
+ %(link_os)"
+ 
+ /* Shared libraries are not default.  */
diff --git a/SOURCES/gcc48-rh1304449.patch b/SOURCES/gcc48-rh1304449.patch
new file mode 100644
index 0000000..213ff0c
--- /dev/null
+++ b/SOURCES/gcc48-rh1304449.patch
@@ -0,0 +1,496 @@
+2015-12-24  Kirill Yukhin  <kirill.yukhin@intel.com>
+
+	* common/config/i386/i386-common.c (OPTION_MASK_ISA_PKU_SET): New.
+	(OPTION_MASK_ISA_PKU_UNSET): Ditto.
+	(ix86_handle_option): Handle OPT_mpku.
+	* config.gcc: Add pkuintrin.h to i[34567]86-*-* and x86_64-*-*
+	targets.
+	* config/i386/cpuid.h (host_detect_local_cpu): Detect PKU feature.
+	* config/i386/i386-c.c (ix86_target_macros_internal): Handle PKU ISA
+	flag.
+	* config/i386/i386.c (ix86_target_string): Add "-mpku" to
+	ix86_target_opts.
+	(ix86_option_override_internal): Define PTA_PKU, mention new key
+	in skylake-avx512. Handle new ISA bits.
+	(ix86_valid_target_attribute_inner_p): Add "pku".
+	(enum ix86_builtins): Add IX86_BUILTIN_RDPKRU and IX86_BUILTIN_WRPKRU.
+	(builtin_description bdesc_special_args[]): Add new built-ins.
+	* config/i386/i386.h (define TARGET_PKU): New.
+	(define TARGET_PKU_P): Ditto.
+	* config/i386/i386.md (define_c_enum "unspecv"): Add UNSPECV_PKU.
+	(define_expand "rdpkru"): New.
+	(define_insn "*rdpkru"): Ditto.
+	(define_expand "wrpkru"): Ditto.
+	(define_insn "*wrpkru"): Ditto.
+	* config/i386/i386.opt (mpku): Ditto.
+	* config/i386/pkuintrin.h: New file.
+	* config/i386/x86intrin.h: Include pkuintrin.h
+	* doc/extend.texi: Describe new built-ins.
+	* doc/invoke.texi: Describe new switches.
+
+	* g++.dg/other/i386-2.C: Add -mpku.
+	* g++.dg/other/i386-3.C: Ditto.
+	* gcc.target/i386/rdpku-1.c: New test.
+	* gcc.target/i386/sse-12.c: Add -mpku.
+	* gcc.target/i386/sse-13.c: Ditto.
+	* gcc.target/i386/sse-22.c: Ditto.
+	* gcc.target/i386/sse-33.c: Ditto.
+	* gcc.target/i386/wrpku-1.c: New test.
+
+--- gcc/config.gcc	(revision 231943)
++++ gcc/config.gcc	(revision 231945)
+@@ -368,7 +368,7 @@ i[34567]86-*-*)
+ 		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
+ 		       avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h
+ 		       xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h
+-		       fxsrintrin.h xsaveintrin.h xsaveoptintrin.h"
++		       fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h"
+ 	;;
+ x86_64-*-*)
+ 	cpu_type=i386
+@@ -383,7 +383,7 @@ x86_64-*-*)
+ 		       lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
+ 		       avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h
+ 		       xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h
+-		       fxsrintrin.h xsaveintrin.h xsaveoptintrin.h"
++		       fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h"
+ 	need_64bit_hwint=yes
+ 	;;
+ ia64-*-*)
+--- gcc/common/config/i386/i386-common.c	(revision 231943)
++++ gcc/common/config/i386/i386-common.c	(revision 231945)
+@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.
+ #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+ #define OPTION_MASK_ISA_F16C_SET \
+   (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
++#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU
+ 
+ /* Define a set of ISAs which aren't available when a given ISA is
+    disabled.  MMX and SSE ISAs are handled separately.  */
+@@ -164,6 +165,7 @@ along with GCC; see the file COPYING3.
+ #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
+ #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+ #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
++#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU
+ 
+ /* Implement TARGET_HANDLE_OPTION.  */
+ 
+@@ -659,6 +661,19 @@ ix86_handle_option (struct gcc_options *
+ 	}
+       return true;
+ 
++    case OPT_mpku:
++      if (value)
++	{
++	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU_SET;
++	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_SET;
++	}
++      else
++	{
++	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PKU_UNSET;
++	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_UNSET;
++	}
++      return true;
++
+   /* Comes from final.c -- no real reason to change it.  */
+ #define MAX_CODE_ALIGN 16
+ 
+--- gcc/config/i386/i386.h	(revision 231943)
++++ gcc/config/i386/i386.h	(revision 231945)
+@@ -80,6 +80,7 @@ see the files COPYING3 and COPYING.RUNTI
+ #define TARGET_FXSR	TARGET_ISA_FXSR
+ #define TARGET_XSAVE	TARGET_ISA_XSAVE
+ #define TARGET_XSAVEOPT	TARGET_ISA_XSAVEOPT
++#define TARGET_PKU	TARGET_ISA_PKU
+ 
+ #define TARGET_LP64	TARGET_ABI_64
+ #define TARGET_X32	TARGET_ABI_X32
+--- gcc/config/i386/i386.md	(revision 231943)
++++ gcc/config/i386/i386.md	(revision 231945)
+@@ -224,6 +224,9 @@ (define_c_enum "unspecv" [
+   UNSPECV_XTEST
+ 
+   UNSPECV_NLGR
++
++  ;; For RDPKRU and WRPKRU support
++  UNSPECV_PKU
+ ])
+ 
+ ;; Constants to represent rounding modes in the ROUND instruction
+@@ -18289,6 +18292,48 @@ (define_insn "xtest_1"
+   [(set_attr "type" "other")
+    (set_attr "length" "3")])
+ 
++;; RDPKRU and WRPKRU
++
++(define_expand "rdpkru"
++  [(parallel
++     [(set (match_operand:SI 0 "register_operand")
++	   (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
++      (set (match_dup 2) (const_int 0))])]
++  "TARGET_PKU"
++{
++  operands[1] = force_reg (SImode, const0_rtx);
++  operands[2] = gen_reg_rtx (SImode);
++})
++
++(define_insn "*rdpkru"
++  [(set (match_operand:SI 0 "register_operand" "=a")
++	(unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
++			    UNSPECV_PKU))
++   (set (match_operand:SI 1 "register_operand" "=d")
++	(const_int 0))]
++  "TARGET_PKU"
++  "rdpkru"
++  [(set_attr "type" "other")])
++
++(define_expand "wrpkru"
++  [(unspec_volatile:SI
++     [(match_operand:SI 0 "register_operand")
++      (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
++  "TARGET_PKU"
++{
++  operands[1] = force_reg (SImode, const0_rtx);
++  operands[2] = force_reg (SImode, const0_rtx);
++})
++
++(define_insn "*wrpkru"
++  [(unspec_volatile:SI
++     [(match_operand:SI 0 "register_operand" "a")
++      (match_operand:SI 1 "register_operand" "d")
++      (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
++  "TARGET_PKU"
++  "wrpkru"
++  [(set_attr "type" "other")])
++
+ (include "mmx.md")
+ (include "sse.md")
+ (include "sync.md")
+--- gcc/config/i386/pkuintrin.h	(revision 0)
++++ gcc/config/i386/pkuintrin.h	(revision 231945)
+@@ -0,0 +1,45 @@
++/* Copyright (C) 2015 Free Software Foundation, Inc.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++   GNU General Public License for more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#if !defined _X86INTRIN_H_INCLUDED
++# error "Never use <pkuintrin.h> directly; include <x86intrin.h> instead."
++#endif
++
++#ifndef _PKUINTRIN_H_INCLUDED
++#define _PKUINTRIN_H_INCLUDED
++
++extern __inline unsigned int
++__attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_rdpkru_u32(void)
++{
++  return __builtin_ia32_rdpkru ();
++}
++
++extern __inline void
++__attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_wrpkru(unsigned int __key)
++{
++  __builtin_ia32_wrpkru (__key);
++}
++
++#endif /* _PKUINTRIN_H_INCLUDED */
+--- gcc/config/i386/cpuid.h	(revision 231943)
++++ gcc/config/i386/cpuid.h	(revision 231945)
+@@ -74,6 +74,10 @@
+ #define bit_RDSEED	(1 << 18)
+ #define bit_ADX	(1 << 19)
+ 
++/* %ecx */
++#define bit_PKU	(1 << 3)
++#define bit_OSPKE	(1 << 4)
++ 
+ /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
+ #define bit_XSAVEOPT	(1 << 0)
+ 
+--- gcc/config/i386/x86intrin.h	(revision 231943)
++++ gcc/config/i386/x86intrin.h	(revision 231945)
+@@ -119,4 +119,8 @@
+ 
+ #include <adxintrin.h>
+ 
++#ifdef __PKU__
++#include <pkuintrin.h>
++#endif
++
+ #endif /* _X86INTRIN_H_INCLUDED */
+--- gcc/config/i386/i386-c.c	(revision 231943)
++++ gcc/config/i386/i386-c.c	(revision 231945)
+@@ -348,6 +348,8 @@ ix86_target_macros_internal (HOST_WIDE_I
+     def_or_undef (parse_in, "__XSAVE__");
+   if (isa_flag & OPTION_MASK_ISA_XSAVEOPT)
+     def_or_undef (parse_in, "__XSAVEOPT__");
++  if (isa_flag & OPTION_MASK_ISA_PKU)
++    def_or_undef (parse_in, "__PKU__");
+   if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
+     def_or_undef (parse_in, "__SSE_MATH__");
+   if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
+--- gcc/config/i386/i386.opt	(revision 231943)
++++ gcc/config/i386/i386.opt	(revision 231945)
+@@ -626,3 +626,7 @@ Split 32-byte AVX unaligned store
+ mrtm
+ Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save
+ Support RTM built-in functions and code generation
++
++mpku
++Target Report Mask(ISA_PKU) Var(ix86_isa_flags) Save
++Support PKU built-in functions and code generation
+--- gcc/config/i386/driver-i386.c	(revision 231943)
++++ gcc/config/i386/driver-i386.c	(revision 231945)
+@@ -408,6 +408,7 @@ const char *host_detect_local_cpu (int a
+   unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
+   unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
+   unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
++  unsigned int has_pku = 0;
+ 
+   bool arch;
+ 
+@@ -479,6 +480,8 @@ const char *host_detect_local_cpu (int a
+       has_fsgsbase = ebx & bit_FSGSBASE;
+       has_rdseed = ebx & bit_RDSEED;
+       has_adx = ebx & bit_ADX;
++
++      has_pku = ecx & bit_OSPKE;
+     }
+ 
+   if (max_level >= 13)
+@@ -855,12 +858,13 @@ const char *host_detect_local_cpu (int a
+       const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
+       const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
+       const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
++      const char *pku = has_pku ? " -mpku" : " -mno-pku";
+ 
+       options = concat (options, cx16, sahf, movbe, ase, pclmul,
+ 			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
+ 			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
+ 			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
+-			fxsr, xsave, xsaveopt, NULL);
++			fxsr, xsave, xsaveopt, pku, NULL);
+     }
+ 
+ done:
+--- gcc/config/i386/i386.c	(revision 231943)
++++ gcc/config/i386/i386.c	(revision 231945)
+@@ -2632,6 +2632,7 @@ ix86_target_string (HOST_WIDE_INT isa, i
+     { "-mrtm",		OPTION_MASK_ISA_RTM },
+     { "-mxsave",	OPTION_MASK_ISA_XSAVE },
+     { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
++    { "-mpku",		OPTION_MASK_ISA_PKU },
+   };
+ 
+   /* Flag options.  */
+@@ -2905,6 +2906,7 @@ ix86_option_override_internal (bool main
+ #define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
+ #define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
+ #define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
++#define PTA_PKU			(HOST_WIDE_INT_1 << 60)
+ 
+ /* if this reaches 64, need to widen struct pta flags below */
+ 
+@@ -3429,6 +3431,9 @@ ix86_option_override_internal (bool main
+ 	if (processor_alias_table[i].flags & PTA_XSAVEOPT
+ 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
+ 	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
++	if (processor_alias_table[i].flags & PTA_PKU
++	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
++	  ix86_isa_flags |= OPTION_MASK_ISA_PKU;
+ 	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
+ 	  x86_prefetch_sse = true;
+ 
+@@ -4220,6 +4225,7 @@ ix86_valid_target_attribute_inner_p (tre
+     IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
+     IX86_ATTR_ISA ("xsave",	OPT_mxsave),
+     IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
++    IX86_ATTR_ISA ("pku",	OPT_mpku),
+ 
+     /* enum options */
+     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
+@@ -27042,6 +27048,10 @@ enum ix86_builtins
+   IX86_BUILTIN_CPU_IS,
+   IX86_BUILTIN_CPU_SUPPORTS,
+ 
++  /* PKU instructions.  */
++  IX86_BUILTIN_RDPKRU,
++  IX86_BUILTIN_WRPKRU,
++
+   IX86_BUILTIN_MAX
+ };
+ 
+@@ -27357,6 +27367,10 @@ static const struct builtin_description
+   { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+   { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
+   { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
++
++  /* RDPKRU and WRPKRU.  */
++  { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
++  { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ };
+ 
+ /* Builtins with variable number of arguments.  */
+--- gcc/doc/extend.texi	(revision 231943)
++++ gcc/doc/extend.texi	(revision 231945)
+@@ -10996,6 +10996,13 @@ void __builtin_ia32_xabort (status)
+ int __builtin_ia32_xtest ()
+ @end smallexample
+ 
++The following built-in functions are available when @option{-mpku} is used.
++They generate reads and writes to PKRU.
++@smallexample
++void __builtin_ia32_wrpkru (unsigned int)
++unsigned int __builtin_ia32_rdpkru ()
++@end smallexample
++
+ @node X86 transactional memory intrinsics
+ @subsection X86 transaction memory intrinsics
+ 
+--- gcc/doc/invoke.texi	(revision 231943)
++++ gcc/doc/invoke.texi	(revision 231945)
+@@ -645,7 +645,7 @@ Objective-C and Objective-C++ Dialects}.
+ -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
+ -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
+ -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
+--mbmi2 -mrtm -mlwp -mthreads @gol
++-mbmi2 -mrtm -mlwp -mpku -mthreads @gol
+ -mno-align-stringops  -minline-all-stringops @gol
+ -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
+ -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
+@@ -14326,6 +14326,8 @@ preferred alignment to @option{-mpreferr
+ @itemx -mlzcnt
+ @itemx -mno-lzcnt
+ @itemx -mrtm
++@itemx -mpku
++@itemx -mno-pku
+ @itemx -mtbm
+ @itemx -mno-tbm
+ @opindex mmmx
+@@ -14336,7 +14338,7 @@ preferred alignment to @option{-mpreferr
+ @opindex mno-3dnow
+ These switches enable or disable the use of instructions in the MMX, SSE,
+ SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
+-FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM or 3DNow!@:
++FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM, PKU or 3DNow!@:
+ extended instruction sets.
+ These extensions are also available as built-in functions: see
+ @ref{X86 Built-in Functions}, for details of the functions enabled and
+--- gcc/testsuite/gcc.target/i386/sse-12.c	(revision 231943)
++++ gcc/testsuite/gcc.target/i386/sse-12.c	(revision 231945)
+@@ -3,7 +3,7 @@
+    popcntintrin.h and mm_malloc.h are usable
+    with -O -std=c89 -pedantic-errors.  */
+ /* { dg-do compile } */
+-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */
+ 
+ #include <x86intrin.h>
+ 
+--- gcc/testsuite/gcc.target/i386/sse-13.c	(revision 231943)
++++ gcc/testsuite/gcc.target/i386/sse-13.c	(revision 231945)
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */
+ 
+ #include <mm_malloc.h>
+ 
+--- gcc/testsuite/gcc.target/i386/sse-22.c	(revision 231943)
++++ gcc/testsuite/gcc.target/i386/sse-22.c	(revision 231945)
+@@ -268,7 +268,7 @@ test_2 (_mm_clmulepi64_si128, __m128i, _
+ 
+ /* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). */
+ #ifdef DIFFERENT_PRAGMAS
+-#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt")
++#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt,pku")
+ #endif
+ #include <x86intrin.h>
+ /* xopintrin.h */
+--- gcc/testsuite/gcc.target/i386/sse-23.c	(revision 231943)
++++ gcc/testsuite/gcc.target/i386/sse-23.c	(revision 231945)
+@@ -183,7 +183,7 @@
+ /* rtmintrin.h */
+ #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1)
+ 
+-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt")
++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,pku")
+ #include <wmmintrin.h>
+ #include <smmintrin.h>
+ #include <mm3dnow.h>
+--- gcc/testsuite/gcc.target/i386/rdpku-1.c	(revision 0)
++++ gcc/testsuite/gcc.target/i386/rdpku-1.c	(revision 231945)
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-mpku -O2" } */
++/* { dg-final { scan-assembler "rdpkru\n" } } */
++
++#include <x86intrin.h>
++
++unsigned extern
++rdpku_test (void)
++{
++  return _rdpkru_u32 ();
++}
+--- gcc/testsuite/gcc.target/i386/wrpku-1.c	(revision 0)
++++ gcc/testsuite/gcc.target/i386/wrpku-1.c	(revision 231945)
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-options "-mpku -O2" } */
++/* { dg-final { scan-assembler "wrpkru\n" } } */
++
++#include <x86intrin.h>
++
++void extern
++wrpku_test (unsigned int key)
++{
++  _wrpkru (key);
++}
+--- gcc/testsuite/g++.dg/other/i386-2.C	(revision 231943)
++++ gcc/testsuite/g++.dg/other/i386-2.C	(revision 231945)
+@@ -1,9 +1,9 @@
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
++/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */
+ 
+ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
+    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
+-   popcntintrin.h, fmaintrin.h and mm_malloc.h.h are usable with 
++   popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with
+    -O -pedantic-errors.  */
+ 
+ #include <x86intrin.h>
+--- gcc/testsuite/g++.dg/other/i386-3.C	(revision 231943)
++++ gcc/testsuite/g++.dg/other/i386-3.C	(revision 231945)
+@@ -1,9 +1,9 @@
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */
++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */
+ 
+ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
+    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
+-   popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
++   popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with
+    -O -fkeep-inline-functions.  */
+ 
+ #include <x86intrin.h>
diff --git a/SOURCES/gcc48-rh1312436.patch b/SOURCES/gcc48-rh1312436.patch
new file mode 100644
index 0000000..6bf9313
--- /dev/null
+++ b/SOURCES/gcc48-rh1312436.patch
@@ -0,0 +1,76 @@
+2015-12-02  Pierre-Marie de Rodat  <derodat@adacore.com>
+
+	* dwarf2out.c (dwarf2out_var_location): Enhance pattern matching on
+	calls to get the SYMBOL_REF they embed.
+	(gen_subprogram_die): Handle such calls.
+
+--- gcc/dwarf2out.c	(revision 231184)
++++ gcc/dwarf2out.c	(revision 231185)
+@@ -18051,18 +18051,23 @@ gen_subprogram_die (tree decl, dw_die_re
+ 		    }
+ 		  if (mode == VOIDmode || mode == BLKmode)
+ 		    continue;
+-		  if (XEXP (XEXP (arg, 0), 0) == pc_rtx)
++		  /* Get dynamic information about call target only if we
++		     have no static information: we cannot generate both
++		     DW_AT_abstract_origin and DW_AT_GNU_call_site_target
++		     attributes.  */
++		  if (ca_loc->symbol_ref == NULL_RTX)
+ 		    {
+-		      gcc_assert (ca_loc->symbol_ref == NULL_RTX);
+-		      tloc = XEXP (XEXP (arg, 0), 1);
+-		      continue;
+-		    }
+-		  else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER
+-			   && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx)
+-		    {
+-		      gcc_assert (ca_loc->symbol_ref == NULL_RTX);
+-		      tlocc = XEXP (XEXP (arg, 0), 1);
+-		      continue;
++		      if (XEXP (XEXP (arg, 0), 0) == pc_rtx)
++			{
++			  tloc = XEXP (XEXP (arg, 0), 1);
++			  continue;
++			}
++		      else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER
++			       && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx)
++			{
++			  tlocc = XEXP (XEXP (arg, 0), 1);
++			  continue;
++			}
+ 		    }
+ 		  reg = NULL;
+ 		  if (REG_P (XEXP (XEXP (arg, 0), 0)))
+@@ -20842,15 +20847,27 @@ dwarf2out_var_location (rtx loc_note)
+       if (!CALL_P (prev))
+ 	prev = XVECEXP (PATTERN (prev), 0, 0);
+       ca_loc->tail_call_p = SIBLING_CALL_P (prev);
++
++      /* Look for a SYMBOL_REF in the "prev" instruction.  */
+       x = get_call_rtx_from (PATTERN (prev));
+       if (x)
+ 	{
+-	  x = XEXP (XEXP (x, 0), 0);
+-	  if (GET_CODE (x) == SYMBOL_REF
+-	      && SYMBOL_REF_DECL (x)
+-	      && TREE_CODE (SYMBOL_REF_DECL (x)) == FUNCTION_DECL)
+-	    ca_loc->symbol_ref = x;
++	  /* Try to get the call symbol, if any.  */
++	  if (MEM_P (XEXP (x, 0)))
++	    x = XEXP (x, 0);
++	  /* First, look for a memory access to a symbol_ref.  */
++	  if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
++	      && SYMBOL_REF_DECL (XEXP (x, 0))
++	      && TREE_CODE (SYMBOL_REF_DECL (XEXP (x, 0))) == FUNCTION_DECL)
++	    ca_loc->symbol_ref = XEXP (x, 0);
++	  /* Otherwise, look at a compile-time known user-level function
++	     declaration.  */
++	  else if (MEM_P (x)
++		   && MEM_EXPR (x)
++		   && TREE_CODE (MEM_EXPR (x)) == FUNCTION_DECL)
++	    ca_loc->symbol_ref = XEXP (DECL_RTL (MEM_EXPR (x)), 0);
+ 	}
++
+       ca_loc->block = insn_scope (prev);
+       if (call_arg_locations)
+ 	call_arg_loc_last->next = ca_loc;
diff --git a/SOURCES/gcc48-rh1312850.patch b/SOURCES/gcc48-rh1312850.patch
new file mode 100644
index 0000000..841627f
--- /dev/null
+++ b/SOURCES/gcc48-rh1312850.patch
@@ -0,0 +1,24 @@
+2016-05-26  Martin Sebor  <msebor@redhat.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* asan/asan_rtl.cc (InitializeHighMemEnd): Backport part of upstream
+	r221457 fix and typo fix from r206158.
+
+--- libsanitizer/asan/asan_rtl.cc.jj	2013-03-04 12:44:18.000000000 +0100
++++ libsanitizer/asan/asan_rtl.cc	2016-05-26 09:57:10.761973999 +0200
+@@ -308,11 +308,13 @@ static void InitializeHighMemEnd() {
+ # if defined(__powerpc64__)
+   // FIXME:
+   // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
+-  // We somehow need to figure our which one we are using now and choose
++  // We somehow need to figure out which one we are using now and choose
+   // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
+   // Note that with 'ulimit -s unlimited' the stack is moved away from the top
+   // of the address space, so simply checking the stack address is not enough.
+-  kHighMemEnd = (1ULL << 44) - 1;  // 0x00000fffffffffffUL
++  // This should (does) work for both PowerPC64 Endian modes.
++  kHighMemEnd =
++      (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1;
+ # else
+   kHighMemEnd = (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
+ # endif
diff --git a/SOURCES/gcc48-rh1344807.patch b/SOURCES/gcc48-rh1344807.patch
new file mode 100644
index 0000000..130f558
--- /dev/null
+++ b/SOURCES/gcc48-rh1344807.patch
@@ -0,0 +1,37 @@
+2016-06-14  Jason Merrill  <jason@redhat.com>
+
+	* call.c (add_function_candidate): Be more careful about
+	ref-to-ptr conversion.
+
+2016-06-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* g++.dg/cpp0x/ref-qual17.C: New test.
+
+--- gcc/cp/call.c.jj	2014-08-06 10:45:03.260163142 +0200
++++ gcc/cp/call.c	2016-06-15 11:15:06.663878423 +0200
+@@ -1975,7 +1975,9 @@ add_function_candidate (struct z_candida
+ 		  bool rv = FUNCTION_RVALUE_QUALIFIED (TREE_TYPE (fn));
+ 		  parmtype = cp_build_reference_type (parmtype, rv);
+ 		  if (TREE_CODE (arg) == CONVERT_EXPR
+-		      && TYPE_PTR_P (TREE_TYPE (arg)))
++		      && TYPE_PTR_P (TREE_TYPE (arg))
++		      && (TREE_CODE (TREE_TYPE (TREE_OPERAND (arg, 0)))
++			  == REFERENCE_TYPE))
+ 		    /* Strip conversion from reference to pointer.  */
+ 		    arg = TREE_OPERAND (arg, 0);
+ 		  arg = build_fold_indirect_ref (arg);
+--- gcc/testsuite/g++.dg/cpp0x/ref-qual17.C.jj	2016-06-15 11:12:57.692558903 +0200
++++ gcc/testsuite/g++.dg/cpp0x/ref-qual17.C	2016-06-15 11:07:02.000000000 +0200
+@@ -0,0 +1,12 @@
++// { dg-do compile { target c++11 } }
++
++struct A
++{
++  void foo () &;
++};
++
++void
++bar (__UINTPTR_TYPE__ a)
++{
++  reinterpret_cast<A *>(a)->foo ();
++}
diff --git a/SOURCES/gcc48-s390-z13.patch b/SOURCES/gcc48-s390-z13.patch
new file mode 100644
index 0000000..05a3d5f
--- /dev/null
+++ b/SOURCES/gcc48-s390-z13.patch
@@ -0,0 +1,16938 @@
+Backport of trunk revisions: r214898, r221047, r223367, r223368, r223369, r223393, r223395, r223396, r223397, r223398, r223399, r223400, r223403, r224227, r224867, r224868, r224869, r224870, r224871, r224872, r224873, r224874, r226671, r226672, r227058, r227635, r227636, r227637, r227780, r231153, r231154, r231155, r231156, r231157, r231158, r231159, r231809, r232972, r232973, r233548, r233549, r233550, r233552, r233553, r233554, r233555, r233556, r233623, r236067
+
+2016-05-10  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.md ("*vec_cmp<insn_cmp>df_cconly")
+	("*fixuns_truncdfdi2_z13")
+	("*fixuns_trunc<FP:mode><GPR:mode>2_z196")
+	("*fix_truncdfdi2_bfp_z13", "*floatunsdidf2_z13")
+	("*extendsfdf2_z13"): Replace TARGET_Z13 with TARGET_VX.
+
+2016-02-23  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/md/movstr-2.c: Move and rename to ...
+	* gcc.target/s390/vector/stpcpy-1.c: ... this one.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vector.md: Add missing commutative operand markers
+	to the patterns which qualify for one.
+	* config/s390/vx-builtins.md: Likewise.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vector.md (VI, VI_QHS): Add single element vector
+	types to mode iterators.
+	(vec_double): ... and mode attribute.
+	* config/s390/vx-builtins.md (non_vec_int): Likewise.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vector.md ("<ti*>add<mode>3", "<ti*>sub<mode>3"):
+	Change the predicate of op2 from nonimmediate to general and let
+	reload fix it if necessary.
+
+	* gcc.target/s390/vector/int128-1.c: New test.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vecintrin.h (vec_sub_u128): Define missing macro.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_expand_vcond): Use the compare operand
+	mode.
+
+	* gcc.target/s390/vector/vec-vcond-1.c: New test.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-protos.h: Add s390_expand_vec_movstr prototype.
+	* config/s390/s390.c (s390_expand_vec_movstr): New function.
+	* config/s390/s390.md ("movstr<P:mode>"): Call
+	s390_expand_vec_movstr.
+
+	* gcc.target/s390/md/movstr-2.c: New test.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.md: Add missing output modifier for operand 1
+	to print it as address properly.
+
+2016-02-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/2827.md: Rename ooo_* insn attributes to zEC12_*.
+	* config/s390/2964.md: New file.
+	* config/s390/s390.c (s390_get_sched_attrmask): Use the right set
+	of insn grouping attributes depending on the CPU level.
+	(s390_get_unit_mask): New function.
+	(s390_sched_score): Remove the OOO from the scheduling macros.
+	Add loop to calculate a score for the instruction mix.
+	(s390_sched_reorder): Likewise plus improve debug output.
+	(s390_sched_variable_issue): Rename macros as above.  Calculate
+	the unit distances after actually scheduling an insn.  Improve
+	debug output.
+	(s390_sched_init): Clear last_scheduled_unit_distance array.
+	* config/s390/s390.md: Include 2964.md.
+
+2016-01-29  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	* config/s390/s390-c.c (s390_resolve_overloaded_builtin): Format
+	declaration name with %qs and print it in both error messages.
+	Also fix indentation.
+
+2016-01-29  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	PR other/69006
+	* config/s390/s390-c.c (s390_resolve_overloaded_builtin): Remove
+	trailing blank line from error message.
+
+2015-12-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>
+
+	* config/s390/predicates.md: Change and rename
+	constm1_operand to all_ones_operand.
+	* config/s390/s390.c (s390_expand_vcond): Use all_ones_operand.
+	* config/s390/vector.md: Likewise.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/predicates.md (const_mask_operand): New predicate.
+	* config/s390/s390-builtins.def: Set a smaller bitmask for a few builtins.
+	* config/s390/vector.md: Change predicate from immediate_operand
+	to either const_int_operand or const_mask_operand.  Add special
+	insn conditions on patterns which have to exclude certain values.
+	* config/s390/vx-builtins.md: Likewise.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vector.md ("*vec_set<mode>"): Change shift count
+	mode from DI to SI.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-builtin-types.def: New builtin types added.
+	* config/s390/s390-builtins.def: Add s390_vec_splat_* definitions.
+	* config/s390/s390.c (s390_expand_builtin): Always truncate
+	constants to the mode in the pattern.
+	* config/s390/vecintrin.h: Let the vec_splat_* macros point to the
+	respective builtin __builtin_s390_vec_splat_*.
+
+	* gcc.target/s390/zvector/vec-splat-2.c: New test.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-builtin-types.def: Sort builtin types.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-c.c (s390_get_vstring_flags): Invert the
+	condition for the RT flag.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/constraints.md ("jKK"): New constraint.
+	* config/s390/s390.c (tm-constrs.h): Include for
+	satisfies_constraint_*.
+	(s390_legitimate_constant_p): Allow jKK constants.  Use
+	satisfies_constraint_* also for the others.
+	(legitimate_reload_vector_constant_p): Likewise.
+	(print_operand): Allow h output modifier on vectors.
+	* config/s390/vector.md ("mov<mode>"): Add vrepi.
+
+	* gcc.target/s390/vector/vec-vrepi-1.c: New test.
+
+2015-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vector.md ("*vec_splats<mode>"): Fix constraint
+	letter I->K.
+
+	* gcc.target/s390/zvector/vec-splat-1.c: New test.
+
+2015-09-15  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+        * config/s390/s390.c (s390_const_operand_ok): Add missing
+	brackets.
+
+2015-09-10  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_contiguous_bitmask_vector_p): Reject if
+	the vector element is bigger than 64 bit.
+
+	* gcc.target/s390/vector/vec-genbytemask-1.c: Add check for V1TI
+	initialization with a byte mask.  No change expected here.
+	* gcc.target/s390/vector/vec-genmask-1.c: Fix whitespace.
+	* gcc.target/s390/vector/vec-genmask-2.c: Add check for V1TI
+	initialization with contiguous bitmask.  Literal pool is expected
+	to be used here.
+
+2015-09-10  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vx-builtins.md ("vec_vmal<mode>", "vec_vmah<mode>")
+	("vec_vmalh<mode>"): Change mode iterator from VI_HW to VI_HW_QHS.
+
+2015-09-10  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c: Add V1TImode to constant pool modes.
+
+2015-08-21  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	* config/s390/s390-builtins.def: Fix value range of vec_load_bndry.
+
+	* gcc.target/s390/zvector/vec-load_bndry-1.c: New test.
+
+2015-08-06  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_expand_tbegin): Expand either
+	tbegin_1_z13 or tbegin_1 depending on VX flag.
+	* config/s390/s390.md ("tbegin_1_z13"): New expander.
+
+	* gcc.target/s390/htm-builtins-z13-1.c: New test.
+
+2015-08-06  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.opt: Clarify description for -mzvector.
+	* doc/invoke.texi: Add documentation for -mhtm, -mvx, and
+	-mzvector.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vx-builtins.md
+	("vec_scatter_element<mode>_<non_vec_int>")
+	("vec_scatter_element<V_HW_64:mode>_SI"): Replace gf mode
+	attribute with bhfgq.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-builtins.def: Fix vpopct instruction comments.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-builtin-types.def: Add flag to indicate the
+	options under which the function type is needed.
+	* config/s390/s390-builtins.def: Add flag to indicate the options
+	under which the builtin is enabled.
+	* config/s390/s390-builtins.h: Add flags parameter to macro
+	definitions.
+	(bflags_for_builtin): New function.
+	(flags_for_builtin): Renamed to ...
+	(opflags_for_builtin): ... this.
+	* config/s390/s390-c.c (s390_resolve_overloaded_builtin): Rename
+	flags_for_builtin to bflags_for_builtin and
+	flags_overloaded_builtin_var to opflags_overloaded_builtin_var.
+	* config/s390/s390.c: Add initialization of bflags_builtin and
+	opflags_builtin arrays.
+	Remove code for flags_builtin.
+	(s390_init_builtins): Only create builtin function types if one of
+	their flags is active.
+	Only create builtins if all of their flags are active.
+	(s390_expand_builtin): Rename flags_for_builtin to
+	opflags_for_builtin.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/vecintrin.h: Remove internal builtins.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_secondary_reload): Fix check for
+	GENERAL_REGS register class.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_support_vector_misalignment): Call
+	default implementation for !TARGET_VX.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_legitimate_constant_p): Add
+	TARGET_VX check.
+
+2015-06-24  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_vector_abi): New variable definition.
+	(s390_check_type_for_vector_abi): New function.
+	(TARGET_ASM_FILE_END): New macro definition.
+	(s390_asm_file_end): New function.
+	(s390_function_arg): Call s390_check_type_for_vector_abi.
+	(s390_gimplify_va_arg): Likewise.
+	* configure: Regenerate.
+	* configure.ac: Check for .gnu_attribute Binutils feature.
+
+	* gcc.target/s390/vector/vec-abi-1.c: Add gnu attribute check.
+	* gcc.target/s390/vector/vec-abi-attr-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-attr-2.c: New test.
+	* gcc.target/s390/vector/vec-abi-attr-3.c: New test.
+	* gcc.target/s390/vector/vec-abi-attr-4.c: New test.
+	* gcc.target/s390/vector/vec-abi-attr-5.c: New test.
+	* gcc.target/s390/vector/vec-abi-attr-6.c: New test.
+
+2015-06-08  Jakub Jelinek  <jakub@redhat.com>
+
+	* genattrtab.c (insn_alternatives): Change type from int *
+	to uint64_t *.
+	(check_attr_test): Shift ((uint64_t) 1) instead of 1 up.
+	(get_attr_value): Change type of num_alt to uint64_t.
+	(compute_alternative_mask): Change return type from
+	int to uint64_t, shift ((uint64_t) 1) instead of 1 up.
+	(make_alternative_compare, mk_attr_alt): Change argument type
+	from int to uint64_t.
+	(simplify_test_exp): Change type of i from int to uint64_t.
+	Shift ((uint64_t) 1) instead of 1 up.
+	(main): Adjust oballocvec first argument from int to uint64_t.
+	Shift ((uint64_t) 1) instead of 1 up.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* lib/target-supports.exp: Vectors do not always have natural
+	alignment on s390*.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.dg/tree-ssa/gen-vect-11b.c: Disable vector instructions on
+	s390*.
+	* gcc.dg/tree-ssa/gen-vect-11c.c: Likewise.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/zvector/vec-dbl-math-compile-1.c: New test.
+	* gcc.target/s390/zvector/vec-genbytemask-1.c: New test.
+	* gcc.target/s390/zvector/vec-genmask-1.c: New test.
+	* gcc.target/s390/zvector/vec-lcbb-1.c: New test.
+	* gcc.target/s390/zvector/vec-overloading-1.c: New test.
+	* gcc.target/s390/zvector/vec-overloading-2.c: New test.
+	* gcc.target/s390/zvector/vec-overloading-3.c: New test.
+	* gcc.target/s390/zvector/vec-overloading-4.c: New test.
+	* gcc.target/s390/zvector/vec-test-mask-1.c: New test.
+	* gcc.target/s390/zvector/vec-elem-1.c: New test.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config.gcc: Add vecintrin.h to extra_headers.  Add s390-c.o to
+	c_target_objs and cxx_target_objs.  Add t-s390 to tmake_file.
+	* config/s390/s390-builtin-types.def: New file.
+	* config/s390/s390-builtins.def: New file.
+	* config/s390/s390-builtins.h: New file.
+	* config/s390/s390-c.c: New file.
+	* config/s390/s390-modes.def: Add modes CCVEQANY, CCVH,
+	CCVHANY, CCVHU, CCVHUANY, CCVFHANY, CCVFHEANY.
+	* config/s390/s390-protos.h (s390_expand_vec_compare_cc)
+	(s390_cpu_cpp_builtins, s390_register_target_pragmas): Add
+	prototypes.
+	* config/s390/s390.c (s390-builtins.h, s390-builtins.def):
+	Include.
+	(flags_builtin, flags_overloaded_builtin_var, s390_builtin_types)
+	(s390_builtin_fn_types, s390_builtin_decls, code_for_builtin): New
+	variable definitions.
+	(s390_const_operand_ok): New function.
+	(s390_expand_builtin): Rewrite.
+	(s390_init_builtins): New function.
+	(s390_handle_vectorbool_attribute): New function.
+	(s390_attribute_table): Add s390_vector_bool attribute.
+	(s390_match_ccmode_set): Handle new cc modes CCVH, CCVHU.
+	(s390_branch_condition_mask): Generate masks for new modes.
+	(s390_expand_vec_compare_cc): New function.
+	(s390_mangle_type): Add mangling for vector bool types.
+	(enum s390_builtin): Remove.
+	(s390_atomic_assign_expand_fenv): Rename constants for sfpc and
+	efpc builtins.
+	* config/s390/s390.h (TARGET_CPU_CPP_BUILTINS): Call
+	s390_cpu_cpp_builtins.
+	(REGISTER_TARGET_PRAGMAS): New macro.
+	* config/s390/s390.md: Define more UNSPEC_VEC_* constants.
+	(insn_cmp mode attribute): Add new CC modes.
+	(s390_sfpc, s390_efpc): Rename patterns to sfpc and efpc.
+	(lcbb): New pattern definition.
+	* config/s390/s390intrin.h: Include vecintrin.h.
+	* config/s390/t-s390: New file.
+	* config/s390/vecintrin.h: New file.
+	* config/s390/vector.md: Include vx-builtins.md.
+	* config/s390/vx-builtins.md: New file.  S/390 zvector builtin
+	support.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and
+	CCVFHE.
+	* config/s390/s390.c (s390_match_ccmode_set): Handle new modes.
+	(s390_select_ccmode): Likewise.
+	(s390_canonicalize_comparison): Swap operands if necessary.
+	(s390_expand_vec_compare_scalar): Expand DFmode compare using
+	single element vector instructions.
+	(s390_emit_compare): Call s390_expand_vec_compare_scalar.
+	(s390_branch_condition_mask): Generate CC masks for the new modes.
+	* config/s390/s390.md (v0, vf, vd): New mode attributes.
+	(VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes.
+	(*vec_cmp<insn_cmp>df_cconly, *fixuns_truncdfdi2_z13)
+	(*fix_trunc<BFP:mode><GPR:mode>2_bfp, *floatunsdidf2_z13)
+	(*floatuns<GPR:mode><FP:mode>2, *extendsfdf2_z13)
+	(*extend<DSF:mode><BFP:mode>2): New insn definition.
+	(fix_trunc<BFP:mode><GPR:mode>2_bfp, floatuns<GPR:mode><FP:mode>2)
+	(extend<DSF:mode><BFP:mode>2): Turn into expander.
+	(floatdi<mode>2, truncdfsf2, add<mode>3, sub<mode>3, mul<mode>3)
+	(div<mode>3, *neg<mode>2, *abs<mode>2, *negabs<mode>2)
+	(sqrt<mode>2): Add vector instruction.
+
+	* gcc.target/s390/vector/vec-scalar-cmp-1.c: New test.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/s390.exp
+	(check_effective_target_vector): New check.
+	* gcc.target/s390/vector/vec-abi-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-2.c: New test.
+	* gcc.target/s390/vector/vec-abi-3.c: New test.
+	* gcc.target/s390/vector/vec-abi-4.c: New test.
+	* gcc.target/s390/vector/vec-abi-align-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-single-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-single-2.c: New test.
+	* gcc.target/s390/vector/vec-abi-struct-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-vararg-1.c: New test.
+	* gcc.target/s390/vector/vec-abi-vararg-2.c: New test.
+	* gcc.target/s390/vector/vec-clobber-1.c: New test.
+	* gcc.target/s390/vector/vec-cmp-1.c: New test.
+	* gcc.target/s390/vector/vec-cmp-2.c: New test.
+	* gcc.target/s390/vector/vec-dbl-math-compile-1.c: New test.
+	* gcc.target/s390/vector/vec-genbytemask-1.c: New test.
+	* gcc.target/s390/vector/vec-genbytemask-2.c: New test.
+	* gcc.target/s390/vector/vec-genmask-1.c: New test.
+	* gcc.target/s390/vector/vec-genmask-2.c: New test.
+	* gcc.target/s390/vector/vec-init-1.c: New test.
+	* gcc.target/s390/vector/vec-int-math-compile-1.c: New test.
+	* gcc.target/s390/vector/vec-shift-1.c: New test.
+	* gcc.target/s390/vector/vec-sub-1.c: New test.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/constraints.md (j00, jm1, jxx, jyy, v): New
+	constraints.
+	* config/s390/predicates.md (const0_operand, constm1_operand)
+	(constable_operand): Accept vector operands.
+	* config/s390/s390-modes.def: Add supported vector modes.
+	* config/s390/s390-protos.h (s390_cannot_change_mode_class)
+	(s390_function_arg_vector, s390_contiguous_bitmask_vector_p)
+	(s390_bytemask_vector_p, s390_expand_vec_strlen)
+	(s390_expand_vec_compare, s390_expand_vcond)
+	(s390_expand_vec_init): Add prototypes.
+	* config/s390/s390.c (VEC_ARG_NUM_REG): New macro.
+	(s390_vector_mode_supported_p): New function.
+	(s390_contiguous_bitmask_p): Mask out the irrelevant bits.
+	(s390_contiguous_bitmask_vector_p): New function.
+	(s390_bytemask_vector_p): New function.
+	(s390_split_ok_p): Vector regs don't work either.
+	(regclass_map): Add VEC_REGS.
+	(s390_legitimate_constant_p): Handle vector constants.
+	(s390_cannot_force_const_mem): Handle CONST_VECTOR.
+	(legitimate_reload_vector_constant_p): New function.
+	(s390_preferred_reload_class): Handle CONST_VECTOR.
+	(s390_reload_symref_address):  Likewise.
+	(s390_secondary_reload): Vector memory instructions only support
+	short displacements.  Rename reload*_nonoffmem* to reload*_la*.
+	(s390_emit_ccraw_jump): New function.
+	(s390_expand_vec_strlen): New function.
+	(s390_expand_vec_compare): New function.
+	(s390_expand_vcond): New function.
+	(s390_expand_vec_init): New function.
+	(s390_dwarf_frame_reg_mode): New function.
+	(print_operand): Handle addresses with 'O' and 'R' constraints.
+	(NR_C_MODES, constant_modes): Add vector modes.
+	(s390_output_pool_entry): Handle vector constants.
+	(s390_hard_regno_mode_ok): Handle vector registers.
+	(s390_class_max_nregs): Likewise.
+	(s390_cannot_change_mode_class): New function.
+	(s390_invalid_arg_for_unprototyped_fn): New function.
+	(s390_function_arg_vector): New function.
+	(s390_function_arg_float): Remove size variable.
+	(s390_pass_by_reference): Handle vector arguments.
+	(s390_function_arg_advance): Likewise.
+	(s390_function_arg): Likewise.
+	(s390_return_in_memory): Vector values are returned in a VR if
+	possible.
+	(s390_function_and_libcall_value): Handle vector arguments.
+	(s390_gimplify_va_arg): Likewise.
+	(s390_call_saved_register_used): Consider the arguments named.
+	(s390_conditional_register_usage): Disable v16-v31 for non-vec
+	targets.
+	(s390_preferred_simd_mode): New function.
+	(s390_support_vector_misalignment): New function.
+	(s390_vector_alignment): New function.
+	(TARGET_STRICT_ARGUMENT_NAMING, TARGET_DWARF_FRAME_REG_MODE)
+	(TARGET_VECTOR_MODE_SUPPORTED_P)
+	(TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN)
+	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
+	(TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT)
+	(TARGET_VECTOR_ALIGNMENT): Define target macro.
+	* config/s390/s390.h (FUNCTION_ARG_PADDING): Define macro.
+	(FIRST_PSEUDO_REGISTER): Increase value.
+	(VECTOR_NOFP_REGNO_P, VECTOR_REGNO_P, VECTOR_NOFP_REG_P)
+	(VECTOR_REG_P): Define macros.
+	(FIXED_REGISTERS, CALL_USED_REGISTERS)
+	(CALL_REALLY_USED_REGISTERS, REG_ALLOC_ORDER)
+	(HARD_REGNO_CALL_PART_CLOBBERED, REG_CLASS_NAMES)
+	(FUNCTION_ARG_REGNO_P, FUNCTION_VALUE_REGNO_P, REGISTER_NAMES):
+	Add vector registers.
+	(CANNOT_CHANGE_MODE_CLASS): Call C function.
+	(enum reg_class): Add VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS.
+	(SECONDARY_MEMORY_NEEDED): Allow SF<->SI mode moves without
+	memory.
+	(DBX_REGISTER_NUMBER, FIRST_VEC_ARG_REGNO, LAST_VEC_ARG_REGNO)
+	(SHORT_DISP_IN_RANGE, VECTOR_STORE_FLAG_VALUE): Define macro.
+	* config/s390/s390.md (UNSPEC_VEC_*): New constants.
+	(VR*_REGNUM): New constants.
+	(ALL): New mode iterator.
+	(INTALL): Remove mode iterator.
+	Include vector.md.
+	(movti): Implement TImode moves for VRs.
+	Disable TImode splitter for VR targets.
+	Implement splitting TImode GPR<->VR moves.
+	(reload*_tomem_z10, reload*_toreg_z10): Replace INTALL with ALL.
+	(reload<mode>_nonoffmem_in, reload<mode>_nonoffmem_out): Rename to
+	reload<mode>_la_in, reload<mode>_la_out.
+	(*movdi_64, *movsi_zarch, *movhi, *movqi, *mov<mode>_64dfp)
+	(*mov<mode>_64, *mov<mode>_31): Add vector instructions.
+	(TD/TF mode splitter): Enable for GPRs only (formerly !FP).
+	(mov<mode> SF SD): Prefer lder, lde for loading.
+	Add lrl and strl instructions.
+	Add vector instructions.
+	(strlen<mode>): Rename old strlen<mode> to strlen_srst<mode>.
+	Call s390_expand_vec_strlen on z13.
+	(*cc_to_int): Change predicate to nonimmediate_operand.
+	(addti3): Rename to *addti3.  New expander.
+	(subti3): Rename to *subti3.  New expander.
+	* config/s390/vector.md: New file.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* common/config/s390/s390-common.c (processor_flags_table): Add
+	z13.
+	* config.gcc: Add z13.
+	* config/s390/s390-opts.h (enum processor_type): Add
+	PROCESSOR_2964_Z13.
+	* config/s390/s390.c (s390_adjust_priority): Check for
+	PROCESSOR_2964_Z13.
+	(s390_reorg): Likewise.
+	(s390_sched_reorder): Likewise.
+	(s390_sched_variable_issue): Likewise.
+	(s390_loop_unroll_adjust): Likewise.
+	(s390_option_override): Likewise. Default to -mvx when available.
+	* config/s390/s390.h (enum processor_flags): Add PF_Z13 and PF_VX.
+	(TARGET_CPU_Z13, TARGET_CPU_VX, TARGET_Z13, TARGET_VX)
+	(TARGET_VX_ABI): Define
+	macros.
+	(TARGET_DEFAULT): Add MASK_OPT_VX.
+	* config/s390/s390.md ("cpu" attribute): Add z13.
+	("cpu_facility" attribute): Add vec.
+	* config/s390/s390.opt (processor_type): Add z13.
+	(mvx): New options.
+	* doc/invoke.texi: Add z13 option for -march.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* optabs.c (expand_vec_perm): Don't re-use SEL as target operand.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_secondary_reload): Fix check for
+	load/store relative.
+
+2015-05-19  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* recog.h: Increase MAX_RECOG_ALTERNATIVES.  Change type of
+	alternative_mask to uint64_t.
+
+2015-02-27  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
+
+	* config/s390/s390.c (enum s390_builtin):
+	Add S390_BUILTIN_S390_SFPC and S390_BUILTIN_S390_EFPC.
+	(code_for_builtin): Add CODE_FOR_s390_sfpc and CODE_FOR_s390_efpc.
+	(s390_init_builtins): Generate new builtin functions.
+	* config/s390/s390.md (UNSPECV_SFPC, UNSPECV_EFPC): New constants.
+	(s390_sfpc, s390_efpc): New pattern definitions.
+
+2014-09-03  Matthew Fortune  <matthew.fortune@imgtec.com>
+
+	* target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook.
+	* targhooks.c (default_dwarf_frame_reg_mode): New function.
+	* targhooks.h (default_dwarf_frame_reg_mode): New prototype.
+	* doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document.
+	* doc/tm.texi: Regenerate.
+	* dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode
+	selection logic to default_dwarf_frame_reg_mode.
+
+--- gcc/common/config/s390/s390-common.c	2013-08-14 13:55:13.000000000 +0200
++++ gcc/common/config/s390/s390-common.c	2016-05-11 15:53:24.000000000 +0200
+@@ -42,7 +42,10 @@ EXPORTED_CONST int processor_flags_table
+     /* z196 */   PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+                  | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196,
+     /* zEC12 */  PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
++                 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX,
++    /* z13 */    PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+                  | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
++                 | PF_Z13 | PF_VX
+   };
+ 
+ /* Change optimizations to be performed, depending on the
+--- gcc/config/s390/2827.md	2015-06-18 17:09:04.000000000 +0200
++++ gcc/config/s390/2827.md	2016-05-11 18:03:45.000000000 +0200
+@@ -18,20 +18,19 @@
+ ;; along with GCC; see the file COPYING3.  If not see
+ ;; <http://www.gnu.org/licenses/>.
+ 
+-
+-(define_attr "ooo_cracked" ""
++(define_attr "zEC12_cracked" ""
+   (cond [(eq_attr "mnemonic" "cgdbr,clfxtr,cdgtr,celfbr,cxgtr,clfebr,clc,lngfr,cs,cfxbr,xc,clfdbr,basr,ex,cxlgtr,clfdtr,srdl,lpgfr,cdlgbr,cgxtr,cxlftr,nc,cxftr,cdfbr,clfxbr,cdftr,clgxbr,cgdtr,cxlgbr,mvc,clgdtr,cegbr,cfebr,cdlftr,sldl,cdlgtr,csg,chhsi,clgebr,cxgbr,cxfbr,cdlfbr,cgebr,lzxr,oc,cdgbr,brasl,cgxbr,cxlfbr,clgxtr,exrl,cfdbr,celgbr,clgdbr,lxr,cpsdr,lcgfr,bras,srda,cefbr") (const_int 1)]
+         (const_int 0)))
+ 
+-(define_attr "ooo_expanded" ""
++(define_attr "zEC12_expanded" ""
+   (cond [(eq_attr "mnemonic" "dlr,dsgr,d,dsgf,stam,dsgfr,dlgr,dsg,cds,dr,stm,mvc,dl,cdsg,stmy,dlg,stmg,lam") (const_int 1)]
+         (const_int 0)))
+ 
+-(define_attr "ooo_endgroup" ""
++(define_attr "zEC12_endgroup" ""
+   (cond [(eq_attr "mnemonic" "ipm") (const_int 1)]
+         (const_int 0)))
+ 
+-(define_attr "ooo_groupalone" ""
++(define_attr "zEC12_groupalone" ""
+   (cond [(eq_attr "mnemonic" "lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush") (const_int 1)]
+         (const_int 0)))
+ 
+--- gcc/config/s390/2964.md	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/2964.md	2016-05-11 18:03:45.000000000 +0200
+@@ -0,0 +1,232 @@
++;; Scheduling description for z13.
++;;   Copyright (C) 2016 Free Software Foundation, Inc.
++;;   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
++
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify it under
++;; the terms of the GNU General Public License as published by the Free
++;; Software Foundation; either version 3, or (at your option) any later
++;; version.
++
++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
++;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++;; for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++
++; generator options: vector_ecycs=12 cracked_ecycs=6 scale_ecycs=5
++
++(define_attr "z13_cracked" ""
++  (cond [(eq_attr "mnemonic" "celgbr,vscef,vsceg,exrl,clfebr,cefbr,chhsi,\
++vgef,vgeg,cdlftr,lcgfr,cfdbr,cgdbr,lzxr,cfxbr,rnsbg,cgdtr,cegbr,rxsbg,ex,\
++cgxtr,clfxtr,cdlgtr,brasl,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,vsteh,\
++clfdtr,cdfbr,lngfr,clgebr,stpq,cs,lpgfr,cdlgbr,lpq,cdgtr,d,cgxbr,cdftr,\
++rosbg,clgdbr,cdgbr,bras,tbegin,clfdbr,cdlfbr,cgebr,clfxbr,lxr,csy,csg,clgdtr,\
++clgxtr") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_expanded" ""
++  (cond [(eq_attr "mnemonic" "cxlftr,cdsg,cdsy,stam,lam,dsgf,lmg,cxlgtr,\
++dl,cxftr,sldl,dsg,cxlfbr,cxgtr,stmg,stmy,stm,lm,cds,lmy,cxfbr,cxlgbr,srda,\
++srdl,cxgbr,dlg") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_groupalone" ""
++  (cond [(eq_attr "mnemonic" "mvc,dxbr,lxebr,axtr,cxtr,alcr,lxdb,lxeb,mxtr,\
++mfy,cxbr,dsgr,lcxbr,slb,mr,dr,alc,slbr,maebr,mlgr,dsgfr,sxtr,tdcxt,tabort,\
++msebr,lxdtr,ltxtr,slbg,ml,mxbr,maeb,oc,dxtr,msdb,sqxbr,mseb,xc,m,clc,mlg,\
++mlr,fixbra,alcgr,nc,sfpc,dlgr,fixbr,slbgr,fixtr,lpxbr,axbr,lxdbr,ltxbr,\
++tcxb,dlr,lnxbr,sxbr,flogr,alcg,tend,madb,bcr_flush") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_endgroup" ""
++  (cond [(eq_attr "mnemonic" "ipm") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_unit_lsu" ""
++  (cond [(eq_attr "mnemonic" "vlbb,mvc,llgc,llc,llhrl,vl,llghrl,vlrepf,\
++vlrepg,vlreph,lde,ldy,tabort,l,llh,ld,lg,ly,vlrepb,vllezb,vllezf,vllezg,\
++vllezh,oc,xc,clc,lrl,ear,nc,lgrl,sfpc,llgf,llgfrl,llgh,llgt,lcbb,vll,sar") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_unit_fxu" ""
++  (cond [(eq_attr "mnemonic" "s,lcgr,x,nop,oiy,ppa,ng,msy,sgrk,vstl,aghik,\
++msgf,ipm,mvi,stocg,rll,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,vst,ark,\
++xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,laa,lder,sgf,lan,llilf,\
++llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,nopr,mfy,ngrk,lbr,\
++br,dsgr,stdy,ork,ldgr,lcr,cg,ch,lgfrl,cl,stoc,cr,agfr,stgrl,cy,alfi,xg,\
++cgfi,xi,clfhsi,cgfr,xr,slb,mghi,clfi,slg,clhhsi,agfi,clfit,sly,mr,ldr,nihf,\
++nihh,algfi,dr,nihl,algf,algfr,algr,clgf,clgr,clgt,aghi,alc,alg,locg,alr,\
++locr,cghi,aly,alghsik,slbr,clgfrl,mhy,cit,nr,ny,xiy,mlgr,sthy,cly,dsgfr,\
++rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,lgr,slrk,clrt,icy,laog,og,agr,\
++mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,nill,lcdfr,mviy,tmhh,tmhl,sthrl,\
++ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,vlgvb,llgcr,tmh,tml,clmy,slr,cfi,\
++stc,std,ste,stg,sth,locgr,slbg,sty,tmlh,la,lb,mvghi,lh,risbgn,lrvg,lr,asi,\
++lt,ahik,lrvr,cgf,cgh,cgr,clhrl,lzdr,tmll,mh,ml,vlvgb,ms,lrv,vlvgf,xgrk,\
++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oi,cgfrl,\
++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\
++algrk,alsi,srak,slgf,a,c,slgr,m,o,algsi,icmh,srag,iilf,ogrk,clg,icmy,\
++cli,clm,clr,clt,slgrk,mlg,lao,mlr,risbg,mvhhi,lat,etnd,lax,iihf,sra,alcgr,\
++msgr,clghsi,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,lgfi,dlgr,lgfr,\
++slgfi,llcr,slbgr,chrl,lgdr,pfpo,lang,basr,sllg,sllk,lghi,lghr,vlgvf,vlgvg,\
++vlgvh,vlr,chsi,lngr,cghrl,srl,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,\
++llihl,dlr,msgfi,msgfr,msg,flogr,xy,msr,clgfi,clgfr,ogr,popcnt,alcg,lndfr,\
++larl,sll,tmy,msfi,ic,lpdfr,tend,lnr") (const_int 1)]
++        (const_int 0)))
++
++(define_attr "z13_unit_vfu" ""
++  (cond [(eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\
++vgbm,verimb,vone,verimf,verimg,verimh,dxbr,verllvb,lpebr,verllvf,verllvg,\
++verllvh,vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,\
++vlcf,vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,\
++vsh,vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,\
++vmrhg,vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,\
++vmloh,lxdb,ldeb,mdtr,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,mxtr,vchlf,vchlg,\
++vchlh,vfcedbs,vfcedb,vceqgs,cxbr,msdbr,vcdgb,debr,vceqhs,meeb,lcxbr,vavglb,\
++vavglf,vavglg,vavglh,wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,\
++vmahh,vsrlb,wcgdb,lcdbr,vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,\
++ley,vistrb,vistrf,vistrh,tceb,wfsqdb,sqeb,vsumqf,vsumqg,vesrlb,vfeezbs,\
++maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,\
++vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,vzero,msebr,veslb,veslf,veslg,vfenezb,\
++vfenezf,vfenezh,vistrfs,vchf,vchg,vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,\
++veslvf,veslvg,veslvh,wclgdb,vfmdb,vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,\
++vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,vmxlh,ltdtr,vsbcbiq,ceb,wfddb,sebr,vistrhs,\
++lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,\
++sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,\
++vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,vpksf,vpksg,vpksh,sqdb,mxbr,sqdbr,\
++vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,ddtr,verllf,verllg,verllh,\
++wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,vmnf,vmng,vfchedbs,\
++lnebr,vfidb,dxtr,ddb,msdb,vmalhb,vfddb,vmalhf,vmalhh,vpkshs,vfsdb,sqxbr,\
++vmalhw,ltdbr,vmob,vmof,vmoh,deb,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,\
++vldeb,vmahf,vgfmb,fidbr,vfsqdb,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,\
++vesravg,vesravh,vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,\
++vscbib,vceqf,vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,\
++vfchedb,tdcet,vslb,vpklsfs,adbr,sqebr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,\
++vmlef,vmleh,cpsdr,vmalb,vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,\
++vgfmh,fidtr,vpklshs,lndbr,vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,\
++fixtr,vaccb,wfadb,vaccf,vaccg,vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,\
++ledtr,vuplb,vuplf,axbr,lxdbr,ltxbr,vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,\
++vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,\
++vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,vesraf,vesrag,vesrah,vflpdb,vmnh,\
++vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,vuplhh,vsumgf,vsumgh,ldebr,vuplhw,\
++vchfs,madb,ddbr") (const_int 1)]
++        (const_int 0)))
++
++(define_insn_reservation "z13_0" 0
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "s,lcgr,x,nop,oiy,vlbb,ppa,ng,sgrk,vstl,aghik,\
++mvc,ipm,llgc,mvi,stocg,rll,jg,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,\
++vst,ark,xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,llc,laa,lder,sgf,\
++lan,llhrl,llilf,llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,vl,\
++nopr,ngrk,lbr,br,stdy,ork,ldgr,lcr,cg,ch,llghrl,lgfrl,cl,stoc,cr,agfr,stgrl,\
++cy,alfi,xg,cgfi,xi,vlrepf,vlrepg,vlreph,clfhsi,cgfr,xr,slb,mghi,clfi,slg,\
++lde,clhhsi,agfi,clfit,sly,ldr,ldy,nihf,nihh,algfi,nihl,algf,algfr,algr,\
++clgf,clgr,clgt,aghi,alc,alg,locg,alr,locr,cghi,aly,alghsik,slbr,clgfrl,\
++mhy,cit,nr,ny,xiy,sthy,cly,rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,\
++lgr,slrk,clrt,icy,laog,og,agr,mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,tabort,\
++nill,lcdfr,mviy,tmhh,tmhl,sthrl,ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,\
++vlgvb,llgcr,tmh,tml,clmy,slr,cfi,stc,std,ste,stg,sth,l,locgr,llh,slbg,sty,\
++tmlh,la,lb,ld,mvghi,lg,lh,risbgn,lrvg,lr,asi,lt,ahik,ly,lrvr,vlrepb,vllezb,\
++cgf,cgh,vllezf,vllezg,vllezh,cgr,clhrl,lzdr,tmll,mh,vlvgb,lrv,vlvgf,xgrk,\
++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oc,oi,cgfrl,\
++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\
++algrk,alsi,srak,brcl,slgf,xc,a,c,slgr,j,o,algsi,icmh,srag,iilf,ogrk,clc,\
++clg,icmy,cli,clm,clr,clt,slgrk,lrl,lao,risbg,mvhhi,lat,etnd,lax,iihf,sra,\
++alcgr,clghsi,ear,nc,lgrl,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,\
++lgfi,sfpc,lgfr,slgfi,llcr,llgf,llgfrl,llgh,slbgr,llgt,chrl,lgdr,pfpo,lang,\
++basr,lcbb,sllg,sllk,lghi,vll,lghr,vlgvf,vlgvg,vlgvh,vlr,chsi,lngr,cghrl,\
++srl,sar,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,llihl,xy,clgfi,clgfr,\
++ogr,popcnt,alcg,lndfr,larl,sll,tmy,ic,lpdfr,tend,lnr,bcr_flush")) "nothing")
++
++(define_insn_reservation "z13_1" 1
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "celgbr,vscef,vsceg,msy,msgf,cxlftr,cdsg,cdsy,\
++exrl,clfebr,cefbr,chhsi,stam,vgef,vgeg,cdlftr,lam,mfy,lcgfr,cfdbr,dsgf,\
++cgdbr,lzxr,lmg,cfxbr,rnsbg,cxlgtr,mr,dl,cxftr,sldl,cgdtr,cegbr,rxsbg,ex,\
++cgxtr,clfxtr,mlgr,cdlgtr,brasl,dsg,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,\
++vsteh,cxlfbr,clfdtr,cxgtr,stmg,stmy,stm,lm,cds,cdfbr,ml,ms,lngfr,clgebr,\
++stpq,lmy,cs,lpgfr,cdlgbr,lpq,cxfbr,cxlgbr,cdgtr,d,m,mlg,mlr,cgxbr,cdftr,\
++msgr,rosbg,clgdbr,cdgbr,srda,bras,srdl,tbegin,clfdbr,cdlfbr,cxgbr,cgebr,\
++dlg,clfxbr,lxr,csy,msgfi,msgfr,msg,flogr,msr,csg,msfi,clgdtr,clgxtr")) "nothing")
++
++(define_insn_reservation "z13_2" 2
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\
++vgbm,verimb,vone,verimf,verimg,verimh,verllvb,lpebr,verllvf,verllvg,verllvh,\
++vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,vlcf,\
++vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,vsh,\
++vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,vmrhg,\
++vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,vmloh,\
++lxdb,ldeb,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,vchlf,vchlg,vchlh,vfcedbs,\
++vfcedb,vceqgs,cxbr,msdbr,vcdgb,vceqhs,meeb,lcxbr,vavglb,vavglf,vavglg,vavglh,\
++wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,vmahh,vsrlb,wcgdb,lcdbr,\
++vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,ley,vistrb,vistrf,vistrh,\
++tceb,vsumqf,vsumqg,vesrlb,vfeezbs,maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,\
++vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,\
++vzero,msebr,veslb,veslf,veslg,vfenezb,vfenezf,vfenezh,vistrfs,vchf,vchg,\
++vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,veslvf,veslvg,veslvh,wclgdb,vfmdb,\
++vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,\
++vmxlh,ltdtr,vsbcbiq,ceb,sebr,vistrhs,lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,\
++vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,\
++vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,\
++vpksf,vpksg,vpksh,vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,verllf,\
++verllg,verllh,wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,\
++vmnf,vmng,vfchedbs,lnebr,vfidb,msdb,vmalhb,vmalhf,vmalhh,vpkshs,vfsdb,vmalhw,\
++ltdbr,vmob,vmof,vmoh,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,vldeb,vmahf,\
++vgfmb,fidbr,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,vesravg,vesravh,\
++vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,vscbib,vceqf,\
++vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,vfchedb,tdcet,\
++vslb,vpklsfs,adbr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,vmlef,vmleh,cpsdr,vmalb,\
++vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,vgfmh,fidtr,vpklshs,lndbr,\
++vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,fixtr,vaccb,wfadb,vaccf,vaccg,\
++vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,vuplb,vuplf,axbr,lxdbr,ltxbr,\
++vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,\
++vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,\
++vesraf,vesrag,vesrah,vflpdb,vmnh,vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,\
++vuplhh,vsumgf,vsumgh,ldebr,vuplhw,vchfs,madb")) "nothing")
++
++(define_insn_reservation "z13_3" 3
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "ledtr")) "nothing")
++
++(define_insn_reservation "z13_4" 4
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "dr,mxbr,dlr")) "nothing")
++
++(define_insn_reservation "z13_6" 6
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "debr,sqeb,deb,sqebr")) "nothing")
++
++(define_insn_reservation "z13_7" 7
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "mdtr")) "nothing")
++
++(define_insn_reservation "z13_8" 8
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "wfddb,ddb,vfddb,ddbr")) "nothing")
++
++(define_insn_reservation "z13_9" 9
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "dsgr,wfsqdb,dsgfr,sqdb,sqdbr,vfsqdb")) "nothing")
++
++(define_insn_reservation "z13_13" 13
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "mxtr,ddtr")) "nothing")
++
++(define_insn_reservation "z13_16" 16
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "sqxbr")) "nothing")
++
++(define_insn_reservation "z13_17" 17
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "dxtr")) "nothing")
++
++(define_insn_reservation "z13_20" 20
++  (and (eq_attr "cpu" "z13")
++       (eq_attr "mnemonic" "dxbr,dlgr")) "nothing")
++
+--- gcc/config/s390/constraints.md	2013-01-21 16:11:50.000000000 +0100
++++ gcc/config/s390/constraints.md	2016-05-11 18:40:20.880008612 +0200
+@@ -29,7 +29,15 @@
+ ;;    c -- Condition code register 33.
+ ;;    d -- Any register from 0 to 15.
+ ;;    f -- Floating point registers.
++;;    j -- Multiple letter constraint for constant scalar and vector values
++;;         j00: constant zero scalar or vector
++;;         jm1: constant scalar or vector with all bits set
++;;         jxx: contiguous bitmask of 0 or 1 in all vector elements
++;;         jyy: constant consisting of byte chunks being either 0 or 0xff
++;;         jKK: constant vector with all elements having the same value and
++;;              matching K constraint
+ ;;    t -- Access registers 36 and 37.
++;;    v -- Vector registers v0-v31.
+ ;;    C -- A signed 8-bit constant (-128..127)
+ ;;    D -- An unsigned 16-bit constant (0..65535)
+ ;;    G -- Const double zero operand
+@@ -109,6 +117,11 @@
+    Access registers 36 and 37")
+ 
+ 
++(define_register_constraint "v"
++  "VEC_REGS"
++  "Vector registers v0-v31")
++
++
+ ;;
+ ;;  General constraints for constants.
+ ;;
+@@ -374,6 +387,33 @@
+        (match_test "s390_O_constraint_str ('n', ival)")))
+ 
+ 
++;;
++;; Vector constraints follow.
++;;
++
++(define_constraint "j00"
++  "Zero scalar or vector constant"
++  (match_test "op == CONST0_RTX (GET_MODE (op))"))
++
++(define_constraint "jm1"
++  "All one bit scalar or vector constant"
++  (match_test "op == CONSTM1_RTX (GET_MODE (op))"))
++
++(define_constraint "jxx"
++  "@internal"
++  (and (match_code "const_vector")
++       (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)")))
++
++(define_constraint "jyy"
++  "@internal"
++  (and (match_code "const_vector")
++       (match_test "s390_bytemask_vector_p (op, NULL)")))
++
++(define_constraint "jKK"
++  "@internal"
++  (and (and (match_code "const_vector")
++	    (match_test "s390_const_vec_duplicate_p (op)"))
++       (match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))")))
+ 
+ 
+ ;;
+--- gcc/config/s390/predicates.md	2013-08-14 13:55:12.000000000 +0200
++++ gcc/config/s390/predicates.md	2016-05-11 18:17:42.508662564 +0200
+@@ -24,16 +24,26 @@
+ 
+ ;; operands --------------------------------------------------------------
+ 
+-;; Return true if OP a (const_int 0) operand.
+-
++;; Return true if OP is a const 0 operand (int/float/vector).
+ (define_predicate "const0_operand"
+-  (and (match_code "const_int, const_double")
++  (and (match_code "const_int,const_double,const_vector")
+        (match_test "op == CONST0_RTX (mode)")))
+ 
++;; Return true if OP is an all ones operand (int/vector).
++(define_predicate "all_ones_operand"
++  (and (match_code "const_int, const_double, const_vector")
++       (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
++       (match_test "op == CONSTM1_RTX (mode)")))
++
++;; Return true if OP is a 4 bit mask operand
++(define_predicate "const_mask_operand"
++  (and (match_code "const_int")
++       (match_test "UINTVAL (op) < 16")))
++
+ ;; Return true if OP is constant.
+ 
+ (define_special_predicate "consttable_operand"
+-  (and (match_code "symbol_ref, label_ref, const, const_int, const_double")
++  (and (match_code "symbol_ref, label_ref, const, const_int, const_double, const_vector")
+        (match_test "CONSTANT_P (op)")))
+ 
+ ;; Return true if OP is a valid S-type operand.
+--- gcc/config/s390/s390-builtins.def	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/s390-builtins.def	2016-05-11 17:53:57.000000000 +0200
+@@ -0,0 +1,2488 @@
++/* Builtin definitions for IBM S/390 and zSeries
++   Copyright (C) 2015 Free Software Foundation, Inc.
++
++   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#undef O_U1
++#undef O_U2
++#undef O_U3
++#undef O_U4
++#undef O_U5
++#undef O_U8
++#undef O_U12
++#undef O_U16
++#undef O_U32
++
++#undef O_S2
++#undef O_S3
++#undef O_S4
++#undef O_S5
++#undef O_S8
++#undef O_S12
++#undef O_S16
++#undef O_S32
++#undef O_ELEM
++#undef O_LIT
++
++#undef O1_U1
++#undef O2_U1
++#undef O3_U1
++#undef O4_U1
++
++#undef O1_U2
++#undef O2_U2
++#undef O3_U2
++#undef O4_U2
++
++#undef O1_U3
++#undef O2_U3
++#undef O3_U3
++#undef O4_U3
++
++#undef O1_U4
++#undef O2_U4
++#undef O3_U4
++#undef O4_U4
++
++#undef O1_U5
++#undef O2_U5
++#undef O3_U5
++#undef O4_U5
++
++#undef O1_U8
++#undef O2_U8
++#undef O3_U8
++#undef O4_U8
++
++#undef O1_U12
++#undef O2_U12
++#undef O3_U12
++#undef O4_U12
++
++#undef O1_U16
++#undef O2_U16
++#undef O3_U16
++#undef O4_U16
++
++#undef O1_U32
++#undef O2_U32
++#undef O3_U32
++#undef O4_U32
++
++#undef O1_S2
++#undef O2_S2
++#undef O3_S2
++#undef O4_S2
++
++#undef O1_S3
++#undef O2_S3
++#undef O3_S3
++#undef O4_S3
++
++#undef O1_S4
++#undef O2_S4
++#undef O3_S4
++#undef O4_S4
++
++#undef O1_S5
++#undef O2_S5
++#undef O3_S5
++#undef O4_S5
++
++#undef O1_S8
++#undef O2_S8
++#undef O3_S8
++#undef O4_S8
++
++#undef O1_S12
++#undef O2_S12
++#undef O3_S12
++#undef O4_S12
++
++#undef O1_S16
++#undef O2_S16
++#undef O3_S16
++#undef O4_S16
++
++#undef O1_S32
++#undef O2_S32
++#undef O3_S32
++#undef O4_S32
++
++#undef O1_ELEM
++#undef O2_ELEM
++#undef O3_ELEM
++#undef O4_ELEM
++
++#undef O1_LIT
++#undef O2_LIT
++#undef O3_LIT
++#undef O4_LIT
++
++#undef O_SHIFT
++#undef O_IMM_P
++#undef O_UIMM_P
++#undef O_SIMM_P
++
++#define O_U1   1 /* unsigned  1 bit literal */
++#define O_U2   2 /* unsigned  2 bit literal */
++#define O_U3   3 /* unsigned  3 bit literal */
++#define O_U4   4 /* unsigned  4 bit literal */
++#define O_U5   5 /* unsigned  5 bit literal */
++#define O_U8   6 /* unsigned  8 bit literal */
++#define O_U12  7 /* unsigned 12 bit literal */
++#define O_U16  8 /* unsigned 16 bit literal */
++#define O_U32  9 /* unsigned 32 bit literal */
++
++#define O_S2  10 /* signed  2 bit literal */
++#define O_S3  11 /* signed  3 bit literal */
++#define O_S4  12 /* signed  4 bit literal */
++#define O_S5  13 /* signed  5 bit literal */
++#define O_S8  14 /* signed  8 bit literal */
++#define O_S12 15 /* signed 12 bit literal */
++#define O_S16 16 /* signed 16 bit literal */
++#define O_S32 17 /* signed 32 bit literal */
++
++#define O_ELEM  18 /* Element selector requiring modulo arithmetic. */
++#define O_LIT   19 /* Operand must be a literal fitting the target type.  */
++
++#define O_SHIFT 5
++
++#define O_UIMM_P(X) ((X) >= O_U1 && (X) <= O_U32)
++#define O_SIMM_P(X) ((X) >= O_S2 && (X) <= O_S32)
++#define O_IMM_P(X) ((X) == O_LIT || ((X) >= O_U1 && (X) <= O_S32))
++
++#define O1_U1 O_U1
++#define O2_U1 (O_U1 << O_SHIFT)
++#define O3_U1 (O_U1 << (2 * O_SHIFT))
++#define O4_U1 (O_U1 << (3 * O_SHIFT))
++
++#define O1_U2 O_U2
++#define O2_U2 (O_U2 << O_SHIFT)
++#define O3_U2 (O_U2 << (2 * O_SHIFT))
++#define O4_U2 (O_U2 << (3 * O_SHIFT))
++
++#define O1_U3 O_U3
++#define O2_U3 (O_U3 << O_SHIFT)
++#define O3_U3 (O_U3 << (2 * O_SHIFT))
++#define O4_U3 (O_U3 << (3 * O_SHIFT))
++
++#define O1_U4 O_U4
++#define O2_U4 (O_U4 << O_SHIFT)
++#define O3_U4 (O_U4 << (2 * O_SHIFT))
++#define O4_U4 (O_U4 << (3 * O_SHIFT))
++
++#define O1_U5 O_U5
++#define O2_U5 (O_U5 << O_SHIFT)
++#define O3_U5 (O_U5 << (2 * O_SHIFT))
++#define O4_U5 (O_U5 << (3 * O_SHIFT))
++
++#define O1_U8 O_U8
++#define O2_U8 (O_U8 << O_SHIFT)
++#define O3_U8 (O_U8 << (2 * O_SHIFT))
++#define O4_U8 (O_U8 << (3 * O_SHIFT))
++
++#define O1_U12 O_U12
++#define O2_U12 (O_U12 << O_SHIFT)
++#define O3_U12 (O_U12 << (2 * O_SHIFT))
++#define O4_U12 (O_U12 << (3 * O_SHIFT))
++
++#define O1_U16 O_U16
++#define O2_U16 (O_U16 << O_SHIFT)
++#define O3_U16 (O_U16 << (2 * O_SHIFT))
++#define O4_U16 (O_U16 << (3 * O_SHIFT))
++
++#define O1_U32 O_U32
++#define O2_U32 (O_U32 << O_SHIFT)
++#define O3_U32 (O_U32 << (2 * O_SHIFT))
++#define O4_U32 (O_U32 << (3 * O_SHIFT))
++
++
++#define O1_S2 O_S2
++#define O2_S2 (O_S2 << O_SHIFT)
++#define O3_S2 (O_S2 << (2 * O_SHIFT))
++#define O4_S2 (O_S2 << (3 * O_SHIFT))
++
++#define O1_S3 O_S3
++#define O2_S3 (O_S3 << O_SHIFT)
++#define O3_S3 (O_S3 << (2 * O_SHIFT))
++#define O4_S3 (O_S3 << (3 * O_SHIFT))
++
++#define O1_S4 O_S4
++#define O2_S4 (O_S4 << O_SHIFT)
++#define O3_S4 (O_S4 << (2 * O_SHIFT))
++#define O4_S4 (O_S4 << (3 * O_SHIFT))
++
++#define O1_S5 O_S5
++#define O2_S5 (O_S5 << O_SHIFT)
++#define O3_S5 (O_S5 << (2 * O_SHIFT))
++#define O4_S5 (O_S5 << (3 * O_SHIFT))
++
++#define O1_S8 O_S8
++#define O2_S8 (O_S8 << O_SHIFT)
++#define O3_S8 (O_S8 << (2 * O_SHIFT))
++#define O4_S8 (O_S8 << (3 * O_SHIFT))
++
++#define O1_S12 O_S12
++#define O2_S12 (O_S12 << O_SHIFT)
++#define O3_S12 (O_S12 << (2 * O_SHIFT))
++#define O4_S12 (O_S12 << (3 * O_SHIFT))
++
++#define O1_S16 O_S16
++#define O2_S16 (O_S16 << O_SHIFT)
++#define O3_S16 (O_S16 << (2 * O_SHIFT))
++#define O4_S16 (O_S16 << (3 * O_SHIFT))
++
++#define O1_S32 O_S32
++#define O2_S32 (O_S32 << O_SHIFT)
++#define O3_S32 (O_S32 << (2 * O_SHIFT))
++#define O4_S32 (O_S32 << (3 * O_SHIFT))
++
++#define O1_ELEM O_ELEM
++#define O2_ELEM (O_ELEM << O_SHIFT)
++#define O3_ELEM (O_ELEM << (2 * O_SHIFT))
++#define O4_ELEM (O_ELEM << (3 * O_SHIFT))
++
++#define O1_LIT O_LIT
++#define O2_LIT (O_LIT << O_SHIFT)
++#define O3_LIT (O_LIT << (2 * O_SHIFT))
++#define O4_LIT (O_LIT << (3 * O_SHIFT))
++
++
++/* Builtin flags. Flags applying to the whole builtin definition.  */
++
++#undef B_INT
++#undef B_HTM
++#undef B_VX
++
++#undef BFLAGS_MASK_INIT
++#define BFLAGS_MASK_INIT (B_INT)
++
++#define B_INT   (1 << 0)  /* Internal builtins.  This builtin cannot be used in user programs.  */
++#define B_HTM   (1 << 1)  /* Builtins requiring the transactional execution facility.  */
++#define B_VX    (1 << 2)  /* Builtins requiring the z13 vector extensions.  */
++
++
++/* B_DEF defines a standard (not overloaded) builtin
++   B_DEF (<builtin name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>)
++
++   OB_DEF defines an overloaded builtin
++   OB_DEF (<builtin name>, <start variant>, <end variant>, <builtin flags>, <fntype>)
++
++   OB_DEF_VAR defines a variant of an overloaded builtin
++   OB_DEF_VAR (<variant name>, <standard builtin name>, <flags>, <fntype>) */
++
++
++B_DEF      (tbeginc,                    tbeginc,            0,                  B_HTM,              0,                  BT_FN_INT)
++B_DEF      (tbegin,                     tbegin,             returns_twice_attr, B_HTM,              0,                  BT_FN_INT_VOIDPTR)
++B_DEF      (tbegin_nofloat,             tbegin_nofloat,     returns_twice_attr, B_HTM,              0,                  BT_FN_INT_VOIDPTR)
++B_DEF      (tbegin_retry,               tbegin_retry,       returns_twice_attr, B_HTM,              0,                  BT_FN_INT_VOIDPTR_INT)
++B_DEF      (tbegin_retry_nofloat,       tbegin_retry_nofloat,returns_twice_attr,B_HTM,              0,                  BT_FN_INT_VOIDPTR_INT)
++B_DEF      (tend,                       tend,               0,                  B_HTM,              0,                  BT_FN_INT)
++B_DEF      (tabort,                     tabort,             noreturn_attr,      B_HTM,              0,                  BT_FN_VOID_INT)
++B_DEF      (tx_nesting_depth,           etnd,               0,                  B_HTM,              0,                  BT_FN_INT)
++B_DEF      (non_tx_store,               ntstg,              0,                  B_HTM,              0,                  BT_FN_VOID_UINT64PTR_UINT64)
++B_DEF      (tx_assist,                  tx_assist,          0,                  B_HTM,              0,                  BT_FN_VOID_INT)
++B_DEF      (s390_sfpc,                  sfpc,               0,                  0,                  0,                  BT_FN_VOID_UINT)
++B_DEF      (s390_efpc,                  efpc,               0,                  0,                  0,                  BT_FN_UINT)
++B_DEF      (s390_lcbb,                  lcbb,               0,                  B_VX,               O2_U4,              BT_FN_UINT_VOIDCONSTPTR_INT)
++
++OB_DEF     (s390_vec_step,              MAX,                MAX,                B_VX,               BT_FN_INT_INT)
++
++OB_DEF     (s390_vec_gather_element,    s390_vec_gather_element_s32,s390_vec_gather_element_dbl,B_VX,BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_s32,s390_vgef,          O4_U2,              BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_b32,s390_vgef,          O4_U2,              BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_u32,s390_vgef,          O4_U2,              BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_s64,s390_vgeg,          O4_U1,              BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_b64,s390_vgeg,          O4_U1,              BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_u64,s390_vgeg,          O4_U1,              BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++OB_DEF_VAR (s390_vec_gather_element_dbl,s390_vgeg,          O4_U1,              BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR)
++
++B_DEF      (s390_vgef,                  vec_gather_elementv4si,0,               B_VX,               O4_U2,              BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR)
++B_DEF      (s390_vgeg,                  vec_gather_elementv2di,0,               B_VX,               O4_U1,              BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR)
++B_DEF      (s390_vgbm,                  vec_genbytemaskv16qi,0,                 B_VX,               O1_U16,             BT_FN_UV16QI_USHORT)
++B_DEF      (s390_vgmb,                  vec_genmaskv16qi,   0,                  B_VX,               O1_U8 | O2_U8,      BT_FN_UV16QI_UCHAR_UCHAR)
++B_DEF      (s390_vgmh,                  vec_genmaskv8hi,    0,                  B_VX,               O1_U8 | O2_U8,      BT_FN_UV8HI_UCHAR_UCHAR)
++B_DEF      (s390_vgmf,                  vec_genmaskv4si,    0,                  B_VX,               O1_U8 | O2_U8,      BT_FN_UV4SI_UCHAR_UCHAR)
++B_DEF      (s390_vgmg,                  vec_genmaskv2di,    0,                  B_VX,               O1_U8 | O2_U8,      BT_FN_UV2DI_UCHAR_UCHAR)
++
++OB_DEF     (s390_vec_xld2,              s390_vec_xld2_s8,   s390_vec_xld2_dbl,  B_VX,               BT_FN_V4SI_INT_VOIDPTR)
++OB_DEF_VAR (s390_vec_xld2_s8,           MAX,                O1_LIT,             BT_OV_V16QI_LONG_SCHARPTR)               /* vl */
++OB_DEF_VAR (s390_vec_xld2_u8,           MAX,                O1_LIT,             BT_OV_UV16QI_LONG_UCHARPTR)              /* vl */
++OB_DEF_VAR (s390_vec_xld2_s16,          MAX,                O1_LIT,             BT_OV_V8HI_LONG_SHORTPTR)                /* vl */
++OB_DEF_VAR (s390_vec_xld2_u16,          MAX,                O1_LIT,             BT_OV_UV8HI_LONG_USHORTPTR)              /* vl */
++OB_DEF_VAR (s390_vec_xld2_s32,          MAX,                O1_LIT,             BT_OV_V4SI_LONG_INTPTR)                  /* vl */
++OB_DEF_VAR (s390_vec_xld2_u32,          MAX,                O1_LIT,             BT_OV_UV4SI_LONG_UINTPTR)                /* vl */
++OB_DEF_VAR (s390_vec_xld2_s64,          MAX,                O1_LIT,             BT_OV_V2DI_LONG_LONGLONGPTR)             /* vl */
++OB_DEF_VAR (s390_vec_xld2_u64,          MAX,                O1_LIT,             BT_OV_UV2DI_LONG_ULONGLONGPTR)           /* vl */
++OB_DEF_VAR (s390_vec_xld2_dbl,          MAX,                O1_LIT,             BT_OV_V2DF_LONG_DBLPTR)                  /* vl */
++
++OB_DEF     (s390_vec_xlw4,              s390_vec_xlw4_s8,   s390_vec_xlw4_u32,  B_VX,               BT_FN_V4SI_INT_VOIDPTR)
++OB_DEF_VAR (s390_vec_xlw4_s8,           MAX,                O1_LIT,             BT_OV_V16QI_LONG_SCHARPTR)               /* vl */
++OB_DEF_VAR (s390_vec_xlw4_u8,           MAX,                O1_LIT,             BT_OV_UV16QI_LONG_UCHARPTR)              /* vl */
++OB_DEF_VAR (s390_vec_xlw4_s16,          MAX,                O1_LIT,             BT_OV_V8HI_LONG_SHORTPTR)                /* vl */
++OB_DEF_VAR (s390_vec_xlw4_u16,          MAX,                O1_LIT,             BT_OV_UV8HI_LONG_USHORTPTR)              /* vl */
++OB_DEF_VAR (s390_vec_xlw4_s32,          MAX,                O1_LIT,             BT_OV_V4SI_LONG_INTPTR)                  /* vl */
++OB_DEF_VAR (s390_vec_xlw4_u32,          MAX,                O1_LIT,             BT_OV_UV4SI_LONG_UINTPTR)                /* vl */
++
++OB_DEF     (s390_vec_splats,            s390_vec_splats_s8, s390_vec_splats_dbl,B_VX,               BT_FN_OV4SI_INT)
++OB_DEF_VAR (s390_vec_splats_s8,         s390_vlrepb,        0,                  BT_OV_V16QI_SCHAR)
++OB_DEF_VAR (s390_vec_splats_u8,         s390_vlrepb,        0,                  BT_OV_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_splats_s16,        s390_vlreph,        0,                  BT_OV_V8HI_SHORT)
++OB_DEF_VAR (s390_vec_splats_u16,        s390_vlreph,        0,                  BT_OV_UV8HI_USHORT)
++OB_DEF_VAR (s390_vec_splats_s32,        s390_vlrepf,        0,                  BT_OV_V4SI_INT)
++OB_DEF_VAR (s390_vec_splats_u32,        s390_vlrepf,        0,                  BT_OV_UV4SI_UINT)
++OB_DEF_VAR (s390_vec_splats_s64,        s390_vlrepg,        0,                  BT_OV_V2DI_LONGLONG)
++OB_DEF_VAR (s390_vec_splats_u64,        s390_vlrepg,        0,                  BT_OV_UV2DI_ULONGLONG)
++OB_DEF_VAR (s390_vec_splats_dbl,        s390_vlrepg_dbl,    0,                  BT_OV_V2DF_DBL)                          /* vlrepg */
++
++B_DEF      (s390_vlrepb,                vec_splatsv16qi,    0,                  B_VX,               0,                  BT_FN_UV16QI_UCHAR)
++B_DEF      (s390_vlreph,                vec_splatsv8hi,     0,                  B_VX,               0,                  BT_FN_UV8HI_USHORT)
++B_DEF      (s390_vlrepf,                vec_splatsv4si,     0,                  B_VX,               0,                  BT_FN_UV4SI_UINT)
++B_DEF      (s390_vlrepg,                vec_splatsv2di,     0,                  B_VX,               0,                  BT_FN_UV2DI_ULONGLONG)
++B_DEF      (s390_vlrepg_dbl,            vec_splatsv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_V2DF_DBL)
++B_DEF      (s390_vrepib,                vec_splatsv16qi,    0,                  B_VX,               O1_U8,              BT_FN_V16QI_UCHAR)
++B_DEF      (s390_vrepih,                vec_splatsv8hi,     0,                  B_VX,               O1_S16,             BT_FN_V8HI_SHORT)
++B_DEF      (s390_vrepif,                vec_splatsv4si,     0,                  B_VX,               O1_S16,             BT_FN_V4SI_SHORT)
++B_DEF      (s390_vrepig,                vec_splatsv2di,     0,                  B_VX,               O1_S16,             BT_FN_V2DI_SHORT)
++
++B_DEF      (s390_vec_splat_u8,          vec_splatsv16qi,    0,                  B_VX,               O1_U8,              BT_FN_UV16QI_UCHAR)
++B_DEF      (s390_vec_splat_s8,          vec_splatsv16qi,    0,                  B_VX,               O1_S8,              BT_FN_V16QI_SCHAR)
++B_DEF      (s390_vec_splat_u16,         vec_splatsv8hi,     0,                  B_VX,               O1_U16,             BT_FN_UV8HI_USHORT)
++B_DEF      (s390_vec_splat_s16,         vec_splatsv8hi,     0,                  B_VX,               O1_S16,             BT_FN_V8HI_SHORT)
++B_DEF      (s390_vec_splat_u32,         vec_splatsv4si,     0,                  B_VX,               O1_U16,             BT_FN_UV4SI_USHORT)
++B_DEF      (s390_vec_splat_s32,         vec_splatsv4si,     0,                  B_VX,               O1_S16,             BT_FN_V4SI_SHORT)
++B_DEF      (s390_vec_splat_u64,         vec_splatsv2di,     0,                  B_VX,               O1_U16,             BT_FN_UV2DI_USHORT)
++B_DEF      (s390_vec_splat_s64,         vec_splatsv2di,     0,                  B_VX,               O1_S16,             BT_FN_V2DI_SHORT)
++
++OB_DEF     (s390_vec_insert,            s390_vec_insert_s8, s390_vec_insert_dbl,B_VX,               BT_FN_OV4SI_INT_OV4SI_INT)
++OB_DEF_VAR (s390_vec_insert_s8,         s390_vlvgb,         O3_ELEM,            BT_OV_V16QI_SCHAR_V16QI_INT)
++OB_DEF_VAR (s390_vec_insert_u8,         s390_vlvgb,         O3_ELEM,            BT_OV_UV16QI_UCHAR_UV16QI_INT)
++OB_DEF_VAR (s390_vec_insert_b8,         s390_vlvgb,         O3_ELEM,            BT_OV_UV16QI_UCHAR_BV16QI_INT)
++OB_DEF_VAR (s390_vec_insert_s16,        s390_vlvgh,         O3_ELEM,            BT_OV_V8HI_SHORT_V8HI_INT)
++OB_DEF_VAR (s390_vec_insert_u16,        s390_vlvgh,         O3_ELEM,            BT_OV_UV8HI_USHORT_UV8HI_INT)
++OB_DEF_VAR (s390_vec_insert_b16,        s390_vlvgh,         O3_ELEM,            BT_OV_UV8HI_USHORT_BV8HI_INT)
++OB_DEF_VAR (s390_vec_insert_s32,        s390_vlvgf,         O3_ELEM,            BT_OV_V4SI_INT_V4SI_INT)
++OB_DEF_VAR (s390_vec_insert_u32,        s390_vlvgf,         O3_ELEM,            BT_OV_UV4SI_UINT_UV4SI_INT)
++OB_DEF_VAR (s390_vec_insert_b32,        s390_vlvgf,         O3_ELEM,            BT_OV_UV4SI_UINT_BV4SI_INT)
++OB_DEF_VAR (s390_vec_insert_s64,        s390_vlvgg,         O3_ELEM,            BT_OV_V2DI_LONGLONG_V2DI_INT)
++OB_DEF_VAR (s390_vec_insert_u64,        s390_vlvgg,         O3_ELEM,            BT_OV_UV2DI_ULONGLONG_UV2DI_INT)
++OB_DEF_VAR (s390_vec_insert_b64,        s390_vlvgg,         O3_ELEM,            BT_OV_UV2DI_ULONGLONG_BV2DI_INT)
++OB_DEF_VAR (s390_vec_insert_dbl,        s390_vlvgg_dbl,     O3_ELEM,            BT_OV_V2DF_DBL_V2DF_INT)
++
++B_DEF      (s390_vlvgb,                 vec_insertv16qi,    0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UCHAR_INT)
++B_DEF      (s390_vlvgh,                 vec_insertv8hi,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_USHORT_INT)
++B_DEF      (s390_vlvgf,                 vec_insertv4si,     0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UINT_INT)
++B_DEF      (s390_vlvgg,                 vec_insertv2di,     0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_ULONGLONG_INT)
++B_DEF      (s390_vlvgg_dbl,             vec_insertv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_V2DF_V2DF_DBL_INT)
++
++OB_DEF     (s390_vec_promote,           s390_vec_promote_s8,s390_vec_promote_dbl,B_VX,              BT_FN_OV4SI_INT_INT) /* vec_promote family: 2nd/3rd args name the first and last variant rows below (_s8 .. _dbl) */
++OB_DEF_VAR (s390_vec_promote_s8,        s390_vlvgb_noin,    O2_ELEM,            BT_OV_V16QI_SCHAR_INT)                   /* vlvgb */
++OB_DEF_VAR (s390_vec_promote_u8,        s390_vlvgb_noin,    O2_ELEM,            BT_OV_UV16QI_UCHAR_INT)                  /* vlvgb */
++OB_DEF_VAR (s390_vec_promote_s16,       s390_vlvgh_noin,    O2_ELEM,            BT_OV_V8HI_SHORT_INT)                    /* vlvgh */
++OB_DEF_VAR (s390_vec_promote_u16,       s390_vlvgh_noin,    O2_ELEM,            BT_OV_UV8HI_USHORT_INT)                  /* vlvgh */
++OB_DEF_VAR (s390_vec_promote_s32,       s390_vlvgf_noin,    O2_ELEM,            BT_OV_V4SI_INT_INT)                      /* vlvgf */
++OB_DEF_VAR (s390_vec_promote_u32,       s390_vlvgf_noin,    O2_ELEM,            BT_OV_UV4SI_UINT_INT)                    /* vlvgf */
++OB_DEF_VAR (s390_vec_promote_s64,       s390_vlvgg_noin,    O2_ELEM,            BT_OV_V2DI_LONGLONG_INT)                 /* vlvgg */
++OB_DEF_VAR (s390_vec_promote_u64,       s390_vlvgg_noin,    O2_ELEM,            BT_OV_UV2DI_ULONGLONG_INT)               /* vlvgg */
++OB_DEF_VAR (s390_vec_promote_dbl,       s390_vlvgg_dbl_noin,O2_ELEM,            BT_OV_V2DF_DBL_INT)                      /* vlvgg */
++
++B_DEF      (s390_vlvgb_noin,            vec_promotev16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_UV16QI_UCHAR_INT) /* the *_noin rows are the builtins named by the variant rows above; all carry B_INT */
++B_DEF      (s390_vlvgh_noin,            vec_promotev8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_UV8HI_USHORT_INT)
++B_DEF      (s390_vlvgf_noin,            vec_promotev4si,    0,                  B_VX | B_INT,       0,                  BT_FN_UV4SI_UINT_INT)
++B_DEF      (s390_vlvgg_noin,            vec_promotev2di,    0,                  B_VX | B_INT,       0,                  BT_FN_UV2DI_ULONGLONG_INT)
++B_DEF      (s390_vlvgg_dbl_noin,        vec_promotev2df,    0,                  B_VX | B_INT,       0,                  BT_FN_V2DF_DBL_INT)
++
++OB_DEF     (s390_vec_extract,           s390_vec_extract_s8,s390_vec_extract_dbl,B_VX,              BT_FN_INT_OV4SI_INT) /* vec_extract family (_s8 .. _dbl): the s/u/b rows of one element width share a single builtin */
++OB_DEF_VAR (s390_vec_extract_s8,        s390_vlgvb,         O2_ELEM,            BT_OV_SCHAR_V16QI_INT)
++OB_DEF_VAR (s390_vec_extract_u8,        s390_vlgvb,         O2_ELEM,            BT_OV_UCHAR_UV16QI_INT)
++OB_DEF_VAR (s390_vec_extract_b8,        s390_vlgvb,         O2_ELEM,            BT_OV_UCHAR_BV16QI_INT)
++OB_DEF_VAR (s390_vec_extract_s16,       s390_vlgvh,         O2_ELEM,            BT_OV_SHORT_V8HI_INT)
++OB_DEF_VAR (s390_vec_extract_u16,       s390_vlgvh,         O2_ELEM,            BT_OV_USHORT_UV8HI_INT)
++OB_DEF_VAR (s390_vec_extract_b16,       s390_vlgvh,         O2_ELEM,            BT_OV_USHORT_BV8HI_INT)
++OB_DEF_VAR (s390_vec_extract_s32,       s390_vlgvf,         O2_ELEM,            BT_OV_INT_V4SI_INT)
++OB_DEF_VAR (s390_vec_extract_u32,       s390_vlgvf,         O2_ELEM,            BT_OV_UINT_UV4SI_INT)
++OB_DEF_VAR (s390_vec_extract_b32,       s390_vlgvf,         O2_ELEM,            BT_OV_UINT_BV4SI_INT)
++OB_DEF_VAR (s390_vec_extract_s64,       s390_vlgvg,         O2_ELEM,            BT_OV_LONGLONG_V2DI_INT)
++OB_DEF_VAR (s390_vec_extract_u64,       s390_vlgvg,         O2_ELEM,            BT_OV_ULONGLONG_UV2DI_INT)
++OB_DEF_VAR (s390_vec_extract_b64,       s390_vlgvg,         O2_ELEM,            BT_OV_ULONGLONG_BV2DI_INT)
++OB_DEF_VAR (s390_vec_extract_dbl,       s390_vlgvg_dbl,     O2_ELEM,            BT_OV_DBL_V2DF_INT)                      /* vlgvg */
++
++B_DEF      (s390_vlgvb,                 vec_extractv16qi,   0,                  B_VX,               0,                  BT_FN_UCHAR_UV16QI_INT)
++B_DEF      (s390_vlgvh,                 vec_extractv8hi,    0,                  B_VX,               0,                  BT_FN_USHORT_UV8HI_INT)
++B_DEF      (s390_vlgvf,                 vec_extractv4si,    0,                  B_VX,               0,                  BT_FN_UINT_UV4SI_INT)
++B_DEF      (s390_vlgvg,                 vec_extractv2di,    0,                  B_VX,               0,                  BT_FN_ULONGLONG_UV2DI_INT)
++B_DEF      (s390_vlgvg_dbl,             vec_extractv2df,    0,                  B_VX | B_INT,       0,                  BT_FN_DBL_V2DF_INT) /* only B_DEF row of this group flagged B_INT */
++
++OB_DEF     (s390_vec_insert_and_zero,   s390_vec_insert_and_zero_s8,s390_vec_insert_and_zero_dbl,B_VX,BT_FN_OV4SI_INTCONSTPTR) /* vec_insert_and_zero family: variant rows run _s8 .. _dbl, signed and unsigned rows share a builtin */
++OB_DEF_VAR (s390_vec_insert_and_zero_s8,s390_vllezb,        0,                  BT_OV_V16QI_SCHARCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_u8,s390_vllezb,        0,                  BT_OV_UV16QI_UCHARCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_s16,s390_vllezh,       0,                  BT_OV_V8HI_SHORTCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_u16,s390_vllezh,       0,                  BT_OV_UV8HI_USHORTCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_s32,s390_vllezf,       0,                  BT_OV_V4SI_INTCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_u32,s390_vllezf,       0,                  BT_OV_UV4SI_UINTCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_s64,s390_vllezg,       0,                  BT_OV_V2DI_LONGLONGCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_u64,s390_vllezg,       0,                  BT_OV_UV2DI_ULONGLONGCONSTPTR)
++OB_DEF_VAR (s390_vec_insert_and_zero_dbl,s390_vllezg,       0,                  BT_OV_V2DF_DBLCONSTPTR) /* same builtin as the _s64/_u64 rows; there is no separate dbl B_DEF in this group */
++
++B_DEF      (s390_vllezb,                vec_insert_and_zerov16qi,0,             B_VX,               0,                  BT_FN_UV16QI_UCHARCONSTPTR)
++B_DEF      (s390_vllezh,                vec_insert_and_zerov8hi,0,              B_VX,               0,                  BT_FN_UV8HI_USHORTCONSTPTR)
++B_DEF      (s390_vllezf,                vec_insert_and_zerov4si,0,              B_VX,               0,                  BT_FN_UV4SI_UINTCONSTPTR)
++B_DEF      (s390_vllezg,                vec_insert_and_zerov2di,0,              B_VX,               0,                  BT_FN_UV2DI_ULONGLONGCONSTPTR)
++
++OB_DEF     (s390_vec_load_bndry,        s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX,        BT_FN_OV4SI_INTCONSTPTR_INT) /* vec_load_bndry family: every variant row (_s8 .. _dbl) maps to the single s390_vlbb builtin */
++OB_DEF_VAR (s390_vec_load_bndry_s8,     s390_vlbb,          O2_U16,              BT_OV_V16QI_SCHARCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_u8,     s390_vlbb,          O2_U16,              BT_OV_UV16QI_UCHARCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_s16,    s390_vlbb,          O2_U16,              BT_OV_V8HI_SHORTCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_u16,    s390_vlbb,          O2_U16,              BT_OV_UV8HI_USHORTCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_s32,    s390_vlbb,          O2_U16,              BT_OV_V4SI_INTCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_u32,    s390_vlbb,          O2_U16,              BT_OV_UV4SI_UINTCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_s64,    s390_vlbb,          O2_U16,              BT_OV_V2DI_LONGLONGCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_u64,    s390_vlbb,          O2_U16,              BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT)
++OB_DEF_VAR (s390_vec_load_bndry_dbl,    s390_vlbb,          O2_U16,              BT_OV_V2DF_DBLCONSTPTR_USHORT)
++
++B_DEF      (s390_vlbb,                  vlbb,               0,                  B_VX,               O2_U3,              BT_FN_UV16QI_UCHARCONSTPTR_USHORT) /* NOTE(review): operand flag is O2_U3 here but O2_U16 on the variant rows; presumably the value is re-encoded elsewhere - confirm */
++
++OB_DEF     (s390_vec_load_pair,         s390_vec_load_pair_s64,s390_vec_load_pair_u64,B_VX,         BT_FN_OV2DI_LONGLONG_LONGLONG) /* vec_load_pair family: two variants only; both put MAX in the builtin column and no B_DEF row follows */
++OB_DEF_VAR (s390_vec_load_pair_s64,     MAX,                0,                  BT_OV_V2DI_LONGLONG_LONGLONG)            /* vlvgp */
++OB_DEF_VAR (s390_vec_load_pair_u64,     MAX,                0,                  BT_OV_UV2DI_ULONGLONG_ULONGLONG)         /* vlvgp */
++
++OB_DEF     (s390_vec_load_len,          s390_vec_load_len_s8,s390_vec_load_len_dbl,B_VX,            BT_FN_OV4SI_INTCONSTPTR_UINT) /* vec_load_len family: every variant row (_s8 .. _dbl) maps to the single s390_vll builtin */
++OB_DEF_VAR (s390_vec_load_len_s8,       s390_vll,           0,                  BT_OV_V16QI_SCHARCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_u8,       s390_vll,           0,                  BT_OV_UV16QI_UCHARCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_s16,      s390_vll,           0,                  BT_OV_V8HI_SHORTCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_u16,      s390_vll,           0,                  BT_OV_UV8HI_USHORTCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_s32,      s390_vll,           0,                  BT_OV_V4SI_INTCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_u32,      s390_vll,           0,                  BT_OV_UV4SI_UINTCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_s64,      s390_vll,           0,                  BT_OV_V2DI_LONGLONGCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_u64,      s390_vll,           0,                  BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT)
++OB_DEF_VAR (s390_vec_load_len_dbl,      s390_vll,           0,                  BT_OV_V2DF_DBLCONSTPTR_UINT)
++
++B_DEF      (s390_vll,                   vllv16qi,           0,                  B_VX,               0,                  BT_FN_V16QI_UINT_VOIDCONSTPTR) /* NOTE(review): type name lists UINT before the pointer, the variant rows list the pointer first; presumably reordered elsewhere - confirm */
++
++OB_DEF     (s390_vec_mergeh,            s390_vec_mergeh_s8, s390_vec_mergeh_dbl,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* vec_mergeh family (_s8 .. _dbl): the s/u/b rows of one element width share a single s390_vmrh* builtin */
++OB_DEF_VAR (s390_vec_mergeh_s8,         s390_vmrhb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mergeh_u8,         s390_vmrhb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mergeh_b8,         s390_vmrhb,         0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_mergeh_s16,        s390_vmrhh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mergeh_u16,        s390_vmrhh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mergeh_b16,        s390_vmrhh,         0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_mergeh_s32,        s390_vmrhf,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_mergeh_u32,        s390_vmrhf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mergeh_b32,        s390_vmrhf,         0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_mergeh_s64,        s390_vmrhg,         0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_mergeh_u64,        s390_vmrhg,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_mergeh_b64,        s390_vmrhg,         0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_mergeh_dbl,        s390_vmrhg,         0,                  BT_OV_V2DF_V2DF_V2DF) /* same builtin as the _s64/_u64/_b64 rows */
++
++B_DEF      (s390_vmrhb,                 vec_mergehv16qi,    0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmrhh,                 vec_mergehv8hi,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmrhf,                 vec_mergehv4si,     0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmrhg,                 vec_mergehv2di,     0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_mergel,            s390_vec_mergel_s8, s390_vec_mergel_dbl,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* vec_mergel family (_s8 .. _dbl): the s/u/b rows of one element width share a single s390_vmrl* builtin */
++OB_DEF_VAR (s390_vec_mergel_s8,         s390_vmrlb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mergel_u8,         s390_vmrlb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mergel_b8,         s390_vmrlb,         0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_mergel_s16,        s390_vmrlh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mergel_u16,        s390_vmrlh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mergel_b16,        s390_vmrlh,         0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_mergel_s32,        s390_vmrlf,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_mergel_u32,        s390_vmrlf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mergel_b32,        s390_vmrlf,         0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_mergel_s64,        s390_vmrlg,         0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_mergel_u64,        s390_vmrlg,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_mergel_b64,        s390_vmrlg,         0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_mergel_dbl,        s390_vmrlg,         0,                  BT_OV_V2DF_V2DF_V2DF) /* same builtin as the _s64/_u64/_b64 rows */
++
++B_DEF      (s390_vmrlb,                 vec_mergelv16qi,    0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmrlh,                 vec_mergelv8hi,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmrlf,                 vec_mergelv4si,     0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmrlg,                 vec_mergelv2di,     0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_pack,              s390_vec_pack_s16,  s390_vec_pack_b64,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* vec_pack family (_s16 .. _b64): the s/u/b rows of one source width share a single s390_vpk* builtin */
++OB_DEF_VAR (s390_vec_pack_s16,          s390_vpkh,          0,                  BT_OV_V16QI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_pack_u16,          s390_vpkh,          0,                  BT_OV_UV16QI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_pack_b16,          s390_vpkh,          0,                  BT_OV_BV16QI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_pack_s32,          s390_vpkf,          0,                  BT_OV_V8HI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_pack_u32,          s390_vpkf,          0,                  BT_OV_UV8HI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_pack_b32,          s390_vpkf,          0,                  BT_OV_BV8HI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_pack_s64,          s390_vpkg,          0,                  BT_OV_V4SI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_pack_u64,          s390_vpkg,          0,                  BT_OV_UV4SI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_pack_b64,          s390_vpkg,          0,                  BT_OV_BV4SI_BV2DI_BV2DI)
++
++B_DEF      (s390_vpkh,                  vec_packv8hi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV8HI_UV8HI)
++B_DEF      (s390_vpkf,                  vec_packv4si,       0,                  B_VX,               0,                  BT_FN_UV8HI_UV4SI_UV4SI)
++B_DEF      (s390_vpkg,                  vec_packv2di,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_packs,             s390_vec_packs_s16, s390_vec_packs_u64, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* vec_packs family (_s16 .. _u64): signed rows map to s390_vpks*, unsigned rows to s390_vpkls* */
++OB_DEF_VAR (s390_vec_packs_s16,         s390_vpksh,         0,                  BT_OV_V16QI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_packs_u16,         s390_vpklsh,        0,                  BT_OV_UV16QI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_packs_s32,         s390_vpksf,         0,                  BT_OV_V8HI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_packs_u32,         s390_vpklsf,        0,                  BT_OV_UV8HI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_packs_s64,         s390_vpksg,         0,                  BT_OV_V4SI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_packs_u64,         s390_vpklsg,        0,                  BT_OV_UV4SI_UV2DI_UV2DI)
++
++B_DEF      (s390_vpksh,                 vec_packsv8hi,      0,                  B_VX,               0,                  BT_FN_V16QI_V8HI_V8HI)
++B_DEF      (s390_vpklsh,                vec_packsuv8hi,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV8HI_UV8HI)
++B_DEF      (s390_vpksf,                 vec_packsv4si,      0,                  B_VX,               0,                  BT_FN_V8HI_V4SI_V4SI)
++B_DEF      (s390_vpklsf,                vec_packsuv4si,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV4SI_UV4SI)
++B_DEF      (s390_vpksg,                 vec_packsv2di,      0,                  B_VX,               0,                  BT_FN_V4SI_V2DI_V2DI)
++B_DEF      (s390_vpklsg,                vec_packsuv2di,     0,                  B_VX,               0,                  BT_FN_UV4SI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_packs_cc,          s390_vec_packs_cc_s16,s390_vec_packs_cc_u64,B_VX,           BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) /* vec_packs_cc family (_s16 .. _u64): signed rows map to s390_vpks*s, unsigned rows to s390_vpkls*s; every type name ends in INTPTR */
++OB_DEF_VAR (s390_vec_packs_cc_s16,      s390_vpkshs,        0,                  BT_OV_V16QI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vec_packs_cc_u16,      s390_vpklshs,       0,                  BT_OV_UV16QI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vec_packs_cc_s32,      s390_vpksfs,        0,                  BT_OV_V8HI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vec_packs_cc_u32,      s390_vpklsfs,       0,                  BT_OV_UV8HI_UV4SI_UV4SI_INTPTR)
++OB_DEF_VAR (s390_vec_packs_cc_s64,      s390_vpksgs,        0,                  BT_OV_V4SI_V2DI_V2DI_INTPTR)
++OB_DEF_VAR (s390_vec_packs_cc_u64,      s390_vpklsgs,       0,                  BT_OV_UV4SI_UV2DI_UV2DI_INTPTR)
++
++B_DEF      (s390_vpkshs,                vec_packs_ccv8hi,   0,                  B_VX,               0,                  BT_FN_V16QI_V8HI_V8HI_INTPTR)
++B_DEF      (s390_vpklshs,               vec_packsu_ccv8hi,  0,                  B_VX,               0,                  BT_FN_UV16QI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vpksfs,                vec_packs_ccv4si,   0,                  B_VX,               0,                  BT_FN_V8HI_V4SI_V4SI_INTPTR)
++B_DEF      (s390_vpklsfs,               vec_packsu_ccv4si,  0,                  B_VX,               0,                  BT_FN_UV8HI_UV4SI_UV4SI_INTPTR)
++B_DEF      (s390_vpksgs,                vec_packs_ccv2di,   0,                  B_VX,               0,                  BT_FN_V4SI_V2DI_V2DI_INTPTR)
++B_DEF      (s390_vpklsgs,               vec_packsu_ccv2di,  0,                  B_VX,               0,                  BT_FN_UV4SI_UV2DI_UV2DI_INTPTR)
++
++OB_DEF     (s390_vec_packsu,            s390_vec_packsu_s16,s390_vec_packsu_u64,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* vec_packsu family: signed-input rows map to the B_INT s390_vec_packsu_u* builtins at the end of this group, unsigned rows to s390_vpkls* */
++OB_DEF_VAR (s390_vec_packsu_s16,        s390_vec_packsu_u16,0,                  BT_OV_UV16QI_V8HI_V8HI)                  /* vpklsh */
++OB_DEF_VAR (s390_vec_packsu_u16,        s390_vpklsh,        0,                  BT_OV_UV16QI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_packsu_s32,        s390_vec_packsu_u32,0,                  BT_OV_UV8HI_V4SI_V4SI)                   /* vpklsf */
++OB_DEF_VAR (s390_vec_packsu_u32,        s390_vpklsf,        0,                  BT_OV_UV8HI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_packsu_s64,        s390_vec_packsu_u64,0,                  BT_OV_UV4SI_V2DI_V2DI)                   /* vpklsg */
++OB_DEF_VAR (s390_vec_packsu_u64,        s390_vpklsg,        0,                  BT_OV_UV4SI_UV2DI_UV2DI)
++
++B_DEF      (s390_vec_packsu_u16,        vec_packsu_uv8hi,   0,                  B_VX | B_INT,       0,                  BT_FN_UV16QI_UV8HI_UV8HI)                /* vpklsh */
++B_DEF      (s390_vec_packsu_u32,        vec_packsu_uv4si,   0,                  B_VX | B_INT,       0,                  BT_FN_UV8HI_UV4SI_UV4SI)                 /* vpklsf */
++B_DEF      (s390_vec_packsu_u64,        vec_packsu_uv2di,   0,                  B_VX | B_INT,       0,                  BT_FN_UV4SI_UV2DI_UV2DI)                 /* vpklsg */
++
++OB_DEF     (s390_vec_packsu_cc,         s390_vec_packsu_cc_u16,s390_vec_packsu_cc_u64,B_VX,         BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) /* vec_packsu_cc family: unsigned variants only (_u16 .. _u64), mapped to s390_vpkls*s; no B_DEF row in this group */
++OB_DEF_VAR (s390_vec_packsu_cc_u16,     s390_vpklshs,       0,                  BT_OV_UV16QI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vec_packsu_cc_u32,     s390_vpklsfs,       0,                  BT_OV_UV8HI_UV4SI_UV4SI_INTPTR)
++OB_DEF_VAR (s390_vec_packsu_cc_u64,     s390_vpklsgs,       0,                  BT_OV_UV4SI_UV2DI_UV2DI_INTPTR)
++
++OB_DEF     (s390_vec_perm,              s390_vec_perm_s8,   s390_vec_perm_dbl,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) /* vec_perm family (_s8 .. _dbl): every variant maps to s390_vperm and every type name ends in UV16QI */
++OB_DEF_VAR (s390_vec_perm_s8,           s390_vperm,         0,                  BT_OV_V16QI_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_b8,           s390_vperm,         0,                  BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_u8,           s390_vperm,         0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_s16,          s390_vperm,         0,                  BT_OV_V8HI_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_b16,          s390_vperm,         0,                  BT_OV_BV8HI_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_u16,          s390_vperm,         0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_s32,          s390_vperm,         0,                  BT_OV_V4SI_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_b32,          s390_vperm,         0,                  BT_OV_BV4SI_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_u32,          s390_vperm,         0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_s64,          s390_vperm,         0,                  BT_OV_V2DI_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_b64,          s390_vperm,         0,                  BT_OV_BV2DI_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_u64,          s390_vperm,         0,                  BT_OV_UV2DI_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_perm_dbl,          s390_vperm,         0,                  BT_OV_V2DF_V2DF_V2DF_UV16QI)
++
++B_DEF      (s390_vperm,                 vec_permv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_permi,             s390_vec_permi_s64, s390_vec_permi_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_INT) /* vec_permi family: 2-element vector variants only (_s64 .. _dbl), all via s390_vpdi with operand flag O3_U2 */
++OB_DEF_VAR (s390_vec_permi_s64,         s390_vpdi,          O3_U2,              BT_OV_V2DI_V2DI_V2DI_INT)
++OB_DEF_VAR (s390_vec_permi_b64,         s390_vpdi,          O3_U2,              BT_OV_BV2DI_BV2DI_BV2DI_INT)
++OB_DEF_VAR (s390_vec_permi_u64,         s390_vpdi,          O3_U2,              BT_OV_UV2DI_UV2DI_UV2DI_INT)
++OB_DEF_VAR (s390_vec_permi_dbl,         s390_vpdi,          O3_U2,              BT_OV_V2DF_V2DF_V2DF_INT)
++
++B_DEF      (s390_vpdi,                  vec_permiv2di,      0,                  B_VX,               O3_U2,              BT_FN_UV2DI_UV2DI_UV2DI_INT)
++
++OB_DEF     (s390_vec_splat,             s390_vec_splat2_s8, s390_vec_splat2_dbl,B_VX,               BT_FN_OV4SI_OV4SI_UCHAR) /* vec_splat family: note the variant rows are named s390_vec_splat2_* (_s8 .. _dbl), unlike the family name */
++OB_DEF_VAR (s390_vec_splat2_s8,         s390_vrepb,         O2_U4,              BT_OV_V16QI_V16QI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_b8,         s390_vrepb,         O2_U4,              BT_OV_BV16QI_BV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_u8,         s390_vrepb,         O2_U4,              BT_OV_UV16QI_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_s16,        s390_vreph,         O2_U3,              BT_OV_V8HI_V8HI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_b16,        s390_vreph,         O2_U3,              BT_OV_BV8HI_BV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_u16,        s390_vreph,         O2_U3,              BT_OV_UV8HI_UV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_s32,        s390_vrepf,         O2_U2,              BT_OV_V4SI_V4SI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_b32,        s390_vrepf,         O2_U2,              BT_OV_BV4SI_BV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_u32,        s390_vrepf,         O2_U2,              BT_OV_UV4SI_UV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_s64,        s390_vrepg,         O2_U1,              BT_OV_V2DI_V2DI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_b64,        s390_vrepg,         O2_U1,              BT_OV_BV2DI_BV2DI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_u64,        s390_vrepg,         O2_U1,              BT_OV_UV2DI_UV2DI_UCHAR)
++OB_DEF_VAR (s390_vec_splat2_dbl,        s390_vrepg,         O2_U1,              BT_OV_V2DF_V2DF_UCHAR)
++
++B_DEF      (s390_vrepb,                 vec_splatv16qi,     0,                  B_VX,               O2_U4,              BT_FN_UV16QI_UV16QI_UCHAR) /* operand flags O2_U4 .. O2_U1 on these rows match the flags on the corresponding variant rows */
++B_DEF      (s390_vreph,                 vec_splatv8hi,      0,                  B_VX,               O2_U3,              BT_FN_UV8HI_UV8HI_UCHAR)
++B_DEF      (s390_vrepf,                 vec_splatv4si,      0,                  B_VX,               O2_U2,              BT_FN_UV4SI_UV4SI_UCHAR)
++B_DEF      (s390_vrepg,                 vec_splatv2di,      0,                  B_VX,               O2_U1,              BT_FN_UV2DI_UV2DI_UCHAR)
++
++OB_DEF     (s390_vec_scatter_element,   s390_vec_scatter_element_s32,s390_vec_scatter_element_dbl,B_VX,BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG) /* vec_scatter_element family: 32-bit rows via s390_vscef (O4_U2), 64-bit and dbl rows via s390_vsceg (O4_U1) */
++OB_DEF_VAR (s390_vec_scatter_element_s32,s390_vscef,        O4_U2,              BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_b32,s390_vscef,        O4_U2,              BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_u32,s390_vscef,        O4_U2,              BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_s64,s390_vsceg,        O4_U1,              BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_b64,s390_vsceg,        O4_U1,              BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_u64,s390_vsceg,        O4_U1,              BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG)
++OB_DEF_VAR (s390_vec_scatter_element_dbl,s390_vsceg,        O4_U1,              BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG)
++
++B_DEF      (s390_vscef,                 vec_scatter_elementv4si,0,              B_VX,               O4_U2,              BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG)
++B_DEF      (s390_vsceg,                 vec_scatter_elementv2di,0,              B_VX,               O4_U1,              BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG)
++
++OB_DEF     (s390_vec_sel,               s390_vec_sel_b8_a,  s390_vec_sel_dbl_b, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) /* vec_sel family: all variants via s390_vsel; the last type component is an unsigned vector (UV*) on _a rows and a bool vector (BV*) on _b rows */
++OB_DEF_VAR (s390_vec_sel_b8_a,          s390_vsel,          0,                  BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sel_b8_b,          s390_vsel,          0,                  BT_OV_BV16QI_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_sel_s8_a,          s390_vsel,          0,                  BT_OV_V16QI_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sel_s8_b,          s390_vsel,          0,                  BT_OV_V16QI_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_sel_u8_a,          s390_vsel,          0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sel_u8_b,          s390_vsel,          0,                  BT_OV_UV16QI_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_sel_b16_a,         s390_vsel,          0,                  BT_OV_BV8HI_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sel_b16_b,         s390_vsel,          0,                  BT_OV_BV8HI_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_sel_s16_a,         s390_vsel,          0,                  BT_OV_V8HI_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sel_s16_b,         s390_vsel,          0,                  BT_OV_V8HI_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_sel_u16_a,         s390_vsel,          0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sel_u16_b,         s390_vsel,          0,                  BT_OV_UV8HI_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_sel_b32_a,         s390_vsel,          0,                  BT_OV_BV4SI_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sel_b32_b,         s390_vsel,          0,                  BT_OV_BV4SI_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_sel_s32_a,         s390_vsel,          0,                  BT_OV_V4SI_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sel_s32_b,         s390_vsel,          0,                  BT_OV_V4SI_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_sel_u32_a,         s390_vsel,          0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sel_u32_b,         s390_vsel,          0,                  BT_OV_UV4SI_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_sel_b64_a,         s390_vsel,          0,                  BT_OV_BV2DI_BV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_sel_b64_b,         s390_vsel,          0,                  BT_OV_BV2DI_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_sel_s64_a,         s390_vsel,          0,                  BT_OV_V2DI_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_sel_s64_b,         s390_vsel,          0,                  BT_OV_V2DI_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_sel_u64_a,         s390_vsel,          0,                  BT_OV_UV2DI_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_sel_u64_b,         s390_vsel,          0,                  BT_OV_UV2DI_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_sel_dbl_a,         s390_vsel,          0,                  BT_OV_V2DF_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_sel_dbl_b,         s390_vsel,          0,                  BT_OV_V2DF_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vsel,                  vec_selv16qi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_extend_s64,        s390_vec_extend_s64_s8,s390_vec_extend_s64_s32,B_VX,        BT_FN_OV4SI_OV4SI) /* vec_extend_s64 family: signed variants only (_s8 .. _s32); every type name starts with V2DI */
++OB_DEF_VAR (s390_vec_extend_s64_s8,     s390_vsegb,         0,                  BT_OV_V2DI_V16QI)
++OB_DEF_VAR (s390_vec_extend_s64_s16,    s390_vsegh,         0,                  BT_OV_V2DI_V8HI)
++OB_DEF_VAR (s390_vec_extend_s64_s32,    s390_vsegf,         0,                  BT_OV_V2DI_V4SI)
++
++B_DEF      (s390_vsegb,                 vec_extendv16qi,    0,                  B_VX,               0,                  BT_FN_V2DI_V16QI)
++B_DEF      (s390_vsegh,                 vec_extendv8hi,     0,                  B_VX,               0,                  BT_FN_V2DI_V8HI)
++B_DEF      (s390_vsegf,                 vec_extendv4si,     0,                  B_VX,               0,                  BT_FN_V2DI_V4SI)
++
++OB_DEF     (s390_vec_xstd2,             s390_vec_xstd2_s8,  s390_vec_xstd2_dbl, B_VX,               BT_FN_VOID_OV4SI_INT_VOIDPTR) /* vec_xstd2 family (_s8 .. _dbl): every variant has MAX in the builtin column, flag O2_LIT and a vst note; no B_DEF row follows */
++OB_DEF_VAR (s390_vec_xstd2_s8,          MAX,                O2_LIT,             BT_OV_VOID_V16QI_LONG_SCHARPTR)          /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u8,          MAX,                O2_LIT,             BT_OV_VOID_UV16QI_LONG_UCHARPTR)         /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s16,         MAX,                O2_LIT,             BT_OV_VOID_V8HI_LONG_SHORTPTR)           /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u16,         MAX,                O2_LIT,             BT_OV_VOID_UV8HI_LONG_USHORTPTR)         /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s32,         MAX,                O2_LIT,             BT_OV_VOID_V4SI_LONG_INTPTR)             /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u32,         MAX,                O2_LIT,             BT_OV_VOID_UV4SI_LONG_UINTPTR)           /* vst */
++OB_DEF_VAR (s390_vec_xstd2_s64,         MAX,                O2_LIT,             BT_OV_VOID_V2DI_LONG_LONGLONGPTR)        /* vst */
++OB_DEF_VAR (s390_vec_xstd2_u64,         MAX,                O2_LIT,             BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR)      /* vst */
++OB_DEF_VAR (s390_vec_xstd2_dbl,         MAX,                O2_LIT,             BT_OV_VOID_V2DF_LONG_DBLPTR)             /* vst */
++
++OB_DEF     (s390_vec_xstw4,             s390_vec_xstw4_s8,  s390_vec_xstw4_u32, B_VX,               BT_FN_VOID_OV4SI_INT_VOIDPTR) /* vec_xstw4 family: variants stop at _u32 (no 64-bit or dbl rows); every row has MAX in the builtin column, flag O2_LIT and a vst note */
++OB_DEF_VAR (s390_vec_xstw4_s8,          MAX,                O2_LIT,             BT_OV_VOID_V16QI_LONG_SCHARPTR)          /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u8,          MAX,                O2_LIT,             BT_OV_VOID_UV16QI_LONG_UCHARPTR)         /* vst */
++OB_DEF_VAR (s390_vec_xstw4_s16,         MAX,                O2_LIT,             BT_OV_VOID_V8HI_LONG_SHORTPTR)           /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u16,         MAX,                O2_LIT,             BT_OV_VOID_UV8HI_LONG_USHORTPTR)         /* vst */
++OB_DEF_VAR (s390_vec_xstw4_s32,         MAX,                O2_LIT,             BT_OV_VOID_V4SI_LONG_INTPTR)             /* vst */
++OB_DEF_VAR (s390_vec_xstw4_u32,         MAX,                O2_LIT,             BT_OV_VOID_UV4SI_LONG_UINTPTR)           /* vst */
++
++OB_DEF     (s390_vec_store_len,         s390_vec_store_len_s8,s390_vec_store_len_dbl,B_VX,          BT_FN_VOID_OV4SI_VOIDPTR_UINT) /* vec_store_len family: every variant row (_s8 .. _dbl) maps to the single s390_vstl builtin */
++OB_DEF_VAR (s390_vec_store_len_s8,      s390_vstl,          0,                  BT_OV_VOID_V16QI_SCHARPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u8,      s390_vstl,          0,                  BT_OV_VOID_UV16QI_UCHARPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s16,     s390_vstl,          0,                  BT_OV_VOID_V8HI_SHORTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u16,     s390_vstl,          0,                  BT_OV_VOID_UV8HI_USHORTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s32,     s390_vstl,          0,                  BT_OV_VOID_V4SI_INTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u32,     s390_vstl,          0,                  BT_OV_VOID_UV4SI_UINTPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_s64,     s390_vstl,          0,                  BT_OV_VOID_V2DI_LONGLONGPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_u64,     s390_vstl,          0,                  BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT)
++OB_DEF_VAR (s390_vec_store_len_dbl,     s390_vstl,          0,                  BT_OV_VOID_V2DF_DBLPTR_UINT)
++
++B_DEF      (s390_vstl,                  vstlv16qi,          0,                  B_VX,               0,                  BT_FN_VOID_V16QI_UINT_VOIDPTR) /* NOTE(review): type name lists UINT before the pointer, the variant rows list the pointer first; presumably reordered elsewhere - confirm */
++
++OB_DEF     (s390_vec_unpackh,           s390_vec_unpackh_s8,s390_vec_unpackh_u32,B_VX,              BT_FN_OV4SI_OV4SI) /* signed and bool variants share s390_vuph{b,h,f}; unsigned variants use s390_vuplh{b,h,f} */
++OB_DEF_VAR (s390_vec_unpackh_s8,        s390_vuphb,         0,                  BT_OV_V8HI_V16QI)
++OB_DEF_VAR (s390_vec_unpackh_b8,        s390_vuphb,         0,                  BT_OV_BV8HI_BV16QI)
++OB_DEF_VAR (s390_vec_unpackh_u8,        s390_vuplhb,        0,                  BT_OV_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_unpackh_s16,       s390_vuphh,         0,                  BT_OV_V4SI_V8HI)
++OB_DEF_VAR (s390_vec_unpackh_b16,       s390_vuphh,         0,                  BT_OV_BV4SI_BV8HI)
++OB_DEF_VAR (s390_vec_unpackh_u16,       s390_vuplhh,        0,                  BT_OV_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_unpackh_s32,       s390_vuphf,         0,                  BT_OV_V2DI_V4SI)
++OB_DEF_VAR (s390_vec_unpackh_b32,       s390_vuphf,         0,                  BT_OV_BV2DI_BV4SI)
++OB_DEF_VAR (s390_vec_unpackh_u32,       s390_vuplhf,        0,                  BT_OV_UV2DI_UV4SI)
++
++B_DEF      (s390_vuphb,                 vec_unpackhv16qi,   0,                  B_VX,               0,                  BT_FN_V8HI_V16QI) /* vuph*: signed vector types, pattern vec_unpackh<mode> */
++B_DEF      (s390_vuplhb,                vec_unpackh_lv16qi, 0,                  B_VX,               0,                  BT_FN_UV8HI_UV16QI) /* vuplh*: unsigned vector types, pattern vec_unpackh_l<mode> */
++B_DEF      (s390_vuphh,                 vec_unpackhv8hi,    0,                  B_VX,               0,                  BT_FN_V4SI_V8HI)
++B_DEF      (s390_vuplhh,                vec_unpackh_lv8hi,  0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI)
++B_DEF      (s390_vuphf,                 vec_unpackhv4si,    0,                  B_VX,               0,                  BT_FN_V2DI_V4SI)
++B_DEF      (s390_vuplhf,                vec_unpackh_lv4si,  0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI)
++
++OB_DEF     (s390_vec_unpackl,           s390_vec_unpackl_s8,s390_vec_unpackl_u32,B_VX,              BT_FN_OV4SI_OV4SI) /* signed and bool variants share s390_vupl{b,hw,f}; unsigned variants use s390_vupll{b,h,f} */
++OB_DEF_VAR (s390_vec_unpackl_s8,        s390_vuplb,         0,                  BT_OV_V8HI_V16QI)
++OB_DEF_VAR (s390_vec_unpackl_b8,        s390_vuplb,         0,                  BT_OV_BV8HI_BV16QI)
++OB_DEF_VAR (s390_vec_unpackl_u8,        s390_vupllb,        0,                  BT_OV_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_unpackl_s16,       s390_vuplhw,        0,                  BT_OV_V4SI_V8HI)
++OB_DEF_VAR (s390_vec_unpackl_b16,       s390_vuplhw,        0,                  BT_OV_BV4SI_BV8HI) /* same builtin as _s16, like _b8/_b32 here and s390_vec_unpackh_b16; was s390_vupllh (the _u16 builtin) */
++OB_DEF_VAR (s390_vec_unpackl_u16,       s390_vupllh,        0,                  BT_OV_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_unpackl_s32,       s390_vuplf,         0,                  BT_OV_V2DI_V4SI)
++OB_DEF_VAR (s390_vec_unpackl_b32,       s390_vuplf,         0,                  BT_OV_BV2DI_BV4SI)
++OB_DEF_VAR (s390_vec_unpackl_u32,       s390_vupllf,        0,                  BT_OV_UV2DI_UV4SI)
++
++B_DEF      (s390_vuplb,                 vec_unpacklv16qi,   0,                  B_VX,               0,                  BT_FN_V8HI_V16QI)
++B_DEF      (s390_vupllb,                vec_unpackl_lv16qi, 0,                  B_VX,               0,                  BT_FN_UV8HI_UV16QI)
++B_DEF      (s390_vuplhw,                vec_unpacklv8hi,    0,                  B_VX,               0,                  BT_FN_V4SI_V8HI) /* naming irregularity: the V8HI signed low-unpack builtin is vuplhw, next to vuplb/vuplf */
++B_DEF      (s390_vupllh,                vec_unpackl_lv8hi,  0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI)
++B_DEF      (s390_vuplf,                 vec_unpacklv4si,    0,                  B_VX,               0,                  BT_FN_V2DI_V4SI)
++B_DEF      (s390_vupllf,                vec_unpackl_lv4si,  0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI)
++B_DEF      (s390_vaq,                   vec_add_u128,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI) /* not an unpack builtin: pattern vec_add_u128, operands typed UV16QI */
++
++OB_DEF     (s390_vec_addc,              s390_vec_addc_u8,   s390_vec_addc_u64,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* unsigned element types only (u8 .. u64), one s390_vacc* builtin per width */
++OB_DEF_VAR (s390_vec_addc_u8,           s390_vaccb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_addc_u16,          s390_vacch,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_addc_u32,          s390_vaccf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_addc_u64,          s390_vaccg,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF      (s390_vaccb,                 vec_addcv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI) /* vacc{b,h,f,g}: targets of the s390_vec_addc_* variants */
++B_DEF      (s390_vacch,                 vec_addcv8hi,       0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vaccf,                 vec_addcv4si,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vaccg,                 vec_addcv2di,       0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF      (s390_vaccq,                 vec_addc_u128,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI) /* *_u128 patterns from here on; operands are typed UV16QI */
++B_DEF      (s390_vacq,                  vec_adde_u128,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) /* three UV16QI operands */
++B_DEF      (s390_vacccq,                vec_addec_u128,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_and,               s390_vec_and_b8,    s390_vec_and_dbl_c, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* suffix _a: bool vector as 1st operand, _b: no bool operand, _c: bool vector as 2nd operand; plain _bN: both bool. All rows use s390_vn */
++OB_DEF_VAR (s390_vec_and_b8,            s390_vn,            0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_s8_a,          s390_vn,            0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_and_s8_b,          s390_vn,            0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_and_s8_c,          s390_vn,            0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_u8_a,          s390_vn,            0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_and_u8_b,          s390_vn,            0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_and_u8_c,          s390_vn,            0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_and_b16,           s390_vn,            0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_s16_a,         s390_vn,            0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_and_s16_b,         s390_vn,            0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_and_s16_c,         s390_vn,            0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_u16_a,         s390_vn,            0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_and_u16_b,         s390_vn,            0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_and_u16_c,         s390_vn,            0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_and_b32,           s390_vn,            0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_s32_a,         s390_vn,            0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_and_s32_b,         s390_vn,            0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_and_s32_c,         s390_vn,            0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_u32_a,         s390_vn,            0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_and_u32_b,         s390_vn,            0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_and_u32_c,         s390_vn,            0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_and_b64,           s390_vn,            0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_s64_a,         s390_vn,            0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_and_s64_b,         s390_vn,            0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_and_s64_c,         s390_vn,            0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_u64_a,         s390_vn,            0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_and_u64_b,         s390_vn,            0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_and_u64_c,         s390_vn,            0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_and_dbl_a,         s390_vn,            0,                  BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_and_dbl_b,         s390_vn,            0,                  BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_and_dbl_c,         s390_vn,            0,                  BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vn,                    andv16qi3,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI) /* one UV16QI builtin (pattern andv16qi3) backs every s390_vec_and_* variant, whatever its element type */
++
++OB_DEF     (s390_vec_andc,              s390_vec_andc_b8,   s390_vec_andc_dbl_c,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* suffix _a: bool vector as 1st operand, _b: no bool operand, _c: bool vector as 2nd operand; plain _bN: both bool. All rows use s390_vnc */
++OB_DEF_VAR (s390_vec_andc_b8,           s390_vnc,           0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_s8_a,         s390_vnc,           0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_andc_s8_b,         s390_vnc,           0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_andc_s8_c,         s390_vnc,           0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_a,         s390_vnc,           0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_b,         s390_vnc,           0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_andc_u8_c,         s390_vnc,           0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_andc_b16,          s390_vnc,           0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_s16_a,        s390_vnc,           0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_andc_s16_b,        s390_vnc,           0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_andc_s16_c,        s390_vnc,           0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_a,        s390_vnc,           0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_b,        s390_vnc,           0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_andc_u16_c,        s390_vnc,           0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_andc_b32,          s390_vnc,           0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_s32_a,        s390_vnc,           0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_andc_s32_b,        s390_vnc,           0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_andc_s32_c,        s390_vnc,           0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_a,        s390_vnc,           0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_b,        s390_vnc,           0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_andc_u32_c,        s390_vnc,           0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_andc_b64,          s390_vnc,           0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_s64_a,        s390_vnc,           0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_andc_s64_b,        s390_vnc,           0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_andc_s64_c,        s390_vnc,           0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_a,        s390_vnc,           0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_b,        s390_vnc,           0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_andc_u64_c,        s390_vnc,           0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_andc_dbl_a,        s390_vnc,           0,                  BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_andc_dbl_b,        s390_vnc,           0,                  BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_andc_dbl_c,        s390_vnc,           0,                  BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vnc,                   vec_andcv16qi3,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI) /* one UV16QI builtin (pattern vec_andcv16qi3) backs every s390_vec_andc_* variant, whatever its element type */
++
++OB_DEF     (s390_vec_avg,               s390_vec_avg_s8,    s390_vec_avg_u64,   B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* signed variants -> s390_vavg{b,h,f,g}; unsigned variants -> s390_vavgl{b,h,f,g} */
++OB_DEF_VAR (s390_vec_avg_s8,            s390_vavgb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_avg_u8,            s390_vavglb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_avg_s16,           s390_vavgh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_avg_u16,           s390_vavglh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_avg_s32,           s390_vavgf,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_avg_u32,           s390_vavglf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_avg_s64,           s390_vavgg,         0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_avg_u64,           s390_vavglg,        0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF      (s390_vavgb,                 vec_avgv16qi,       0,                  B_VX,               0,                  BT_FN_V16QI_V16QI_V16QI) /* vavg*: signed types, pattern vec_avg<mode>; vavgl*: unsigned types, pattern vec_avgu<mode> */
++B_DEF      (s390_vavglb,                vec_avguv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vavgh,                 vec_avgv8hi,        0,                  B_VX,               0,                  BT_FN_V8HI_V8HI_V8HI)
++B_DEF      (s390_vavglh,                vec_avguv8hi,       0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vavgf,                 vec_avgv4si,        0,                  B_VX,               0,                  BT_FN_V4SI_V4SI_V4SI)
++B_DEF      (s390_vavglf,                vec_avguv4si,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vavgg,                 vec_avgv2di,        0,                  B_VX,               0,                  BT_FN_V2DI_V2DI_V2DI)
++B_DEF      (s390_vavglg,                vec_avguv2di,       0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF      (s390_vcksm,                 vec_checksum,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vceqbs,                vec_cmpeqv16qi_cc,  0,                  B_VX,               0,                  BT_FN_V16QI_UV16QI_UV16QI_INTPTR) /* *_cc patterns (through s390_vfchedbs): trailing INTPTR argument -- presumably receives the condition code; confirm against the expander */
++B_DEF      (s390_vceqhs,                vec_cmpeqv8hi_cc,   0,                  B_VX,               0,                  BT_FN_V8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vceqfs,                vec_cmpeqv4si_cc,   0,                  B_VX,               0,                  BT_FN_V4SI_UV4SI_UV4SI_INTPTR)
++B_DEF      (s390_vceqgs,                vec_cmpeqv2di_cc,   0,                  B_VX,               0,                  BT_FN_V2DI_UV2DI_UV2DI_INTPTR)
++B_DEF      (s390_vfcedbs,               vec_cmpeqv2df_cc,   0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF      (s390_vchbs,                 vec_cmphv16qi_cc,   0,                  B_VX,               0,                  BT_FN_V16QI_V16QI_V16QI_INTPTR)
++B_DEF      (s390_vchlbs,                vec_cmphlv16qi_cc,  0,                  B_VX,               0,                  BT_FN_V16QI_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vchhs,                 vec_cmphv8hi_cc,    0,                  B_VX,               0,                  BT_FN_V8HI_V8HI_V8HI_INTPTR)
++B_DEF      (s390_vchlhs,                vec_cmphlv8hi_cc,   0,                  B_VX,               0,                  BT_FN_V8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vchfs,                 vec_cmphv4si_cc,    0,                  B_VX,               0,                  BT_FN_V4SI_V4SI_V4SI_INTPTR)
++B_DEF      (s390_vchlfs,                vec_cmphlv4si_cc,   0,                  B_VX,               0,                  BT_FN_V4SI_UV4SI_UV4SI_INTPTR)
++B_DEF      (s390_vchgs,                 vec_cmphv2di_cc,    0,                  B_VX,               0,                  BT_FN_V2DI_V2DI_V2DI_INTPTR)
++B_DEF      (s390_vchlgs,                vec_cmphlv2di_cc,   0,                  B_VX,               0,                  BT_FN_V2DI_UV2DI_UV2DI_INTPTR)
++B_DEF      (s390_vfchdbs,               vec_cmphv2df_cc,    0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF      (s390_vfchedbs,              vec_cmphev2df_cc,   0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF_INTPTR)
++B_DEF      (vec_all_eqv16qi,            vec_all_eqv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI) /* vec_all_* rows: builtin name equals pattern name, no s390_ prefix, flags B_VX | B_INT, INT result */
++B_DEF      (vec_all_eqv8hi,             vec_all_eqv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_eqv4si,             vec_all_eqv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_eqv2di,             vec_all_eqv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_eqv2df,             vec_all_eqv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_all_nev16qi,            vec_all_nev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_all_nev8hi,             vec_all_nev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_nev4si,             vec_all_nev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_nev2di,             vec_all_nev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_nev2df,             vec_all_nev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_all_gev16qi,            vec_all_gev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI) /* ordered compares come in pairs: <op><mode> on signed types, <op>u<mode> on unsigned types */
++B_DEF      (vec_all_geuv16qi,           vec_all_geuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_all_gev8hi,             vec_all_gev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_all_geuv8hi,            vec_all_geuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_gev4si,             vec_all_gev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_all_geuv4si,            vec_all_geuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_gev2di,             vec_all_gev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_all_geuv2di,            vec_all_geuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_gev2df,             vec_all_gev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_all_gtv16qi,            vec_all_gtv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_all_gtuv16qi,           vec_all_gtuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_all_gtv8hi,             vec_all_gtv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_all_gtuv8hi,            vec_all_gtuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_gtv4si,             vec_all_gtv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_all_gtuv4si,            vec_all_gtuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_gtv2di,             vec_all_gtv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_all_gtuv2di,            vec_all_gtuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_gtv2df,             vec_all_gtv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_all_lev16qi,            vec_all_lev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_all_leuv16qi,           vec_all_leuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_all_lev8hi,             vec_all_lev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_all_leuv8hi,            vec_all_leuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_lev4si,             vec_all_lev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_all_leuv4si,            vec_all_leuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_lev2di,             vec_all_lev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_all_leuv2di,            vec_all_leuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_lev2df,             vec_all_lev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_all_ltv16qi,            vec_all_ltv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_all_ltuv16qi,           vec_all_ltuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_all_ltv8hi,             vec_all_ltv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_all_ltuv8hi,            vec_all_ltuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_all_ltv4si,             vec_all_ltv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_all_ltuv4si,            vec_all_ltuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_all_ltv2di,             vec_all_ltv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_all_ltuv2di,            vec_all_ltuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_all_ltv2df,             vec_all_ltv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_eq,            s390_vec_all_eq_s8_a,s390_vec_all_eq_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI) /* signedness does not pick the builtin here: every variant of a given width uses the same vec_all_eq<mode> */
++OB_DEF_VAR (s390_vec_all_eq_s8_a,       vec_all_eqv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_eq_s8_b,       vec_all_eqv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_a,       vec_all_eqv16qi,    0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_b,       vec_all_eqv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_eq_b8_c,       vec_all_eqv16qi,    0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_eq_u8_a,       vec_all_eqv16qi,    0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_eq_u8_b,       vec_all_eqv16qi,    0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_eq_s16_a,      vec_all_eqv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_eq_s16_b,      vec_all_eqv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_a,      vec_all_eqv8hi,     0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_b,      vec_all_eqv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_eq_b16_c,      vec_all_eqv8hi,     0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_eq_u16_a,      vec_all_eqv8hi,     0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_eq_u16_b,      vec_all_eqv8hi,     0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_eq_s32_a,      vec_all_eqv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_eq_s32_b,      vec_all_eqv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_a,      vec_all_eqv4si,     0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_b,      vec_all_eqv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_eq_b32_c,      vec_all_eqv4si,     0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_eq_u32_a,      vec_all_eqv4si,     0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_eq_u32_b,      vec_all_eqv4si,     0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_eq_s64_a,      vec_all_eqv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_eq_s64_b,      vec_all_eqv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_a,      vec_all_eqv2di,     0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_b,      vec_all_eqv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_eq_b64_c,      vec_all_eqv2di,     0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_eq_u64_a,      vec_all_eqv2di,     0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_eq_u64_b,      vec_all_eqv2di,     0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_eq_dbl,        vec_all_eqv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_ne,            s390_vec_all_ne_s8_a,s390_vec_all_ne_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI) /* signedness does not pick the builtin here: every variant of a given width uses the same vec_all_ne<mode> */
++OB_DEF_VAR (s390_vec_all_ne_s8_a,       vec_all_nev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ne_s8_b,       vec_all_nev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_a,       vec_all_nev16qi,    0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_b,       vec_all_nev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ne_b8_c,       vec_all_nev16qi,    0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ne_u8_a,       vec_all_nev16qi,    0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ne_u8_b,       vec_all_nev16qi,    0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ne_s16_a,      vec_all_nev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ne_s16_b,      vec_all_nev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_a,      vec_all_nev8hi,     0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_b,      vec_all_nev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ne_b16_c,      vec_all_nev8hi,     0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ne_u16_a,      vec_all_nev8hi,     0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ne_u16_b,      vec_all_nev8hi,     0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ne_s32_a,      vec_all_nev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ne_s32_b,      vec_all_nev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_a,      vec_all_nev4si,     0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_b,      vec_all_nev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ne_b32_c,      vec_all_nev4si,     0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ne_u32_a,      vec_all_nev4si,     0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ne_u32_b,      vec_all_nev4si,     0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ne_s64_a,      vec_all_nev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ne_s64_b,      vec_all_nev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_a,      vec_all_nev2di,     0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_b,      vec_all_nev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ne_b64_c,      vec_all_nev2di,     0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ne_u64_a,      vec_all_nev2di,     0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ne_u64_b,      vec_all_nev2di,     0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ne_dbl,        vec_all_nev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_ge,            s390_vec_all_ge_s8_a,s390_vec_all_ge_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI) /* a signed operand on either side selects vec_all_ge<mode>; bool/unsigned-only pairs select vec_all_geu<mode> */
++OB_DEF_VAR (s390_vec_all_ge_s8_a,       vec_all_gev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ge_s8_b,       vec_all_gev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_a,       vec_all_geuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_b,       vec_all_gev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_ge_b8_c,       vec_all_geuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ge_u8_a,       vec_all_geuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_ge_u8_b,       vec_all_geuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_ge_s16_a,      vec_all_gev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ge_s16_b,      vec_all_gev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_a,      vec_all_geuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_b,      vec_all_gev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_ge_b16_c,      vec_all_geuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ge_u16_a,      vec_all_geuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_ge_u16_b,      vec_all_geuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_ge_s32_a,      vec_all_gev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ge_s32_b,      vec_all_gev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_a,      vec_all_geuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_b,      vec_all_gev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_ge_b32_c,      vec_all_geuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ge_u32_a,      vec_all_geuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_ge_u32_b,      vec_all_geuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_ge_s64_a,      vec_all_gev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ge_s64_b,      vec_all_gev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_a,      vec_all_geuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_b,      vec_all_gev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_ge_b64_c,      vec_all_geuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ge_u64_a,      vec_all_geuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_ge_u64_b,      vec_all_geuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_ge_dbl,        vec_all_gev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_gt,            s390_vec_all_gt_s8_a,s390_vec_all_gt_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_gt_s8_a,       vec_all_gtv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_gt_s8_b,       vec_all_gtv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_a,       vec_all_gtuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_b,       vec_all_gtv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_gt_b8_c,       vec_all_gtuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_gt_u8_a,       vec_all_gtuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_gt_u8_b,       vec_all_gtuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_gt_s16_a,      vec_all_gtv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_gt_s16_b,      vec_all_gtv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_a,      vec_all_gtuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_b,      vec_all_gtv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_gt_b16_c,      vec_all_gtuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_gt_u16_a,      vec_all_gtuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_gt_u16_b,      vec_all_gtuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_gt_s32_a,      vec_all_gtv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_gt_s32_b,      vec_all_gtv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_a,      vec_all_gtuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_b,      vec_all_gtv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_gt_b32_c,      vec_all_gtuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_gt_u32_a,      vec_all_gtuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_gt_u32_b,      vec_all_gtuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_gt_s64_a,      vec_all_gtv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_gt_s64_b,      vec_all_gtv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_a,      vec_all_gtuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_b,      vec_all_gtv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_gt_b64_c,      vec_all_gtuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_gt_u64_a,      vec_all_gtuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_gt_u64_b,      vec_all_gtuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_gt_dbl,        vec_all_gtv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_le,            s390_vec_all_le_s8_a,s390_vec_all_le_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_le_s8_a,       vec_all_lev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_le_s8_b,       vec_all_lev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_a,       vec_all_leuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_b,       vec_all_lev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_le_b8_c,       vec_all_leuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_le_u8_a,       vec_all_leuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_le_u8_b,       vec_all_leuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_le_s16_a,      vec_all_lev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_le_s16_b,      vec_all_lev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_a,      vec_all_leuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_b,      vec_all_lev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_le_b16_c,      vec_all_leuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_le_u16_a,      vec_all_leuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_le_u16_b,      vec_all_leuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_le_s32_a,      vec_all_lev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_le_s32_b,      vec_all_lev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_a,      vec_all_leuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_b,      vec_all_lev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_le_b32_c,      vec_all_leuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_le_u32_a,      vec_all_leuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_le_u32_b,      vec_all_leuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_le_s64_a,      vec_all_lev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_le_s64_b,      vec_all_lev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_a,      vec_all_leuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_b,      vec_all_lev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_le_b64_c,      vec_all_leuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_le_u64_a,      vec_all_leuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_le_u64_b,      vec_all_leuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_le_dbl,        vec_all_lev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_all_lt,            s390_vec_all_lt_s8_a,s390_vec_all_lt_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_all_lt_s8_a,       vec_all_ltv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_lt_s8_b,       vec_all_ltv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_a,       vec_all_ltuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_b,       vec_all_ltv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_all_lt_b8_c,       vec_all_ltuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_lt_u8_a,       vec_all_ltuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_all_lt_u8_b,       vec_all_ltuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_all_lt_s16_a,      vec_all_ltv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_lt_s16_b,      vec_all_ltv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_a,      vec_all_ltuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_b,      vec_all_ltv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_all_lt_b16_c,      vec_all_ltuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_lt_u16_a,      vec_all_ltuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_all_lt_u16_b,      vec_all_ltuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_all_lt_s32_a,      vec_all_ltv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_lt_s32_b,      vec_all_ltv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_a,      vec_all_ltuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_b,      vec_all_ltv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_all_lt_b32_c,      vec_all_ltuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_lt_u32_a,      vec_all_ltuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_all_lt_u32_b,      vec_all_ltuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_all_lt_s64_a,      vec_all_ltv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_lt_s64_b,      vec_all_ltv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_a,      vec_all_ltuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_b,      vec_all_ltv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_all_lt_b64_c,      vec_all_ltuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_lt_u64_a,      vec_all_ltuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_all_lt_u64_b,      vec_all_ltuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_all_lt_dbl,        vec_all_ltv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++B_DEF      (vec_any_eqv16qi,            vec_any_eqv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_eqv8hi,             vec_any_eqv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_eqv4si,             vec_any_eqv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_eqv2di,             vec_any_eqv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_eqv2df,             vec_any_eqv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_any_nev16qi,            vec_any_nev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_nev8hi,             vec_any_nev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_nev4si,             vec_any_nev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_nev2di,             vec_any_nev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_nev2df,             vec_any_nev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_any_gev16qi,            vec_any_gev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_any_geuv16qi,           vec_any_geuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_gev8hi,             vec_any_gev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_any_geuv8hi,            vec_any_geuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_gev4si,             vec_any_gev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_any_geuv4si,            vec_any_geuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_gev2di,             vec_any_gev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_any_geuv2di,            vec_any_geuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_gev2df,             vec_any_gev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_any_gtv16qi,            vec_any_gtv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_any_gtuv16qi,           vec_any_gtuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_gtv8hi,             vec_any_gtv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_any_gtuv8hi,            vec_any_gtuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_gtv4si,             vec_any_gtv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_any_gtuv4si,            vec_any_gtuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_gtv2di,             vec_any_gtv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_any_gtuv2di,            vec_any_gtuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_gtv2df,             vec_any_gtv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_any_lev16qi,            vec_any_lev16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_any_leuv16qi,           vec_any_leuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_lev8hi,             vec_any_lev8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_any_leuv8hi,            vec_any_leuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_lev4si,             vec_any_lev4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_any_leuv4si,            vec_any_leuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_lev2di,             vec_any_lev2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_any_leuv2di,            vec_any_leuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_lev2df,             vec_any_lev2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (vec_any_ltv16qi,            vec_any_ltv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_V16QI_V16QI)
++B_DEF      (vec_any_ltuv16qi,           vec_any_ltuv16qi,   0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (vec_any_ltv8hi,             vec_any_ltv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V8HI_V8HI)
++B_DEF      (vec_any_ltuv8hi,            vec_any_ltuv8hi,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV8HI_UV8HI)
++B_DEF      (vec_any_ltv4si,             vec_any_ltv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V4SI_V4SI)
++B_DEF      (vec_any_ltuv4si,            vec_any_ltuv4si,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV4SI_UV4SI)
++B_DEF      (vec_any_ltv2di,             vec_any_ltv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DI_V2DI)
++B_DEF      (vec_any_ltuv2di,            vec_any_ltuv2di,    0,                  B_VX | B_INT,       0,                  BT_FN_INT_UV2DI_UV2DI)
++B_DEF      (vec_any_ltv2df,             vec_any_ltv2df,     0,                  B_VX | B_INT,       0,                  BT_FN_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_eq,            s390_vec_any_eq_s8_a,s390_vec_any_eq_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_eq_s8_a,       vec_any_eqv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_eq_s8_b,       vec_any_eqv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_a,       vec_any_eqv16qi,    0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_b,       vec_any_eqv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_eq_b8_c,       vec_any_eqv16qi,    0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_eq_u8_a,       vec_any_eqv16qi,    0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_eq_u8_b,       vec_any_eqv16qi,    0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_eq_s16_a,      vec_any_eqv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_eq_s16_b,      vec_any_eqv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_a,      vec_any_eqv8hi,     0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_b,      vec_any_eqv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_eq_b16_c,      vec_any_eqv8hi,     0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_eq_u16_a,      vec_any_eqv8hi,     0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_eq_u16_b,      vec_any_eqv8hi,     0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_eq_s32_a,      vec_any_eqv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_eq_s32_b,      vec_any_eqv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_a,      vec_any_eqv4si,     0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_b,      vec_any_eqv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_eq_b32_c,      vec_any_eqv4si,     0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_eq_u32_a,      vec_any_eqv4si,     0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_eq_u32_b,      vec_any_eqv4si,     0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_eq_s64_a,      vec_any_eqv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_eq_s64_b,      vec_any_eqv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_a,      vec_any_eqv2di,     0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_b,      vec_any_eqv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_eq_b64_c,      vec_any_eqv2di,     0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_eq_u64_a,      vec_any_eqv2di,     0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_eq_u64_b,      vec_any_eqv2di,     0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_eq_dbl,        vec_any_eqv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_ne,            s390_vec_any_ne_s8_a,s390_vec_any_ne_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_ne_s8_a,       vec_any_nev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ne_s8_b,       vec_any_nev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_a,       vec_any_nev16qi,    0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_b,       vec_any_nev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ne_b8_c,       vec_any_nev16qi,    0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ne_u8_a,       vec_any_nev16qi,    0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ne_u8_b,       vec_any_nev16qi,    0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ne_s16_a,      vec_any_nev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ne_s16_b,      vec_any_nev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_a,      vec_any_nev8hi,     0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_b,      vec_any_nev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ne_b16_c,      vec_any_nev8hi,     0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ne_u16_a,      vec_any_nev8hi,     0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ne_u16_b,      vec_any_nev8hi,     0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ne_s32_a,      vec_any_nev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ne_s32_b,      vec_any_nev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_a,      vec_any_nev4si,     0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_b,      vec_any_nev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ne_b32_c,      vec_any_nev4si,     0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ne_u32_a,      vec_any_nev4si,     0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ne_u32_b,      vec_any_nev4si,     0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ne_s64_a,      vec_any_nev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ne_s64_b,      vec_any_nev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_a,      vec_any_nev2di,     0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_b,      vec_any_nev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ne_b64_c,      vec_any_nev2di,     0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ne_u64_a,      vec_any_nev2di,     0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ne_u64_b,      vec_any_nev2di,     0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ne_dbl,        vec_any_nev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_ge,            s390_vec_any_ge_s8_a,s390_vec_any_ge_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_ge_s8_a,       vec_any_gev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ge_s8_b,       vec_any_gev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_a,       vec_any_geuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_b,       vec_any_gev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_ge_b8_c,       vec_any_geuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ge_u8_a,       vec_any_geuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_ge_u8_b,       vec_any_geuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_ge_s16_a,      vec_any_gev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ge_s16_b,      vec_any_gev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_a,      vec_any_geuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_b,      vec_any_gev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_ge_b16_c,      vec_any_geuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ge_u16_a,      vec_any_geuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_ge_u16_b,      vec_any_geuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_ge_s32_a,      vec_any_gev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ge_s32_b,      vec_any_gev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_a,      vec_any_geuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_b,      vec_any_gev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_ge_b32_c,      vec_any_geuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ge_u32_a,      vec_any_geuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_ge_u32_b,      vec_any_geuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_ge_s64_a,      vec_any_gev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ge_s64_b,      vec_any_gev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_a,      vec_any_geuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_b,      vec_any_gev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_ge_b64_c,      vec_any_geuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ge_u64_a,      vec_any_geuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_ge_u64_b,      vec_any_geuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_ge_dbl,        vec_any_gev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_gt,            s390_vec_any_gt_s8_a,s390_vec_any_gt_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_gt_s8_a,       vec_any_gtv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_gt_s8_b,       vec_any_gtv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_a,       vec_any_gtuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_b,       vec_any_gtv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_gt_b8_c,       vec_any_gtuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_gt_u8_a,       vec_any_gtuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_gt_u8_b,       vec_any_gtuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_gt_s16_a,      vec_any_gtv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_gt_s16_b,      vec_any_gtv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_a,      vec_any_gtuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_b,      vec_any_gtv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_gt_b16_c,      vec_any_gtuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_gt_u16_a,      vec_any_gtuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_gt_u16_b,      vec_any_gtuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_gt_s32_a,      vec_any_gtv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_gt_s32_b,      vec_any_gtv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_a,      vec_any_gtuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_b,      vec_any_gtv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_gt_b32_c,      vec_any_gtuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_gt_u32_a,      vec_any_gtuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_gt_u32_b,      vec_any_gtuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_gt_s64_a,      vec_any_gtv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_gt_s64_b,      vec_any_gtv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_a,      vec_any_gtuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_b,      vec_any_gtv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_gt_b64_c,      vec_any_gtuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_gt_u64_a,      vec_any_gtuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_gt_u64_b,      vec_any_gtuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_gt_dbl,        vec_any_gtv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_le,            s390_vec_any_le_s8_a,s390_vec_any_le_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_le_s8_a,       vec_any_lev16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_le_s8_b,       vec_any_lev16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_a,       vec_any_leuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_b,       vec_any_lev16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_le_b8_c,       vec_any_leuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_le_u8_a,       vec_any_leuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_le_u8_b,       vec_any_leuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_le_s16_a,      vec_any_lev8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_le_s16_b,      vec_any_lev8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_a,      vec_any_leuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_b,      vec_any_lev8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_le_b16_c,      vec_any_leuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_le_u16_a,      vec_any_leuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_le_u16_b,      vec_any_leuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_le_s32_a,      vec_any_lev4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_le_s32_b,      vec_any_lev4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_a,      vec_any_leuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_b,      vec_any_lev4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_le_b32_c,      vec_any_leuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_le_u32_a,      vec_any_leuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_le_u32_b,      vec_any_leuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_le_s64_a,      vec_any_lev2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_le_s64_b,      vec_any_lev2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_a,      vec_any_leuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_b,      vec_any_lev2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_le_b64_c,      vec_any_leuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_le_u64_a,      vec_any_leuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_le_u64_b,      vec_any_leuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_le_dbl,        vec_any_lev2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_any_lt,            s390_vec_any_lt_s8_a,s390_vec_any_lt_dbl,B_VX,              BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_any_lt_s8_a,       vec_any_ltv16qi,    0,                  BT_OV_INT_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_lt_s8_b,       vec_any_ltv16qi,    0,                  BT_OV_INT_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_a,       vec_any_ltuv16qi,   0,                  BT_OV_INT_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_b,       vec_any_ltv16qi,    0,                  BT_OV_INT_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_any_lt_b8_c,       vec_any_ltuv16qi,   0,                  BT_OV_INT_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_lt_u8_a,       vec_any_ltuv16qi,   0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_any_lt_u8_b,       vec_any_ltuv16qi,   0,                  BT_OV_INT_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_any_lt_s16_a,      vec_any_ltv8hi,     0,                  BT_OV_INT_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_lt_s16_b,      vec_any_ltv8hi,     0,                  BT_OV_INT_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_a,      vec_any_ltuv8hi,    0,                  BT_OV_INT_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_b,      vec_any_ltv8hi,     0,                  BT_OV_INT_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_any_lt_b16_c,      vec_any_ltuv8hi,    0,                  BT_OV_INT_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_lt_u16_a,      vec_any_ltuv8hi,    0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_any_lt_u16_b,      vec_any_ltuv8hi,    0,                  BT_OV_INT_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_any_lt_s32_a,      vec_any_ltv4si,     0,                  BT_OV_INT_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_lt_s32_b,      vec_any_ltv4si,     0,                  BT_OV_INT_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_a,      vec_any_ltuv4si,    0,                  BT_OV_INT_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_b,      vec_any_ltv4si,     0,                  BT_OV_INT_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_any_lt_b32_c,      vec_any_ltuv4si,    0,                  BT_OV_INT_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_lt_u32_a,      vec_any_ltuv4si,    0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_any_lt_u32_b,      vec_any_ltuv4si,    0,                  BT_OV_INT_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_any_lt_s64_a,      vec_any_ltv2di,     0,                  BT_OV_INT_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_lt_s64_b,      vec_any_ltv2di,     0,                  BT_OV_INT_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_a,      vec_any_ltuv2di,    0,                  BT_OV_INT_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_b,      vec_any_ltv2di,     0,                  BT_OV_INT_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_any_lt_b64_c,      vec_any_ltuv2di,    0,                  BT_OV_INT_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_lt_u64_a,      vec_any_ltuv2di,    0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_any_lt_u64_b,      vec_any_ltuv2di,    0,                  BT_OV_INT_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_any_lt_dbl,        vec_any_ltv2df,     0,                  BT_OV_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cmpeq,             s390_vec_cmpeq_s8,  s390_vec_cmpeq_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_s8,          s390_vceqb,         0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpeq_u8,          s390_vceqb,         0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpeq_b8,          s390_vceqb,         0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_cmpeq_s16,         s390_vceqh,         0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpeq_u16,         s390_vceqh,         0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpeq_b16,         s390_vceqh,         0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_cmpeq_s32,         s390_vceqf,         0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpeq_u32,         s390_vceqf,         0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_b32,         s390_vceqf,         0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_cmpeq_s64,         s390_vceqg,         0,                  BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpeq_u64,         s390_vceqg,         0,                  BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpeq_b64,         s390_vceqg,         0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_cmpeq_dbl,         s390_vfcedb,        0,                  BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF      (s390_vceqb,                 vec_cmpeqv16qi,     0,                  B_VX,               0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (s390_vceqh,                 vec_cmpeqv8hi,      0,                  B_VX,               0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (s390_vceqf,                 vec_cmpeqv4si,      0,                  B_VX,               0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (s390_vceqg,                 vec_cmpeqv2di,      0,                  B_VX,               0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (s390_vfcedb,                vec_cmpeqv2df,      0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cmpge,             s390_vec_cmpge_s8,  s390_vec_cmpge_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpge_s8,          vec_cmpgev16qi,     0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpge_u8,          vec_cmpgeuv16qi,    0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpge_s16,         vec_cmpgev8hi,      0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpge_u16,         vec_cmpgeuv8hi,     0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpge_s32,         vec_cmpgev4si,      0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpge_u32,         vec_cmpgeuv4si,     0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpge_s64,         vec_cmpgev2di,      0,                  BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpge_u64,         vec_cmpgeuv2di,     0,                  BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpge_dbl,         s390_vfchedb,       0,                  BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF      (vec_cmpgev16qi,             vec_cmpgev16qi,     0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmpgeuv16qi,            vec_cmpgeuv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmpgev8hi,              vec_cmpgev8hi,      0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmpgeuv8hi,             vec_cmpgeuv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmpgev4si,              vec_cmpgev4si,      0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmpgeuv4si,             vec_cmpgeuv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmpgev2di,              vec_cmpgev2di,      0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (vec_cmpgeuv2di,             vec_cmpgeuv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (s390_vfchedb,               vec_cmpgev2df,      0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cmpgt,             s390_vec_cmpgt_s8,  s390_vec_cmpgt_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmpgt_s8,          s390_vchb,          0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmpgt_u8,          s390_vchlb,         0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmpgt_s16,         s390_vchh,          0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmpgt_u16,         s390_vchlh,         0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmpgt_s32,         s390_vchf,          0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmpgt_u32,         s390_vchlf,         0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmpgt_s64,         s390_vchg,          0,                  BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmpgt_u64,         s390_vchlg,         0,                  BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmpgt_dbl,         s390_vfchdb,        0,                  BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF      (s390_vchb,                  vec_cmpgtv16qi,     0,                  B_VX,               0,                  BT_FN_V16QI_V16QI_V16QI)
++B_DEF      (s390_vchlb,                 vec_cmpgtuv16qi,    0,                  B_VX,               0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (s390_vchh,                  vec_cmpgtv8hi,      0,                  B_VX,               0,                  BT_FN_V8HI_V8HI_V8HI)
++B_DEF      (s390_vchlh,                 vec_cmpgtuv8hi,     0,                  B_VX,               0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (s390_vchf,                  vec_cmpgtv4si,      0,                  B_VX,               0,                  BT_FN_V4SI_V4SI_V4SI)
++B_DEF      (s390_vchlf,                 vec_cmpgtuv4si,     0,                  B_VX,               0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (s390_vchg,                  vec_cmpgtv2di,      0,                  B_VX,               0,                  BT_FN_V2DI_V2DI_V2DI)
++B_DEF      (s390_vchlg,                 vec_cmpgtuv2di,     0,                  B_VX,               0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (s390_vfchdb,                vec_cmpgtv2df,      0,                  B_VX,               0,                  BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cmple,             s390_vec_cmple_s8,  s390_vec_cmple_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmple_s8,          vec_cmplev16qi,     0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmple_u8,          vec_cmpleuv16qi,    0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmple_s16,         vec_cmplev8hi,      0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmple_u16,         vec_cmpleuv8hi,     0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmple_s32,         vec_cmplev4si,      0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmple_u32,         vec_cmpleuv4si,     0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmple_s64,         vec_cmplev2di,      0,                  BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmple_u64,         vec_cmpleuv2di,     0,                  BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmple_dbl,         vec_cmplev2df,      0,                  BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF      (vec_cmplev16qi,             vec_cmplev16qi,     0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmpleuv16qi,            vec_cmpleuv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmplev8hi,              vec_cmplev8hi,      0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmpleuv8hi,             vec_cmpleuv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmplev4si,              vec_cmplev4si,      0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmpleuv4si,             vec_cmpleuv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmplev2di,              vec_cmplev2di,      0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (vec_cmpleuv2di,             vec_cmpleuv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (vec_cmplev2df,              vec_cmplev2df,      0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cmplt,             s390_vec_cmplt_s8,  s390_vec_cmplt_dbl, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cmplt_s8,          vec_cmpltv16qi,     0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_cmplt_u8,          vec_cmpltuv16qi,    0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cmplt_s16,         vec_cmpltv8hi,      0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_cmplt_u16,         vec_cmpltuv8hi,     0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cmplt_s32,         vec_cmpltv4si,      0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_cmplt_u32,         vec_cmpltuv4si,     0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cmplt_s64,         vec_cmpltv2di,      0,                  BT_OV_BV2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_cmplt_u64,         vec_cmpltuv2di,     0,                  BT_OV_BV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_cmplt_dbl,         vec_cmpltv2df,      0,                  BT_OV_BV2DI_V2DF_V2DF)
++
++B_DEF      (vec_cmpltv16qi,             vec_cmpltv16qi,     0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmpltuv16qi,            vec_cmpltuv16qi,    0,                  B_VX | B_INT,       0,                  BT_FN_V16QI_UV16QI_UV16QI)
++B_DEF      (vec_cmpltv8hi,              vec_cmpltv8hi,      0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmpltuv8hi,             vec_cmpltuv8hi,     0,                  B_VX | B_INT,       0,                  BT_FN_V8HI_UV8HI_UV8HI)
++B_DEF      (vec_cmpltv4si,              vec_cmpltv4si,      0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmpltuv4si,             vec_cmpltuv4si,     0,                  B_VX | B_INT,       0,                  BT_FN_V4SI_UV4SI_UV4SI)
++B_DEF      (vec_cmpltv2di,              vec_cmpltv2di,      0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (vec_cmpltuv2di,             vec_cmpltuv2di,     0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_UV2DI_UV2DI)
++B_DEF      (vec_cmpltv2df,              vec_cmpltv2df,      0,                  B_VX | B_INT,       0,                  BT_FN_V2DI_V2DF_V2DF)
++
++OB_DEF     (s390_vec_cntlz,             s390_vec_cntlz_s8,  s390_vec_cntlz_u64, B_VX,               BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cntlz_s8,          s390_vclzb,         0,                  BT_OV_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_cntlz_u8,          s390_vclzb,         0,                  BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cntlz_s16,         s390_vclzh,         0,                  BT_OV_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_cntlz_u16,         s390_vclzh,         0,                  BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cntlz_s32,         s390_vclzf,         0,                  BT_OV_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_cntlz_u32,         s390_vclzf,         0,                  BT_OV_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cntlz_s64,         s390_vclzg,         0,                  BT_OV_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_cntlz_u64,         s390_vclzg,         0,                  BT_OV_UV2DI_UV2DI)
++
++B_DEF      (s390_vclzb,                 clzv16qi2,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI)
++B_DEF      (s390_vclzh,                 clzv8hi2,           0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI)
++B_DEF      (s390_vclzf,                 clzv4si2,           0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI)
++B_DEF      (s390_vclzg,                 clzv2di2,           0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_cnttz,             s390_vec_cnttz_s8,  s390_vec_cnttz_u64, B_VX,               BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_cnttz_s8,          s390_vctzb,         0,                  BT_OV_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_cnttz_u8,          s390_vctzb,         0,                  BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_cnttz_s16,         s390_vctzh,         0,                  BT_OV_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_cnttz_u16,         s390_vctzh,         0,                  BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_cnttz_s32,         s390_vctzf,         0,                  BT_OV_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_cnttz_u32,         s390_vctzf,         0,                  BT_OV_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_cnttz_s64,         s390_vctzg,         0,                  BT_OV_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_cnttz_u64,         s390_vctzg,         0,                  BT_OV_UV2DI_UV2DI)
++
++B_DEF      (s390_vctzb,                 ctzv16qi2,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI)
++B_DEF      (s390_vctzh,                 ctzv8hi2,           0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI)
++B_DEF      (s390_vctzf,                 ctzv4si2,           0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI)
++B_DEF      (s390_vctzg,                 ctzv2di2,           0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_xor,               s390_vec_xor_b8,    s390_vec_xor_dbl_c, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_xor_b8,            s390_vx,            0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_s8_a,          s390_vx,            0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_xor_s8_b,          s390_vx,            0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_xor_s8_c,          s390_vx,            0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_a,          s390_vx,            0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_b,          s390_vx,            0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_xor_u8_c,          s390_vx,            0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_xor_b16,           s390_vx,            0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_s16_a,         s390_vx,            0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_xor_s16_b,         s390_vx,            0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_xor_s16_c,         s390_vx,            0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_a,         s390_vx,            0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_b,         s390_vx,            0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_xor_u16_c,         s390_vx,            0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_xor_b32,           s390_vx,            0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_s32_a,         s390_vx,            0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_xor_s32_b,         s390_vx,            0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_xor_s32_c,         s390_vx,            0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_a,         s390_vx,            0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_b,         s390_vx,            0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_xor_u32_c,         s390_vx,            0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_xor_b64,           s390_vx,            0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_s64_a,         s390_vx,            0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_xor_s64_b,         s390_vx,            0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_xor_s64_c,         s390_vx,            0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_a,         s390_vx,            0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_b,         s390_vx,            0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_xor_u64_c,         s390_vx,            0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_xor_dbl_a,         s390_vx,            0,                  BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_xor_dbl_b,         s390_vx,            0,                  BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_xor_dbl_c,         s390_vx,            0,                  BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vx,                    xorv16qi3,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_gfmsum,            s390_vec_gfmsum_u8, s390_vec_gfmsum_u32,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_u8,         s390_vgfmb,         0,                  BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_gfmsum_u16,        s390_vgfmh,         0,                  BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_gfmsum_u32,        s390_vgfmf,         0,                  BT_OV_UV2DI_UV4SI_UV4SI)
++
++B_DEF      (s390_vgfmb,                 vec_gfmsumv16qi,    0,                  B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF      (s390_vgfmh,                 vec_gfmsumv8hi,     0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF      (s390_vgfmf,                 vec_gfmsumv4si,     0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF      (s390_vgfmg,                 vec_gfmsum_128,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_gfmsum_accum,      s390_vec_gfmsum_accum_u8,s390_vec_gfmsum_accum_u32,B_VX,    BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u8,   s390_vgfmab,        0,                  BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u16,  s390_vgfmah,        0,                  BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_gfmsum_accum_u32,  s390_vgfmaf,        0,                  BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++
++B_DEF      (s390_vgfmab,                vec_gfmsum_accumv16qi,0,                B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF      (s390_vgfmah,                vec_gfmsum_accumv8hi,0,                 B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF      (s390_vgfmaf,                vec_gfmsum_accumv4si,0,                 B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF      (s390_vgfmag,                vec_gfmsum_accum_128,0,                 B_VX,               0,                  BT_FN_UV16QI_UV2DI_UV2DI_UV16QI)
++
++OB_DEF     (s390_vec_abs,               s390_vec_abs_s8,    s390_vec_abs_dbl,   B_VX,               BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_abs_s8,            s390_vlpb,          0,                  BT_OV_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_abs_s16,           s390_vlph,          0,                  BT_OV_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_abs_s32,           s390_vlpf,          0,                  BT_OV_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_abs_s64,           s390_vlpg,          0,                  BT_OV_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_abs_dbl,           s390_vflpdb,        0,                  BT_OV_V2DF_V2DF)
++
++B_DEF      (s390_vlpb,                  absv16qi2,          0,                  B_VX,               0,                  BT_FN_V16QI_V16QI)
++B_DEF      (s390_vlph,                  absv8hi2,           0,                  B_VX,               0,                  BT_FN_V8HI_V8HI)
++B_DEF      (s390_vlpf,                  absv4si2,           0,                  B_VX,               0,                  BT_FN_V4SI_V4SI)
++B_DEF      (s390_vlpg,                  absv2di2,           0,                  B_VX,               0,                  BT_FN_V2DI_V2DI)
++B_DEF      (s390_vflpdb,                absv2df2,           0,                  B_VX,               0,                  BT_FN_V2DF_V2DF)
++
++OB_DEF     (s390_vec_max,               s390_vec_max_s8_a,  s390_vec_max_dbl,   B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_max_s8_a,          s390_vmxb,          0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_max_s8_b,          s390_vmxb,          0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_max_s8_c,          s390_vmxb,          0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_max_u8_a,          s390_vmxlb,         0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_max_u8_b,          s390_vmxlb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_max_u8_c,          s390_vmxlb,         0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_max_s16_a,         s390_vmxh,          0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_max_s16_b,         s390_vmxh,          0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_max_s16_c,         s390_vmxh,          0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_max_u16_a,         s390_vmxlh,         0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_max_u16_b,         s390_vmxlh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_max_u16_c,         s390_vmxlh,         0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_max_s32_a,         s390_vmxf,          0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_max_s32_b,         s390_vmxf,          0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_max_s32_c,         s390_vmxf,          0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_max_u32_a,         s390_vmxlf,         0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_max_u32_b,         s390_vmxlf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_max_u32_c,         s390_vmxlf,         0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_max_s64_a,         s390_vmxg,          0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_max_s64_b,         s390_vmxg,          0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_max_s64_c,         s390_vmxg,          0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_max_u64_a,         s390_vmxlg,         0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_max_u64_b,         s390_vmxlg,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_max_u64_c,         s390_vmxlg,         0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_max_dbl,           s390_vec_max_dbl,   0,                  BT_OV_V2DF_V2DF_V2DF)
++
++B_DEF      (s390_vmxb,                  smaxv16qi3,         0,                  B_VX,               0,                  BT_FN_V16QI_BV16QI_V16QI)
++B_DEF      (s390_vmxlb,                 umaxv16qi3,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmxh,                  smaxv8hi3,          0,                  B_VX,               0,                  BT_FN_V8HI_BV8HI_V8HI)
++B_DEF      (s390_vmxlh,                 umaxv8hi3,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmxf,                  smaxv4si3,          0,                  B_VX,               0,                  BT_FN_V4SI_BV4SI_V4SI)
++B_DEF      (s390_vmxlf,                 umaxv4si3,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmxg,                  smaxv2di3,          0,                  B_VX,               0,                  BT_FN_V2DI_BV2DI_V2DI)
++B_DEF      (s390_vmxlg,                 umaxv2di3,          0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF      (s390_vec_max_dbl,           smaxv2df3,          0,                  B_VX | B_INT,       0,                  BT_FN_V2DF_V2DF_V2DF)
++
++OB_DEF     (s390_vec_min,               s390_vec_min_s8_a,  s390_vec_min_dbl,   B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_min_s8_a,          s390_vmnb,          0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_min_s8_b,          s390_vmnb,          0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_min_s8_c,          s390_vmnb,          0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_min_u8_a,          s390_vmnlb,         0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_min_u8_b,          s390_vmnlb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_min_u8_c,          s390_vmnlb,         0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_min_s16_a,         s390_vmnh,          0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_min_s16_b,         s390_vmnh,          0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_min_s16_c,         s390_vmnh,          0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_min_u16_a,         s390_vmnlh,         0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_min_u16_b,         s390_vmnlh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_min_u16_c,         s390_vmnlh,         0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_min_s32_a,         s390_vmnf,          0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_min_s32_b,         s390_vmnf,          0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_min_s32_c,         s390_vmnf,          0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_min_u32_a,         s390_vmnlf,         0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_min_u32_b,         s390_vmnlf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_min_u32_c,         s390_vmnlf,         0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_min_s64_a,         s390_vmng,          0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_min_s64_b,         s390_vmng,          0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_min_s64_c,         s390_vmng,          0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_min_u64_a,         s390_vmnlg,         0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_min_u64_b,         s390_vmnlg,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_min_u64_c,         s390_vmnlg,         0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_min_dbl,           s390_vec_min_dbl,   0,                  BT_OV_V2DF_V2DF_V2DF)
++
++B_DEF      (s390_vmnb,                  sminv16qi3,         0,                  B_VX,               0,                  BT_FN_V16QI_BV16QI_V16QI) /* NOTE(review): signed entries (vmnb/vmnh/vmnf/vmng) carry a BV* type field, unsigned ones are uniform -- confirm intended */
++B_DEF      (s390_vmnlb,                 uminv16qi3,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmnh,                  sminv8hi3,          0,                  B_VX,               0,                  BT_FN_V8HI_BV8HI_V8HI)
++B_DEF      (s390_vmnlh,                 uminv8hi3,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmnf,                  sminv4si3,          0,                  B_VX,               0,                  BT_FN_V4SI_BV4SI_V4SI)
++B_DEF      (s390_vmnlf,                 uminv4si3,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmng,                  sminv2di3,          0,                  B_VX,               0,                  BT_FN_V2DI_BV2DI_V2DI)
++B_DEF      (s390_vmnlg,                 uminv2di3,          0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF      (s390_vec_min_dbl,           sminv2df3,          0,                  B_VX | B_INT,       0,                  BT_FN_V2DF_V2DF_V2DF) /* only entry in this group that also sets B_INT */
++
++OB_DEF     (s390_vec_mladd,             s390_vec_mladd_u8,  s390_vec_mladd_s32_c,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) /* signed and unsigned variants of one element width share the same second-column builtin (s390_vmalb / s390_vmalhw / s390_vmalf) */
++OB_DEF_VAR (s390_vec_mladd_u8,          s390_vmalb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mladd_s8_a,        s390_vmalb,         0,                  BT_OV_V16QI_UV16QI_V16QI_V16QI) /* _a/_b/_c differ only in which type fields are signed vs. unsigned */
++OB_DEF_VAR (s390_vec_mladd_s8_b,        s390_vmalb,         0,                  BT_OV_V16QI_V16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mladd_s8_c,        s390_vmalb,         0,                  BT_OV_V16QI_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mladd_u16,         s390_vmalhw,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_a,       s390_vmalhw,        0,                  BT_OV_V8HI_UV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_b,       s390_vmalhw,        0,                  BT_OV_V8HI_V8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mladd_s16_c,       s390_vmalhw,        0,                  BT_OV_V8HI_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mladd_u32,         s390_vmalf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_a,       s390_vmalf,         0,                  BT_OV_V4SI_UV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_b,       s390_vmalf,         0,                  BT_OV_V4SI_V4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mladd_s32_c,       s390_vmalf,         0,                  BT_OV_V4SI_V4SI_V4SI_V4SI)
++
++B_DEF      (s390_vmalb,                 vec_vmalv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmalhw,                vec_vmalv8hi,       0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmalf,                 vec_vmalv4si,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_mhadd,             s390_vec_mhadd_u8,  s390_vec_mhadd_s32, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mhadd_u8,          s390_vmalhb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mhadd_s8,          s390_vmahb,         0,                  BT_OV_V16QI_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mhadd_u16,         s390_vmalhh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mhadd_s16,         s390_vmahh,         0,                  BT_OV_V8HI_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mhadd_u32,         s390_vmalhf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mhadd_s32,         s390_vmahf,         0,                  BT_OV_V4SI_V4SI_V4SI_V4SI)
++
++B_DEF      (s390_vmalhb,                vec_vmalhv16qi,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmahb,                 vec_vmahv16qi,      0,                  B_VX,               0,                  BT_FN_V16QI_V16QI_V16QI_V16QI)
++B_DEF      (s390_vmalhh,                vec_vmalhv8hi,      0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmahh,                 vec_vmahv8hi,       0,                  B_VX,               0,                  BT_FN_V8HI_V8HI_V8HI_V8HI)
++B_DEF      (s390_vmalhf,                vec_vmalhv4si,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmahf,                 vec_vmahv4si,       0,                  B_VX,               0,                  BT_FN_V4SI_V4SI_V4SI_V4SI)
++
++OB_DEF     (s390_vec_meadd,             s390_vec_meadd_u8,  s390_vec_meadd_s32, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_meadd_u8,          s390_vmaleb,        0,                  BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_meadd_s8,          s390_vmaeb,         0,                  BT_OV_V8HI_V16QI_V16QI_V8HI)
++OB_DEF_VAR (s390_vec_meadd_u16,         s390_vmaleh,        0,                  BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_meadd_s16,         s390_vmaeh,         0,                  BT_OV_V4SI_V8HI_V8HI_V4SI)
++OB_DEF_VAR (s390_vec_meadd_u32,         s390_vmalef,        0,                  BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++OB_DEF_VAR (s390_vec_meadd_s32,         s390_vmaef,         0,                  BT_OV_V2DI_V4SI_V4SI_V2DI)
++
++B_DEF      (s390_vmaleb,                vec_vmalev16qi,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF      (s390_vmaeb,                 vec_vmaev16qi,      0,                  B_VX,               0,                  BT_FN_V8HI_V16QI_V16QI_V8HI)
++B_DEF      (s390_vmaleh,                vec_vmalev8hi,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF      (s390_vmaeh,                 vec_vmaev8hi,       0,                  B_VX,               0,                  BT_FN_V4SI_V8HI_V8HI_V4SI)
++B_DEF      (s390_vmalef,                vec_vmalev4si,      0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF      (s390_vmaef,                 vec_vmaev4si,       0,                  B_VX,               0,                  BT_FN_V2DI_V4SI_V4SI_V2DI)
++
++OB_DEF     (s390_vec_moadd,             s390_vec_moadd_u8,  s390_vec_moadd_s32, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_moadd_u8,          s390_vmalob,        0,                  BT_OV_UV8HI_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_moadd_s8,          s390_vmaob,         0,                  BT_OV_V8HI_V16QI_V16QI_V8HI)
++OB_DEF_VAR (s390_vec_moadd_u16,         s390_vmaloh,        0,                  BT_OV_UV4SI_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_moadd_s16,         s390_vmaoh,         0,                  BT_OV_V4SI_V8HI_V8HI_V4SI)
++OB_DEF_VAR (s390_vec_moadd_u32,         s390_vmalof,        0,                  BT_OV_UV2DI_UV4SI_UV4SI_UV2DI)
++OB_DEF_VAR (s390_vec_moadd_s32,         s390_vmaof,         0,                  BT_OV_V2DI_V4SI_V4SI_V2DI)
++
++B_DEF      (s390_vmalob,                vec_vmalov16qi,     0,                  B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI_UV8HI)
++B_DEF      (s390_vmaob,                 vec_vmaov16qi,      0,                  B_VX,               0,                  BT_FN_V8HI_V16QI_V16QI_V8HI)
++B_DEF      (s390_vmaloh,                vec_vmalov8hi,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI_UV4SI)
++B_DEF      (s390_vmaoh,                 vec_vmaov8hi,       0,                  B_VX,               0,                  BT_FN_V4SI_V8HI_V8HI_V4SI)
++B_DEF      (s390_vmalof,                vec_vmalov4si,      0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI_UV2DI)
++B_DEF      (s390_vmaof,                 vec_vmaov4si,       0,                  B_VX,               0,                  BT_FN_V2DI_V4SI_V4SI_V2DI)
++
++OB_DEF     (s390_vec_mulh,              s390_vec_mulh_u8,   s390_vec_mulh_s32,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mulh_u8,           s390_vmlhb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mulh_s8,           s390_vmhb,          0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mulh_u16,          s390_vmlhh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mulh_s16,          s390_vmhh,          0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mulh_u32,          s390_vmlhf,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mulh_s32,          s390_vmhf,          0,                  BT_OV_V4SI_V4SI_V4SI)
++
++B_DEF      (s390_vmlhb,                 vec_umulhv16qi,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vmhb,                  vec_smulhv16qi,     0,                  B_VX,               0,                  BT_FN_V16QI_V16QI_V16QI)
++B_DEF      (s390_vmlhh,                 vec_umulhv8hi,      0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vmhh,                  vec_smulhv8hi,      0,                  B_VX,               0,                  BT_FN_V8HI_V8HI_V8HI)
++B_DEF      (s390_vmlhf,                 vec_umulhv4si,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vmhf,                  vec_smulhv4si,      0,                  B_VX,               0,                  BT_FN_V4SI_V4SI_V4SI)
++
++OB_DEF     (s390_vec_mule,              s390_vec_mule_u8,   s390_vec_mule_s32,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI) /* variants are named <u|s><operand element bits>; the first type field of each variant is twice as wide as the other two */
++OB_DEF_VAR (s390_vec_mule_u8,           s390_vmleb,         0,                  BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mule_s8,           s390_vmeb,          0,                  BT_OV_V8HI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mule_u16,          s390_vmleh,         0,                  BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mule_s16,          s390_vmeh,          0,                  BT_OV_V4SI_V8HI_V8HI) /* V8HI operands: named _s16 like vec_mulo_s16 / vec_mulh_s16 */
++OB_DEF_VAR (s390_vec_mule_u32,          s390_vmlef,         0,                  BT_OV_UV2DI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mule_s32,          s390_vmef,          0,                  BT_OV_V2DI_V4SI_V4SI)
++
++B_DEF      (s390_vmleb,                 vec_widen_umult_even_v16qi,0,           B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF      (s390_vmeb,                  vec_widen_smult_even_v16qi,0,           B_VX,               0,                  BT_FN_V8HI_V16QI_V16QI)
++B_DEF      (s390_vmleh,                 vec_widen_umult_even_v8hi,0,            B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF      (s390_vmeh,                  vec_widen_smult_even_v8hi,0,            B_VX,               0,                  BT_FN_V4SI_V8HI_V8HI)
++B_DEF      (s390_vmlef,                 vec_widen_umult_even_v4si,0,            B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF      (s390_vmef,                  vec_widen_smult_even_v4si,0,            B_VX,               0,                  BT_FN_V2DI_V4SI_V4SI)
++
++OB_DEF     (s390_vec_mulo,              s390_vec_mulo_u8,   s390_vec_mulo_s32,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_mulo_u8,           s390_vmlob,         0,                  BT_OV_UV8HI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_mulo_s8,           s390_vmob,          0,                  BT_OV_V8HI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_mulo_u16,          s390_vmloh,         0,                  BT_OV_UV4SI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_mulo_s16,          s390_vmoh,          0,                  BT_OV_V4SI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_mulo_u32,          s390_vmlof,         0,                  BT_OV_UV2DI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_mulo_s32,          s390_vmof,          0,                  BT_OV_V2DI_V4SI_V4SI)
++
++B_DEF      (s390_vmlob,                 vec_widen_umult_odd_v16qi,0,            B_VX,               0,                  BT_FN_UV8HI_UV16QI_UV16QI)
++B_DEF      (s390_vmob,                  vec_widen_smult_odd_v16qi,0,            B_VX,               0,                  BT_FN_V8HI_V16QI_V16QI)
++B_DEF      (s390_vmloh,                 vec_widen_umult_odd_v8hi,0,             B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI)
++B_DEF      (s390_vmoh,                  vec_widen_smult_odd_v8hi,0,             B_VX,               0,                  BT_FN_V4SI_V8HI_V8HI)
++B_DEF      (s390_vmlof,                 vec_widen_umult_odd_v4si,0,             B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI)
++B_DEF      (s390_vmof,                  vec_widen_smult_odd_v4si,0,             B_VX,               0,                  BT_FN_V2DI_V4SI_V4SI)
++
++OB_DEF     (s390_vec_nor,               s390_vec_nor_b8,    s390_vec_nor_dbl_c, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_nor_b8,            s390_vno,           0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_s8_a,          s390_vno,           0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_nor_s8_b,          s390_vno,           0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_nor_s8_c,          s390_vno,           0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_a,          s390_vno,           0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_b,          s390_vno,           0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_nor_u8_c,          s390_vno,           0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_nor_b16,           s390_vno,           0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_s16_a,         s390_vno,           0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_nor_s16_b,         s390_vno,           0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_nor_s16_c,         s390_vno,           0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_a,         s390_vno,           0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_b,         s390_vno,           0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_nor_u16_c,         s390_vno,           0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_nor_b32,           s390_vno,           0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_s32_a,         s390_vno,           0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_nor_s32_b,         s390_vno,           0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_nor_s32_c,         s390_vno,           0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_a,         s390_vno,           0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_b,         s390_vno,           0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_nor_u32_c,         s390_vno,           0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_nor_b64,           s390_vno,           0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_s64_a,         s390_vno,           0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_nor_s64_b,         s390_vno,           0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_nor_s64_c,         s390_vno,           0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_a,         s390_vno,           0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_b,         s390_vno,           0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_nor_u64_c,         s390_vno,           0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_nor_dbl_a,         s390_vno,           0,                  BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_nor_dbl_b,         s390_vno,           0,                  BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_nor_dbl_c,         s390_vno,           0,                  BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vno,                   vec_norv16qi3,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_or,                s390_vec_or_b8,     s390_vec_or_dbl_c,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_or_b8,             s390_vo,            0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_s8_a,           s390_vo,            0,                  BT_OV_V16QI_BV16QI_V16QI)
++OB_DEF_VAR (s390_vec_or_s8_b,           s390_vo,            0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_or_s8_c,           s390_vo,            0,                  BT_OV_V16QI_V16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_u8_a,           s390_vo,            0,                  BT_OV_UV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_or_u8_b,           s390_vo,            0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_or_u8_c,           s390_vo,            0,                  BT_OV_UV16QI_UV16QI_BV16QI)
++OB_DEF_VAR (s390_vec_or_b16,            s390_vo,            0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_s16_a,          s390_vo,            0,                  BT_OV_V8HI_BV8HI_V8HI)
++OB_DEF_VAR (s390_vec_or_s16_b,          s390_vo,            0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_or_s16_c,          s390_vo,            0,                  BT_OV_V8HI_V8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_u16_a,          s390_vo,            0,                  BT_OV_UV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_or_u16_b,          s390_vo,            0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_or_u16_c,          s390_vo,            0,                  BT_OV_UV8HI_UV8HI_BV8HI)
++OB_DEF_VAR (s390_vec_or_b32,            s390_vo,            0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_s32_a,          s390_vo,            0,                  BT_OV_V4SI_BV4SI_V4SI)
++OB_DEF_VAR (s390_vec_or_s32_b,          s390_vo,            0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_or_s32_c,          s390_vo,            0,                  BT_OV_V4SI_V4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_u32_a,          s390_vo,            0,                  BT_OV_UV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_or_u32_b,          s390_vo,            0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_or_u32_c,          s390_vo,            0,                  BT_OV_UV4SI_UV4SI_BV4SI)
++OB_DEF_VAR (s390_vec_or_b64,            s390_vo,            0,                  BT_OV_BV2DI_BV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_s64_a,          s390_vo,            0,                  BT_OV_V2DI_BV2DI_V2DI)
++OB_DEF_VAR (s390_vec_or_s64_b,          s390_vo,            0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_or_s64_c,          s390_vo,            0,                  BT_OV_V2DI_V2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_u64_a,          s390_vo,            0,                  BT_OV_UV2DI_BV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_or_u64_b,          s390_vo,            0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_or_u64_c,          s390_vo,            0,                  BT_OV_UV2DI_UV2DI_BV2DI)
++OB_DEF_VAR (s390_vec_or_dbl_a,          s390_vo,            0,                  BT_OV_V2DF_BV2DI_V2DF)
++OB_DEF_VAR (s390_vec_or_dbl_b,          s390_vo,            0,                  BT_OV_V2DF_V2DF_V2DF)
++OB_DEF_VAR (s390_vec_or_dbl_c,          s390_vo,            0,                  BT_OV_V2DF_V2DF_BV2DI)
++
++B_DEF      (s390_vo,                    iorv16qi3,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_popcnt,            s390_vec_popcnt_s8, s390_vec_popcnt_u64,B_VX,               BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_popcnt_s8,         s390_vpopctb,       0,                  BT_OV_UV16QI_V16QI)  /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_u8,         s390_vpopctb,       0,                  BT_OV_UV16QI_UV16QI) /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_s16,        s390_vpopcth,       0,                  BT_OV_UV8HI_V8HI)    /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_u16,        s390_vpopcth,       0,                  BT_OV_UV8HI_UV8HI)   /* vpopct */
++OB_DEF_VAR (s390_vec_popcnt_s32,        s390_vpopctf,       0,                  BT_OV_UV4SI_V4SI)    /* vpopct vsumb */
++OB_DEF_VAR (s390_vec_popcnt_u32,        s390_vpopctf,       0,                  BT_OV_UV4SI_UV4SI)   /* vpopct vsumb */
++OB_DEF_VAR (s390_vec_popcnt_s64,        s390_vpopctg,       0,                  BT_OV_UV2DI_V2DI)    /* vpopct vsumb vsumgf */
++OB_DEF_VAR (s390_vec_popcnt_u64,        s390_vpopctg,       0,                  BT_OV_UV2DI_UV2DI)   /* vpopct vsumb vsumgf */
++
++B_DEF      (s390_vpopctb,               popcountv16qi2,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI) /* vpopct */
++B_DEF      (s390_vpopcth,               popcountv8hi2,      0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI)   /* vpopct */
++B_DEF      (s390_vpopctf,               popcountv4si2,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI)   /* vpopct vsumb */
++B_DEF      (s390_vpopctg,               popcountv2di2,      0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI)   /* vpopct vsumb vsumgf */
++
++OB_DEF     (s390_vec_rl,                s390_vec_rl_u8,     s390_vec_rl_s64,    B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_rl_u8,             s390_verllvb,       0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_rl_s8,             s390_verllvb,       0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_rl_u16,            s390_verllvh,       0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_rl_s16,            s390_verllvh,       0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_rl_u32,            s390_verllvf,       0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_rl_s32,            s390_verllvf,       0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_rl_u64,            s390_verllvg,       0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_rl_s64,            s390_verllvg,       0,                  BT_OV_V2DI_V2DI_UV2DI)
++
++B_DEF      (s390_verllvb,               vrotlv16qi3,        0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_verllvh,               vrotlv8hi3,         0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_verllvf,               vrotlv4si3,         0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_verllvg,               vrotlv2di3,         0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_rli,               s390_vec_rli_u8,    s390_vec_rli_s64,   B_VX,               BT_FN_OV4SI_OV4SI_ULONG) /* last type field is ULONG in every variant here, while the s390_verll{b,h,f,g} B_DEF rows use UINT */
++OB_DEF_VAR (s390_vec_rli_u8,            s390_verllb,        0,                  BT_OV_UV16QI_UV16QI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s8,            s390_verllb,        0,                  BT_OV_V16QI_V16QI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u16,           s390_verllh,        0,                  BT_OV_UV8HI_UV8HI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s16,           s390_verllh,        0,                  BT_OV_V8HI_V8HI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u32,           s390_verllf,        0,                  BT_OV_UV4SI_UV4SI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s32,           s390_verllf,        0,                  BT_OV_V4SI_V4SI_ULONG)
++OB_DEF_VAR (s390_vec_rli_u64,           s390_verllg,        0,                  BT_OV_UV2DI_UV2DI_ULONG)
++OB_DEF_VAR (s390_vec_rli_s64,           s390_verllg,        0,                  BT_OV_V2DI_V2DI_ULONG)
++
++B_DEF      (s390_verllb,                rotlv16qi3,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UINT)
++B_DEF      (s390_verllh,                rotlv8hi3,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UINT)
++B_DEF      (s390_verllf,                rotlv4si3,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UINT)
++B_DEF      (s390_verllg,                rotlv2di3,          0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UINT)
++
++OB_DEF     (s390_vec_rl_mask,           s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI_UCHAR) /* variants flag O3_U8 and list 4 type fields; the s390_verim* B_DEF rows flag O4_U8 and list 5 -- NOTE(review): presumably one extra vector operand is supplied when the overload is resolved, confirm */
++OB_DEF_VAR (s390_vec_rl_mask_s8,        s390_verimb,        O3_U8,              BT_OV_V16QI_V16QI_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u8,        s390_verimb,        O3_U8,              BT_OV_UV16QI_UV16QI_UV16QI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s16,       s390_verimh,        O3_U8,              BT_OV_V8HI_V8HI_UV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u16,       s390_verimh,        O3_U8,              BT_OV_UV8HI_UV8HI_UV8HI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s32,       s390_verimf,        O3_U8,              BT_OV_V4SI_V4SI_UV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u32,       s390_verimf,        O3_U8,              BT_OV_UV4SI_UV4SI_UV4SI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_s64,       s390_verimg,        O3_U8,              BT_OV_V2DI_V2DI_UV2DI_UCHAR)
++OB_DEF_VAR (s390_vec_rl_mask_u64,       s390_verimg,        O3_U8,              BT_OV_UV2DI_UV2DI_UV2DI_UCHAR)
++
++B_DEF      (s390_verimb,                verimv16qi,         0,                  B_VX,               O4_U8,              BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF      (s390_verimh,                verimv8hi,          0,                  B_VX,               O4_U8,              BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF      (s390_verimf,                verimv4si,          0,                  B_VX,               O4_U8,              BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF      (s390_verimg,                verimv2di,          0,                  B_VX,               O4_U8,              BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT)
++
++OB_DEF     (s390_vec_sll,               s390_vec_sll_u8q,   s390_vec_sll_b64s,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sll_u8q,           s390_vsl,           0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u8h,           s390_vsl,           0,                  BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u8s,           s390_vsl,           0,                  BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s8q,           s390_vsl,           0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s8h,           s390_vsl,           0,                  BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s8s,           s390_vsl,           0,                  BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b8q,           s390_vsl,           0,                  BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b8h,           s390_vsl,           0,                  BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b8s,           s390_vsl,           0,                  BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u16q,          s390_vsl,           0,                  BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u16h,          s390_vsl,           0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u16s,          s390_vsl,           0,                  BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s16q,          s390_vsl,           0,                  BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s16h,          s390_vsl,           0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s16s,          s390_vsl,           0,                  BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b16q,          s390_vsl,           0,                  BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b16h,          s390_vsl,           0,                  BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b16s,          s390_vsl,           0,                  BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u32q,          s390_vsl,           0,                  BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u32h,          s390_vsl,           0,                  BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u32s,          s390_vsl,           0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s32q,          s390_vsl,           0,                  BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s32h,          s390_vsl,           0,                  BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s32s,          s390_vsl,           0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b32q,          s390_vsl,           0,                  BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b32h,          s390_vsl,           0,                  BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b32s,          s390_vsl,           0,                  BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_u64q,          s390_vsl,           0,                  BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_u64h,          s390_vsl,           0,                  BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_u64s,          s390_vsl,           0,                  BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_s64q,          s390_vsl,           0,                  BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_s64h,          s390_vsl,           0,                  BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_s64s,          s390_vsl,           0,                  BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sll_b64q,          s390_vsl,           0,                  BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sll_b64h,          s390_vsl,           0,                  BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sll_b64s,          s390_vsl,           0,                  BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF      (s390_vsl,                   vec_sllv16qiv16qi,  0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_slb,               s390_vec_slb_u8_u8, s390_vec_slb_dbl_s64,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_slb_u8_u8,         s390_vslb,          0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_slb_u8_s8,         s390_vslb,          0,                  BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_slb_s8_u8,         s390_vslb,          0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_slb_s8_s8,         s390_vslb,          0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_slb_u16_u16,       s390_vslb,          0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_slb_u16_s16,       s390_vslb,          0,                  BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_slb_s16_u16,       s390_vslb,          0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_slb_s16_s16,       s390_vslb,          0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_slb_u32_u32,       s390_vslb,          0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_slb_u32_s32,       s390_vslb,          0,                  BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_slb_s32_u32,       s390_vslb,          0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_slb_s32_s32,       s390_vslb,          0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_slb_u64_u64,       s390_vslb,          0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_slb_u64_s64,       s390_vslb,          0,                  BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_slb_s64_u64,       s390_vslb,          0,                  BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_slb_s64_s64,       s390_vslb,          0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_slb_dbl_u64,       s390_vslb,          0,                  BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_slb_dbl_s64,       s390_vslb,          0,                  BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF      (s390_vslb,                  vec_slbv16qi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_sld,               s390_vec_sld_s8,    s390_vec_sld_dbl,   B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s8,            s390_vsldb,         O3_U4,              BT_OV_V16QI_V16QI_V16QI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u8,            s390_vsldb,         O3_U4,              BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s16,           s390_vsldb,         O3_U4,              BT_OV_V8HI_V8HI_V8HI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u16,           s390_vsldb,         O3_U4,              BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s32,           s390_vsldb,         O3_U4,              BT_OV_V4SI_V4SI_V4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u32,           s390_vsldb,         O3_U4,              BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_s64,           s390_vsldb,         O3_U4,              BT_OV_V2DI_V2DI_V2DI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_u64,           s390_vsldb,         O3_U4,              BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG)
++OB_DEF_VAR (s390_vec_sld_dbl,           s390_vsldb,         O3_U4,              BT_OV_V2DF_V2DF_V2DF_ULONGLONG)
++
++B_DEF      (s390_vsldb,                 vec_sldv16qi,       0,                  B_VX,               O3_U4,              BT_FN_UV16QI_UV16QI_UV16QI_INT)
++
++OB_DEF     (s390_vec_sldw,              s390_vec_sldw_s8,   s390_vec_sldw_dbl,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_s8,           s390_vsldb,         O3_U4,              BT_OV_V16QI_V16QI_V16QI_INT)
++OB_DEF_VAR (s390_vec_sldw_u8,           s390_vsldb,         O3_U4,              BT_OV_UV16QI_UV16QI_UV16QI_INT)
++OB_DEF_VAR (s390_vec_sldw_s16,          s390_vsldb,         O3_U4,              BT_OV_V8HI_V8HI_V8HI_INT)
++OB_DEF_VAR (s390_vec_sldw_u16,          s390_vsldb,         O3_U4,              BT_OV_UV8HI_UV8HI_UV8HI_INT)
++OB_DEF_VAR (s390_vec_sldw_s32,          s390_vsldb,         O3_U4,              BT_OV_V4SI_V4SI_V4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_u32,          s390_vsldb,         O3_U4,              BT_OV_UV4SI_UV4SI_UV4SI_INT)
++OB_DEF_VAR (s390_vec_sldw_s64,          s390_vsldb,         O3_U4,              BT_OV_V2DI_V2DI_V2DI_INT)
++OB_DEF_VAR (s390_vec_sldw_u64,          s390_vsldb,         O3_U4,              BT_OV_UV2DI_UV2DI_UV2DI_INT)
++OB_DEF_VAR (s390_vec_sldw_dbl,          s390_vsldb,         O3_U4,              BT_OV_V2DF_V2DF_V2DF_INT)
++
++OB_DEF     (s390_vec_sral,              s390_vec_sral_u8q,  s390_vec_sral_b64s, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sral_u8q,          s390_vsra,          0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u8h,          s390_vsra,          0,                  BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u8s,          s390_vsra,          0,                  BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s8q,          s390_vsra,          0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s8h,          s390_vsra,          0,                  BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s8s,          s390_vsra,          0,                  BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b8q,          s390_vsra,          0,                  BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b8h,          s390_vsra,          0,                  BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b8s,          s390_vsra,          0,                  BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u16q,         s390_vsra,          0,                  BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u16h,         s390_vsra,          0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u16s,         s390_vsra,          0,                  BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s16q,         s390_vsra,          0,                  BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s16h,         s390_vsra,          0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s16s,         s390_vsra,          0,                  BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b16q,         s390_vsra,          0,                  BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b16h,         s390_vsra,          0,                  BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b16s,         s390_vsra,          0,                  BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u32q,         s390_vsra,          0,                  BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u32h,         s390_vsra,          0,                  BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u32s,         s390_vsra,          0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s32q,         s390_vsra,          0,                  BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s32h,         s390_vsra,          0,                  BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s32s,         s390_vsra,          0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b32q,         s390_vsra,          0,                  BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b32h,         s390_vsra,          0,                  BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b32s,         s390_vsra,          0,                  BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_u64q,         s390_vsra,          0,                  BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_u64h,         s390_vsra,          0,                  BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_u64s,         s390_vsra,          0,                  BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_s64q,         s390_vsra,          0,                  BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_s64h,         s390_vsra,          0,                  BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_s64s,         s390_vsra,          0,                  BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_sral_b64q,         s390_vsra,          0,                  BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_sral_b64h,         s390_vsra,          0,                  BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_sral_b64s,         s390_vsra,          0,                  BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF      (s390_vsra,                  vec_sralv16qiv16qi, 0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_srab,              s390_vec_srab_u8_u8,s390_vec_srab_dbl_s64,B_VX,             BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srab_u8_u8,        s390_vsrab,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srab_u8_s8,        s390_vsrab,         0,                  BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_srab_s8_u8,        s390_vsrab,         0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srab_s8_s8,        s390_vsrab,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_srab_u16_u16,      s390_vsrab,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srab_u16_s16,      s390_vsrab,         0,                  BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_srab_s16_u16,      s390_vsrab,         0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srab_s16_s16,      s390_vsrab,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_srab_u32_u32,      s390_vsrab,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srab_u32_s32,      s390_vsrab,         0,                  BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_srab_s32_u32,      s390_vsrab,         0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srab_s32_s32,      s390_vsrab,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_srab_u64_u64,      s390_vsrab,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srab_u64_s64,      s390_vsrab,         0,                  BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_srab_s64_u64,      s390_vsrab,         0,                  BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srab_s64_s64,      s390_vsrab,         0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_srab_dbl_u64,      s390_vsrab,         0,                  BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_srab_dbl_s64,      s390_vsrab,         0,                  BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF      (s390_vsrab,                 vec_srabv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_srl,               s390_vec_srl_u8q,   s390_vec_srl_b64s,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srl_u8q,           s390_vsrl,          0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u8h,           s390_vsrl,          0,                  BT_OV_UV16QI_UV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u8s,           s390_vsrl,          0,                  BT_OV_UV16QI_UV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s8q,           s390_vsrl,          0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s8h,           s390_vsrl,          0,                  BT_OV_V16QI_V16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s8s,           s390_vsrl,          0,                  BT_OV_V16QI_V16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b8q,           s390_vsrl,          0,                  BT_OV_BV16QI_BV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b8h,           s390_vsrl,          0,                  BT_OV_BV16QI_BV16QI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b8s,           s390_vsrl,          0,                  BT_OV_BV16QI_BV16QI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u16q,          s390_vsrl,          0,                  BT_OV_UV8HI_UV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u16h,          s390_vsrl,          0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u16s,          s390_vsrl,          0,                  BT_OV_UV8HI_UV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s16q,          s390_vsrl,          0,                  BT_OV_V8HI_V8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s16h,          s390_vsrl,          0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s16s,          s390_vsrl,          0,                  BT_OV_V8HI_V8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b16q,          s390_vsrl,          0,                  BT_OV_BV8HI_BV8HI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b16h,          s390_vsrl,          0,                  BT_OV_BV8HI_BV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b16s,          s390_vsrl,          0,                  BT_OV_BV8HI_BV8HI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u32q,          s390_vsrl,          0,                  BT_OV_UV4SI_UV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u32h,          s390_vsrl,          0,                  BT_OV_UV4SI_UV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u32s,          s390_vsrl,          0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s32q,          s390_vsrl,          0,                  BT_OV_V4SI_V4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s32h,          s390_vsrl,          0,                  BT_OV_V4SI_V4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s32s,          s390_vsrl,          0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b32q,          s390_vsrl,          0,                  BT_OV_BV4SI_BV4SI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b32h,          s390_vsrl,          0,                  BT_OV_BV4SI_BV4SI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b32s,          s390_vsrl,          0,                  BT_OV_BV4SI_BV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_u64q,          s390_vsrl,          0,                  BT_OV_UV2DI_UV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_u64h,          s390_vsrl,          0,                  BT_OV_UV2DI_UV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_u64s,          s390_vsrl,          0,                  BT_OV_UV2DI_UV2DI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_s64q,          s390_vsrl,          0,                  BT_OV_V2DI_V2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_s64h,          s390_vsrl,          0,                  BT_OV_V2DI_V2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_s64s,          s390_vsrl,          0,                  BT_OV_V2DI_V2DI_UV4SI)
++OB_DEF_VAR (s390_vec_srl_b64q,          s390_vsrl,          0,                  BT_OV_BV2DI_BV2DI_UV16QI)
++OB_DEF_VAR (s390_vec_srl_b64h,          s390_vsrl,          0,                  BT_OV_BV2DI_BV2DI_UV8HI)
++OB_DEF_VAR (s390_vec_srl_b64s,          s390_vsrl,          0,                  BT_OV_BV2DI_BV2DI_UV4SI)
++
++B_DEF      (s390_vsrl,                  vec_srlv16qiv16qi,  0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_srb,               s390_vec_srb_u8_u8, s390_vec_srb_dbl_s64,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_srb_u8_u8,         s390_vsrlb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srb_u8_s8,         s390_vsrlb,         0,                  BT_OV_UV16QI_UV16QI_V16QI)
++OB_DEF_VAR (s390_vec_srb_s8_u8,         s390_vsrlb,         0,                  BT_OV_V16QI_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_srb_s8_s8,         s390_vsrlb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vec_srb_u16_u16,       s390_vsrlb,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srb_u16_s16,       s390_vsrlb,         0,                  BT_OV_UV8HI_UV8HI_V8HI)
++OB_DEF_VAR (s390_vec_srb_s16_u16,       s390_vsrlb,         0,                  BT_OV_V8HI_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_srb_s16_s16,       s390_vsrlb,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vec_srb_u32_u32,       s390_vsrlb,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srb_u32_s32,       s390_vsrlb,         0,                  BT_OV_UV4SI_UV4SI_V4SI)
++OB_DEF_VAR (s390_vec_srb_s32_u32,       s390_vsrlb,         0,                  BT_OV_V4SI_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_srb_s32_s32,       s390_vsrlb,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vec_srb_u64_u64,       s390_vsrlb,         0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srb_u64_s64,       s390_vsrlb,         0,                  BT_OV_UV2DI_UV2DI_V2DI)
++OB_DEF_VAR (s390_vec_srb_s64_u64,       s390_vsrlb,         0,                  BT_OV_V2DI_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_srb_s64_s64,       s390_vsrlb,         0,                  BT_OV_V2DI_V2DI_V2DI)
++OB_DEF_VAR (s390_vec_srb_dbl_u64,       s390_vsrlb,         0,                  BT_OV_V2DF_V2DF_UV2DI)
++OB_DEF_VAR (s390_vec_srb_dbl_s64,       s390_vsrlb,         0,                  BT_OV_V2DF_V2DF_V2DI)
++
++B_DEF      (s390_vsrlb,                 vec_srbv16qi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vsq,                   vec_sub_u128,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_subc,              s390_vec_subc_u8,   s390_vec_subc_u64,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_subc_u8,           s390_vscbib,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_subc_u16,          s390_vscbih,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_subc_u32,          s390_vscbif,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_subc_u64,          s390_vscbig,        0,                  BT_OV_UV2DI_UV2DI_UV2DI)
++
++B_DEF      (s390_vscbib,                vec_subcv16qi,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vscbih,                vec_subcv8hi,       0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vscbif,                vec_subcv4si,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vscbig,                vec_subcv2di,       0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI)
++B_DEF      (s390_vscbiq,                vec_subc_u128,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vsbiq,                 vec_sube_u128,      0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vsbcbiq,               vec_subec_u128,     0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
++
++OB_DEF     (s390_vec_sum2,              s390_vec_sum2_u16,  s390_vec_sum2_u32,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum2_u16,          s390_vsumgh,        0,                  BT_OV_UV2DI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_sum2_u32,          s390_vsumgf,        0,                  BT_OV_UV2DI_UV4SI_UV4SI)
++
++B_DEF      (s390_vsumgh,                vec_sum2v8hi,       0,                  B_VX,               0,                  BT_FN_UV2DI_UV8HI_UV8HI)
++B_DEF      (s390_vsumgf,                vec_sum2v4si,       0,                  B_VX,               0,                  BT_FN_UV2DI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_sum_u128,          s390_vec_sum_u128_u32,s390_vec_sum_u128_u64,B_VX,           BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum_u128_u32,      s390_vsumqf,        0,                  BT_OV_UV16QI_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_sum_u128_u64,      s390_vsumqg,        0,                  BT_OV_UV16QI_UV2DI_UV2DI)
++
++B_DEF      (s390_vsumqf,                vec_sum_u128v4si,   0,                  B_VX,               0,                  BT_FN_UV16QI_UV4SI_UV4SI)
++B_DEF      (s390_vsumqg,                vec_sum_u128v2di,   0,                  B_VX,               0,                  BT_FN_UV16QI_UV2DI_UV2DI)
++
++OB_DEF     (s390_vec_sum4,              s390_vec_sum4_u8,   s390_vec_sum4_u16,  B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_sum4_u8,           s390_vsumb,         0,                  BT_OV_UV4SI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_sum4_u16,          s390_vsumh,         0,                  BT_OV_UV4SI_UV8HI_UV8HI)
++
++B_DEF      (s390_vsumb,                 vec_sum4v16qi,      0,                  B_VX,               0,                  BT_FN_UV4SI_UV16QI_UV16QI)
++B_DEF      (s390_vsumh,                 vec_sum4v8hi,       0,                  B_VX,               0,                  BT_FN_UV4SI_UV8HI_UV8HI)
++
++OB_DEF     (s390_vec_test_mask,         s390_vec_test_mask_s8,s390_vec_test_mask_dbl,B_VX,          BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vec_test_mask_s8,      s390_vtm,           0,                  BT_OV_INT_V16QI_UV16QI)
++OB_DEF_VAR (s390_vec_test_mask_u8,      s390_vtm,           0,                  BT_OV_INT_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vec_test_mask_s16,     s390_vtm,           0,                  BT_OV_INT_V8HI_UV8HI)
++OB_DEF_VAR (s390_vec_test_mask_u16,     s390_vtm,           0,                  BT_OV_INT_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vec_test_mask_s32,     s390_vtm,           0,                  BT_OV_INT_V4SI_UV4SI)
++OB_DEF_VAR (s390_vec_test_mask_u32,     s390_vtm,           0,                  BT_OV_INT_UV4SI_UV4SI)
++OB_DEF_VAR (s390_vec_test_mask_s64,     s390_vtm,           0,                  BT_OV_INT_V2DI_UV2DI)
++OB_DEF_VAR (s390_vec_test_mask_u64,     s390_vtm,           0,                  BT_OV_INT_UV2DI_UV2DI)
++OB_DEF_VAR (s390_vec_test_mask_dbl,     s390_vtm,           0,                  BT_OV_INT_V2DF_UV2DI)
++
++B_DEF      (s390_vtm,                   vec_test_mask_intv16qi,0,               B_VX,               0,                  BT_FN_INT_UV16QI_UV16QI)
++B_DEF      (s390_vfaeb,                 vfaev16qi,          0,                  B_VX,               O3_U4,              BT_FN_UV16QI_UV16QI_UV16QI_INT)
++B_DEF      (s390_vfaeh,                 vfaev8hi,           0,                  B_VX,               O3_U4,              BT_FN_UV8HI_UV8HI_UV8HI_INT)
++B_DEF      (s390_vfaef,                 vfaev4si,           0,                  B_VX,               O3_U4,              BT_FN_UV4SI_UV4SI_UV4SI_INT)
++B_DEF      (s390_vfaezb,                vfaezv16qi,         0,                  B_VX,               O3_U4,              BT_FN_UV16QI_UV16QI_UV16QI_INT)
++B_DEF      (s390_vfaezh,                vfaezv8hi,          0,                  B_VX,               O3_U4,              BT_FN_UV8HI_UV8HI_UV8HI_INT)
++B_DEF      (s390_vfaezf,                vfaezv4si,          0,                  B_VX,               O3_U4,              BT_FN_UV4SI_UV4SI_UV4SI_INT)
++B_DEF      (s390_vfaebs,                vfaesv16qi,         0,                  B_VX,               O3_U4,              BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF      (s390_vfaehs,                vfaesv8hi,          0,                  B_VX,               O3_U4,              BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF      (s390_vfaefs,                vfaesv4si,          0,                  B_VX,               O3_U4,              BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++B_DEF      (s390_vfaezbs,               vfaezsv16qi,        0,                  B_VX,               O3_U4,              BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF      (s390_vfaezhs,               vfaezsv8hi,         0,                  B_VX,               O3_U4,              BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF      (s390_vfaezfs,               vfaezsv4si,         0,                  B_VX,               O3_U4,              BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++
++OB_DEF     (s390_vec_find_any_eq_idx,   s390_vfaeb_idx_s8,  s390_vfaef_idx_u32b,B_VX,               BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_idx_s8,          s390_vfaeb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_idx_u8a,         s390_vfaeb,         0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_idx_u8b,         s390_vfaeb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_idx_s16,         s390_vfaeh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_idx_u16a,        s390_vfaeh,         0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_idx_u16b,        s390_vfaeh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_idx_s32,         s390_vfaef,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_idx_u32a,        s390_vfaef,         0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_idx_u32b,        s390_vfaef,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_ne_idx,   s390_vfaeb_inv_idx_s8,s390_vfaef_inv_idx_u32b,B_VX,         BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_s8,      s390_vfaeb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_u8a,     s390_vfaeb,         0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_inv_idx_u8b,     s390_vfaeb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_s16,     s390_vfaeh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_u16a,    s390_vfaeh,         0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_inv_idx_u16b,    s390_vfaeh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_inv_idx_s32,     s390_vfaef,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_inv_idx_u32a,    s390_vfaef,         0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_inv_idx_u32b,    s390_vfaef,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_eq_or_0_idx,s390_vfaezb_idx_s8,s390_vfaezf_idx_u32b,B_VX,             BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaezb_idx_s8,         s390_vfaezb,        0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaezb_idx_u8a,        s390_vfaezb,        0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaezb_idx_u8b,        s390_vfaezb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaezh_idx_s16,        s390_vfaezh,        0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaezh_idx_u16a,       s390_vfaezh,        0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaezh_idx_u16b,       s390_vfaezh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaezf_idx_s32,        s390_vfaezf,        0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaezf_idx_u32a,       s390_vfaezf,        0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaezf_idx_u32b,       s390_vfaezf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_ne_or_0_idx,s390_vfaezb_inv_idx_s8,s390_vfaezf_inv_idx_u32b,B_VX,     BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_s8,     s390_vfaezb,        0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_u8a,    s390_vfaezb,        0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaezb_inv_idx_u8b,    s390_vfaezb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_s16,    s390_vfaezh,        0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_u16a,   s390_vfaezh,        0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaezh_inv_idx_u16b,   s390_vfaezh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_s32,    s390_vfaezf,        0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_u32a,   s390_vfaezf,        0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaezf_inv_idx_u32b,   s390_vfaezf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_eq,       s390_vfaeb_s8,      s390_vfaef_b32,     B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_s8,              s390_vfaeb,         0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_u8,              s390_vfaeb,         0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_b8,              s390_vfaeb,         0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_s16,             s390_vfaeh,         0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_u16,             s390_vfaeh,         0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_b16,             s390_vfaeh,         0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_s32,             s390_vfaef,         0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_u32,             s390_vfaef,         0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_b32,             s390_vfaef,         0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_ne,       s390_vfaeb_inv_s8,  s390_vfaef_inv_b32, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfaeb_inv_s8,          s390_vfaeb,         0,                  BT_OV_BV16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfaeb_inv_u8,          s390_vfaeb,         0,                  BT_OV_BV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfaeb_inv_b8,          s390_vfaeb,         0,                  BT_OV_BV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfaeh_inv_s16,         s390_vfaeh,         0,                  BT_OV_BV8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfaeh_inv_u16,         s390_vfaeh,         0,                  BT_OV_BV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfaeh_inv_b16,         s390_vfaeh,         0,                  BT_OV_BV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfaef_inv_s32,         s390_vfaef,         0,                  BT_OV_BV4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfaef_inv_u32,         s390_vfaef,         0,                  BT_OV_BV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfaef_inv_b32,         s390_vfaef,         0,                  BT_OV_BV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_find_any_eq_idx_cc,s390_vfaebs_idx_s8, s390_vfaefs_idx_u32b,B_VX,              BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_s8,         s390_vfaebs,        0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_u8a,        s390_vfaebs,        0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_idx_u8b,        s390_vfaebs,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_s16,        s390_vfaehs,        0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_u16a,       s390_vfaehs,        0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_idx_u16b,       s390_vfaehs,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_s32,        s390_vfaefs,        0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_u32a,       s390_vfaefs,        0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_idx_u32b,       s390_vfaefs,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_find_any_ne_idx_cc,s390_vfaebs_inv_idx_s8,s390_vfaefs_inv_idx_u32b,B_VX,       BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_s8,     s390_vfaebs,        0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_u8a,    s390_vfaebs,        0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_idx_u8b,    s390_vfaebs,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_s16,    s390_vfaehs,        0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_u16a,   s390_vfaehs,        0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_idx_u16b,   s390_vfaehs,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_s32,    s390_vfaefs,        0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_u32a,   s390_vfaefs,        0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_idx_u32b,   s390_vfaefs,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_find_any_eq_or_0_idx_cc,s390_vfaezbs_idx_s8,s390_vfaezfs_idx_u32b,B_VX,        BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_s8,        s390_vfaezbs,       0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_u8a,       s390_vfaezbs,       0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_idx_u8b,       s390_vfaezbs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_s16,       s390_vfaezhs,       0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_u16a,      s390_vfaezhs,       0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_idx_u16b,      s390_vfaezhs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_s32,       s390_vfaezfs,       0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_u32a,      s390_vfaezfs,       0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_idx_u32b,      s390_vfaezfs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_find_any_ne_or_0_idx_cc,s390_vfaezbs_inv_idx_s8,s390_vfaezfs_inv_idx_u32b,B_VX,BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_s8,    s390_vfaezbs,       0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8a,   s390_vfaezbs,       0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8b,   s390_vfaezbs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_s16,   s390_vfaezhs,       0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16a,  s390_vfaezhs,       0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16b,  s390_vfaezhs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_s32,   s390_vfaezfs,       0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32a,  s390_vfaezfs,       0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32b,  s390_vfaezfs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_find_any_eq_cc,    s390_vfaebs_s8,     s390_vfaefs_b32,    B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_s8,             s390_vfaebs,        0,                  BT_OV_BV16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_u8,             s390_vfaebs,        0,                  BT_OV_BV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_b8,             s390_vfaebs,        0,                  BT_OV_BV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_s16,            s390_vfaehs,        0,                  BT_OV_BV8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_u16,            s390_vfaehs,        0,                  BT_OV_BV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_b16,            s390_vfaehs,        0,                  BT_OV_BV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_s32,            s390_vfaefs,        0,                  BT_OV_BV4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_u32,            s390_vfaefs,        0,                  BT_OV_BV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_b32,            s390_vfaefs,        0,                  BT_OV_BV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_find_any_ne_cc,    s390_vfaebs_inv_s8, s390_vfaefs_inv_b32,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_s8,         s390_vfaebs,        0,                  BT_OV_BV16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_u8,         s390_vfaebs,        0,                  BT_OV_BV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaebs_inv_b8,         s390_vfaebs,        0,                  BT_OV_BV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_s16,        s390_vfaehs,        0,                  BT_OV_BV8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_u16,        s390_vfaehs,        0,                  BT_OV_BV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaehs_inv_b16,        s390_vfaehs,        0,                  BT_OV_BV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_s32,        s390_vfaefs,        0,                  BT_OV_BV4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_u32,        s390_vfaefs,        0,                  BT_OV_BV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfaefs_inv_b32,        s390_vfaefs,        0,                  BT_OV_BV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF      (s390_vfeeb,                 vfeev16qi,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vfeeh,                 vfeev8hi,           0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vfeef,                 vfeev4si,           0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vfeezb,                vfeezv16qi,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vfeezh,                vfeezv8hi,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vfeezf,                vfeezv4si,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vfeebs,                vfeesv16qi,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vfeehs,                vfeesv8hi,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vfeefs,                vfeesv4si,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++B_DEF      (s390_vfeezbs,               vfeezsv16qi,        0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vfeezhs,               vfeezsv8hi,         0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vfeezfs,               vfeezsv4si,         0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpeq_idx,         s390_vfeeb_s8,      s390_vfeef_u32b,    B_VX,               BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeeb_s8,              s390_vfeeb,         0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeeb_u8a,             s390_vfeeb,         0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeeb_u8b,             s390_vfeeb,         0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeeh_s16,             s390_vfeeh,         0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeeh_u16a,            s390_vfeeh,         0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeeh_u16b,            s390_vfeeh,         0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfeef_s32,             s390_vfeef,         0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfeef_u32a,            s390_vfeef,         0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfeef_u32b,            s390_vfeef,         0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpeq_or_0_idx,    s390_vfeezb_s8,     s390_vfeezf_u32b,   B_VX,               BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeezb_s8,             s390_vfeezb,        0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeezb_u8a,            s390_vfeezb,        0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeezb_u8b,            s390_vfeezb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeezh_s16,            s390_vfeezh,        0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeezh_u16a,           s390_vfeezh,        0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeezh_u16b,           s390_vfeezh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfeezf_s32,            s390_vfeezf,        0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfeezf_u32a,           s390_vfeezf,        0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfeezf_u32b,           s390_vfeezf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpeq_idx_cc,      s390_vfeebs_s8,     s390_vfeefs_u32b,   B_VX,               BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_s8,             s390_vfeebs,        0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_u8a,            s390_vfeebs,        0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeebs_u8b,            s390_vfeebs,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_s16,            s390_vfeehs,        0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_u16a,           s390_vfeehs,        0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeehs_u16b,           s390_vfeehs,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_s32,            s390_vfeefs,        0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_u32a,           s390_vfeefs,        0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeefs_u32b,           s390_vfeefs,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpeq_or_0_idx_cc, s390_vfeezbs_s8,    s390_vfeezfs_u32b,  B_VX,               BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_s8,            s390_vfeezbs,       0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_u8a,           s390_vfeezbs,       0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezbs_u8b,           s390_vfeezbs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_s16,           s390_vfeezhs,       0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_u16a,          s390_vfeezhs,       0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezhs_u16b,          s390_vfeezhs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_s32,           s390_vfeezfs,       0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_u32a,          s390_vfeezfs,       0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfeezfs_u32b,          s390_vfeezfs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF      (s390_vfeneb,                vfenev16qi,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vfeneh,                vfenev8hi,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vfenef,                vfenev4si,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vfenezb,               vfenezv16qi,        0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI)
++B_DEF      (s390_vfenezh,               vfenezv8hi,         0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI)
++B_DEF      (s390_vfenezf,               vfenezv4si,         0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI)
++B_DEF      (s390_vfenebs,               vfenesv16qi,        0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vfenehs,               vfenesv8hi,         0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vfenefs,               vfenesv4si,         0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++B_DEF      (s390_vfenezbs,              vfenezsv16qi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vfenezhs,              vfenezsv8hi,        0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vfenezfs,              vfenezsv4si,        0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpne_idx,         s390_vfeneb_s8,     s390_vfenef_u32b,   B_VX,               BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfeneb_s8,             s390_vfeneb,        0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfeneb_u8a,            s390_vfeneb,        0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfeneb_u8b,            s390_vfeneb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfeneh_s16,            s390_vfeneh,        0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfeneh_u16a,           s390_vfeneh,        0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfeneh_u16b,           s390_vfeneh,        0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfenef_s32,            s390_vfenef,        0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfenef_u32a,           s390_vfenef,        0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfenef_u32b,           s390_vfenef,        0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpne_or_0_idx,    s390_vfenezb_s8,    s390_vfenezf_u32b,  B_VX,               BT_FN_INT_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vfenezb_s8,            s390_vfenezb,       0,                  BT_OV_V16QI_V16QI_V16QI)
++OB_DEF_VAR (s390_vfenezb_u8a,           s390_vfenezb,       0,                  BT_OV_UV16QI_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vfenezb_u8b,           s390_vfenezb,       0,                  BT_OV_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vfenezh_s16,           s390_vfenezh,       0,                  BT_OV_V8HI_V8HI_V8HI)
++OB_DEF_VAR (s390_vfenezh_u16a,          s390_vfenezh,       0,                  BT_OV_UV8HI_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vfenezh_u16b,          s390_vfenezh,       0,                  BT_OV_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vfenezf_s32,           s390_vfenezf,       0,                  BT_OV_V4SI_V4SI_V4SI)
++OB_DEF_VAR (s390_vfenezf_u32a,          s390_vfenezf,       0,                  BT_OV_UV4SI_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vfenezf_u32b,          s390_vfenezf,       0,                  BT_OV_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpne_idx_cc,      s390_vfenebs_s8,    s390_vfenefs_u32b,  B_VX,               BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_s8,            s390_vfenebs,       0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_u8a,           s390_vfenebs,       0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenebs_u8b,           s390_vfenebs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_s16,           s390_vfenehs,       0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_u16a,          s390_vfenehs,       0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenehs_u16b,          s390_vfenehs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_s32,           s390_vfenefs,       0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_u32a,          s390_vfenefs,       0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenefs_u32b,          s390_vfenefs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpne_or_0_idx_cc, s390_vfenezbs_s8,   s390_vfenezfs_u32b, B_VX,               BT_FN_INT_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_s8,           s390_vfenezbs,      0,                  BT_OV_V16QI_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_u8a,          s390_vfenezbs,      0,                  BT_OV_UV16QI_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezbs_u8b,          s390_vfenezbs,      0,                  BT_OV_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_s16,          s390_vfenezhs,      0,                  BT_OV_V8HI_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_u16a,         s390_vfenezhs,      0,                  BT_OV_UV8HI_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezhs_u16b,         s390_vfenezhs,      0,                  BT_OV_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_s32,          s390_vfenezfs,      0,                  BT_OV_V4SI_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_u32a,         s390_vfenezfs,      0,                  BT_OV_UV4SI_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vfenezfs_u32b,         s390_vfenezfs,      0,                  BT_OV_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF      (s390_vistrb,                vistrv16qi,         0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI)
++B_DEF      (s390_vistrh,                vistrv8hi,          0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI)
++B_DEF      (s390_vistrf,                vistrv4si,          0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI)
++B_DEF      (s390_vistrbs,               vistrsv16qi,        0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_INTPTR)
++B_DEF      (s390_vistrhs,               vistrsv8hi,         0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_INTPTR)
++B_DEF      (s390_vistrfs,               vistrsv4si,         0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cp_until_zero,     s390_vistrb_s8,     s390_vistrf_u32,    B_VX,               BT_FN_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vistrb_s8,             s390_vistrb,        0,                  BT_OV_V16QI_V16QI)
++OB_DEF_VAR (s390_vistrb_b8,             s390_vistrb,        0,                  BT_OV_BV16QI_BV16QI)
++OB_DEF_VAR (s390_vistrb_u8,             s390_vistrb,        0,                  BT_OV_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vistrh_s16,            s390_vistrh,        0,                  BT_OV_V8HI_V8HI)
++OB_DEF_VAR (s390_vistrh_b16,            s390_vistrh,        0,                  BT_OV_BV8HI_BV8HI)
++OB_DEF_VAR (s390_vistrh_u16,            s390_vistrh,        0,                  BT_OV_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vistrf_s32,            s390_vistrf,        0,                  BT_OV_V4SI_V4SI)
++OB_DEF_VAR (s390_vistrf_b32,            s390_vistrf,        0,                  BT_OV_BV4SI_BV4SI)
++OB_DEF_VAR (s390_vistrf_u32,            s390_vistrf,        0,                  BT_OV_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cp_until_zero_cc,  s390_vistrbs_s8,    s390_vistrfs_u32,   B_VX,               BT_FN_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_s8,            s390_vistrbs,       0,                  BT_OV_V16QI_V16QI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_b8,            s390_vistrbs,       0,                  BT_OV_BV16QI_BV16QI_INTPTR)
++OB_DEF_VAR (s390_vistrbs_u8,            s390_vistrbs,       0,                  BT_OV_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_s16,           s390_vistrhs,       0,                  BT_OV_V8HI_V8HI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_b16,           s390_vistrhs,       0,                  BT_OV_BV8HI_BV8HI_INTPTR)
++OB_DEF_VAR (s390_vistrhs_u16,           s390_vistrhs,       0,                  BT_OV_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_s32,           s390_vistrfs,       0,                  BT_OV_V4SI_V4SI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_b32,           s390_vistrfs,       0,                  BT_OV_BV4SI_BV4SI_INTPTR)
++OB_DEF_VAR (s390_vistrfs_u32,           s390_vistrfs,       0,                  BT_OV_UV4SI_UV4SI_INTPTR)
++
++B_DEF      (s390_vstrcb,                vstrcv16qi,         0,                  B_VX,               O4_U4,              BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF      (s390_vstrch,                vstrcv8hi,          0,                  B_VX,               O4_U4,              BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF      (s390_vstrcf,                vstrcv4si,          0,                  B_VX,               O4_U4,              BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF      (s390_vstrczb,               vstrczv16qi,        0,                  B_VX,               O4_U4,              BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT)
++B_DEF      (s390_vstrczh,               vstrczv8hi,         0,                  B_VX,               O4_U4,              BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT)
++B_DEF      (s390_vstrczf,               vstrczv4si,         0,                  B_VX,               O4_U4,              BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT)
++B_DEF      (s390_vstrcbs,               vstrcsv16qi,        0,                  B_VX,               O4_U4,              BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF      (s390_vstrchs,               vstrcsv8hi,         0,                  B_VX,               O4_U4,              BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF      (s390_vstrcfs,               vstrcsv4si,         0,                  B_VX,               O4_U4,              BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++B_DEF      (s390_vstrczbs,              vstrczsv16qi,       0,                  B_VX,               O4_U4,              BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR)
++B_DEF      (s390_vstrczhs,              vstrczsv8hi,        0,                  B_VX,               O4_U4,              BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR)
++B_DEF      (s390_vstrczfs,              vstrczsv4si,        0,                  B_VX,               O4_U4,              BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR)
++
++OB_DEF     (s390_vec_cmprg_idx,         s390_vstrcb_idx_u8, s390_vstrcf_idx_u32,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_idx_u8,         s390_vstrcb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_idx_u16,        s390_vstrch,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_idx_u32,        s390_vstrcf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpnrg_idx,        s390_vstrcb_inv_idx_u8,s390_vstrcf_inv_idx_u32,B_VX,        BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_inv_idx_u8,     s390_vstrcb,        0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_inv_idx_u16,    s390_vstrch,        0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_inv_idx_u32,    s390_vstrcf,        0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmprg_or_0_idx,    s390_vstrczb_idx_u8,s390_vstrczf_idx_u32,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrczb_idx_u8,        s390_vstrczb,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrczh_idx_u16,       s390_vstrczh,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrczf_idx_u32,       s390_vstrczf,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpnrg_or_0_idx,   s390_vstrczb_inv_idx_u8,s390_vstrczf_inv_idx_u32,B_VX,      BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrczb_inv_idx_u8,    s390_vstrczb,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrczh_inv_idx_u16,   s390_vstrczh,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrczf_inv_idx_u32,   s390_vstrczf,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmprg,             s390_vstrcb_u8,     s390_vstrcf_u32,    B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_u8,             s390_vstrcb,        0,                  BT_OV_BV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_u16,            s390_vstrch,        0,                  BT_OV_BV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_u32,            s390_vstrcf,        0,                  BT_OV_BV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmpnrg,            s390_vstrcb_inv_u8, s390_vstrcf_inv_u32,B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
++OB_DEF_VAR (s390_vstrcb_inv_u8,         s390_vstrcb,        0,                  BT_OV_BV16QI_UV16QI_UV16QI_UV16QI)
++OB_DEF_VAR (s390_vstrch_inv_u16,        s390_vstrch,        0,                  BT_OV_BV8HI_UV8HI_UV8HI_UV8HI)
++OB_DEF_VAR (s390_vstrcf_inv_u32,        s390_vstrcf,        0,                  BT_OV_BV4SI_UV4SI_UV4SI_UV4SI)
++
++OB_DEF     (s390_vec_cmprg_idx_cc,      s390_vstrcbs_idx_u8,s390_vstrcfs_idx_u32,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_idx_u8,        s390_vstrcbs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_idx_u16,       s390_vstrchs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_idx_u32,       s390_vstrcfs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpnrg_idx_cc,     s390_vstrcbs_inv_idx_u8,s390_vstrcfs_inv_idx_u32,B_VX,      BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_inv_idx_u8,    s390_vstrcbs,       0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) /* vstrcbs */
++OB_DEF_VAR (s390_vstrchs_inv_idx_u16,   s390_vstrchs,       0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)    /* vstrchs */
++OB_DEF_VAR (s390_vstrcfs_inv_idx_u32,   s390_vstrcfs,       0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)    /* vstrcfs */
++
++OB_DEF     (s390_vec_cmprg_or_0_idx_cc, s390_vstrczbs_idx_u8,s390_vstrczfs_idx_u32,B_VX,            BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrczbs_idx_u8,       s390_vstrczbs,      0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrczhs_idx_u16,      s390_vstrczhs,      0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrczfs_idx_u32,      s390_vstrczfs,      0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpnrg_or_0_idx_cc,s390_vstrczbs_inv_idx_u8,s390_vstrczfs_inv_idx_u32,B_VX,    BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrczbs_inv_idx_u8,   s390_vstrczbs,      0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrczhs_inv_idx_u16,  s390_vstrczhs,      0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrczfs_inv_idx_u32,  s390_vstrczfs,      0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmprg_cc,          s390_vstrcbs_u8,    s390_vstrcfs_u32,   B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_u8,            s390_vstrcbs,       0,                  BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_u16,           s390_vstrchs,       0,                  BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_u32,           s390_vstrcfs,       0,                  BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++OB_DEF     (s390_vec_cmpnrg_cc,         s390_vstrcbs_inv_u8,s390_vstrcfs_inv_u32,B_VX,              BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR)
++OB_DEF_VAR (s390_vstrcbs_inv_u8,        s390_vstrcbs,       0,                  BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR)
++OB_DEF_VAR (s390_vstrchs_inv_u16,       s390_vstrchs,       0,                  BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR)
++OB_DEF_VAR (s390_vstrcfs_inv_u32,       s390_vstrcfs,       0,                  BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR)
++
++B_DEF      (s390_vec_all_nge,           vec_all_unltv2df,   0,                  B_VX,               0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (s390_vec_all_ngt,           vec_all_unlev2df,   0,                  B_VX,               0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (s390_vec_any_nge,           vec_any_unltv2df,   0,                  B_VX,               0,                  BT_FN_INT_V2DF_V2DF)
++B_DEF      (s390_vec_any_ngt,           vec_any_unlev2df,   0,                  B_VX,               0,                  BT_FN_INT_V2DF_V2DF)
++
++OB_DEF     (s390_vec_ctd,               s390_vec_ctd_s64,   s390_vec_ctd_u64,   B_VX,               BT_FN_V2DF_UV4SI_INT)
++OB_DEF_VAR (s390_vec_ctd_s64,           s390_vec_ctd_s64,   O2_U5,              BT_OV_V2DF_V2DI_INT)                     /* vcdgb */
++OB_DEF_VAR (s390_vec_ctd_u64,           s390_vec_ctd_u64,   O2_U5,              BT_OV_V2DF_UV2DI_INT)                    /* vcdlgb */
++
++B_DEF      (s390_vec_ctd_s64,           vec_ctd_s64,        0,                  B_VX,               O2_U3,              BT_FN_V2DF_V2DI_INT)                     /* vcdgb */
++B_DEF      (s390_vec_ctd_u64,           vec_ctd_u64,        0,                  B_VX,               O2_U3,              BT_FN_V2DF_UV2DI_INT)                    /* vcdlgb */
++B_DEF      (s390_vcdgb,                 vec_di_to_df_s64,   0,                  B_VX,               O2_U3,              BT_FN_V2DF_V2DI_INT)                     /* vcdgb */
++B_DEF      (s390_vcdlgb,                vec_di_to_df_u64,   0,                  B_VX,               O2_U3,              BT_FN_V2DF_UV2DI_INT)                    /* vcdlgb */
++B_DEF      (s390_vec_ctsl,              vec_ctsl,           0,                  B_VX,               O2_U3,              BT_FN_V2DI_V2DF_INT)                     /* vcgdb */
++B_DEF      (s390_vec_ctul,              vec_ctul,           0,                  B_VX,               O2_U3,              BT_FN_UV2DI_V2DF_INT)                    /* vclgdb */
++B_DEF      (s390_vcgdb,                 vec_df_to_di_s64,   0,                  B_VX,               O2_U3,              BT_FN_V2DI_V2DF_INT)                     /* vcgdb */
++B_DEF      (s390_vclgdb,                vec_df_to_di_u64,   0,                  B_VX,               O2_U3,              BT_FN_UV2DI_V2DF_INT)                    /* vclgdb */
++B_DEF      (s390_vfidb,                 vfidb,              0,                  B_VX,               O2_U4 | O3_U3,      BT_FN_V2DF_V2DF_UCHAR_UCHAR)
++B_DEF      (s390_vec_ld2f,              vec_ld2f,           0,                  B_VX,               0,                  BT_FN_V2DF_FLTCONSTPTR)                  /* vldeb */
++B_DEF      (s390_vec_st2f,              vec_st2f,           0,                  B_VX,               0,                  BT_FN_VOID_V2DF_FLTPTR)                  /* vledb */
++B_DEF      (s390_vfmadb,                fmav2df4,           0,                  B_VX,               0,                  BT_FN_V2DF_V2DF_V2DF_V2DF)
++B_DEF      (s390_vfmsdb,                fmsv2df4,           0,                  B_VX,               0,                  BT_FN_V2DF_V2DF_V2DF_V2DF)
++B_DEF      (s390_vflndb,                vec_nabs,           0,                  B_VX,               0,                  BT_FN_V2DF_V2DF)
++B_DEF      (s390_vfsqdb,                sqrtv2df2,          0,                  B_VX,               0,                  BT_FN_V2DF_V2DF)
++B_DEF      (s390_vftcidb,               vftcidb,            0,                  B_VX,               O2_U12,             BT_FN_V2DI_V2DF_INT_INTPTR)
+--- gcc/config/s390/s390-builtins.h	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/s390-builtins.h	2016-05-11 17:33:27.000000000 +0200
+@@ -0,0 +1,175 @@
++/* Common data structures used for builtin handling on S/390.
++   Copyright (C) 2015 Free Software Foundation, Inc.
++
++   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This file contains data structure definitions which can be used by
++   s390-builtins.c as well as s390-c.c.  Since the latter is
++   considered to be part of the front-end we have to be careful not
++   to use any tree- or rtx-like data structures.  */
++
++/* Builtin types, data and prototypes. */
++
++enum s390_builtin_type_index  /* Indices for the non-function type entries.  */
++{
++#undef DEF_TYPE
++#undef DEF_POINTER_TYPE
++#undef DEF_DISTINCT_TYPE
++#undef DEF_VECTOR_TYPE
++#undef DEF_OPAQUE_VECTOR_TYPE
++#undef DEF_FN_TYPE
++#undef DEF_OV_TYPE
++#define DEF_TYPE(INDEX, ...) INDEX,
++#define DEF_POINTER_TYPE(INDEX, ...) INDEX,
++#define DEF_DISTINCT_TYPE(INDEX, ...) INDEX,
++#define DEF_VECTOR_TYPE(INDEX, ...) INDEX,
++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, ...) INDEX,
++#define DEF_FN_TYPE(...)  /* Function types are skipped in this enum.  */
++#define DEF_OV_TYPE(...)  /* Overload types are skipped in this enum.  */
++#include "s390-builtin-types.def"  /* Expands to the enumerator list.  */
++  BT_MAX  /* Sentinel after the last entry; sizes s390_builtin_types[].  */
++};
++
++enum s390_builtin_fn_type_index  /* Indices for the DEF_FN_TYPE entries.  */
++{
++#undef DEF_TYPE
++#undef DEF_POINTER_TYPE
++#undef DEF_DISTINCT_TYPE
++#undef DEF_VECTOR_TYPE
++#undef DEF_OPAQUE_VECTOR_TYPE
++#undef DEF_FN_TYPE
++#undef DEF_OV_TYPE
++#define DEF_TYPE(...)
++#define DEF_POINTER_TYPE(...)
++#define DEF_DISTINCT_TYPE(...)
++#define DEF_VECTOR_TYPE(...)
++#define DEF_OPAQUE_VECTOR_TYPE(...)
++#define DEF_FN_TYPE(INDEX, ...) INDEX,  /* Only function types emit an enumerator.  */
++#define DEF_OV_TYPE(...)
++#include "s390-builtin-types.def"  /* Expands to the enumerator list.  */
++  BT_FN_MAX  /* Sentinel after the last entry; sizes s390_builtin_fn_types[].  */
++};
++
++enum s390_builtin_ov_type_index  /* Indices for the DEF_OV_TYPE entries.  */
++{
++#undef DEF_TYPE
++#undef DEF_POINTER_TYPE
++#undef DEF_DISTINCT_TYPE
++#undef DEF_VECTOR_TYPE
++#undef DEF_OPAQUE_VECTOR_TYPE
++#undef DEF_FN_TYPE
++#undef DEF_OV_TYPE
++#define DEF_TYPE(...)
++#define DEF_POINTER_TYPE(...)
++#define DEF_DISTINCT_TYPE(...)
++#define DEF_VECTOR_TYPE(...)
++#define DEF_OPAQUE_VECTOR_TYPE(...)
++#define DEF_FN_TYPE(...)
++#define DEF_OV_TYPE(INDEX, ...) INDEX,  /* Only overload types emit an enumerator.  */
++#include "s390-builtin-types.def"  /* Expands to the enumerator list.  */
++  BT_OV_MAX  /* Sentinel after the last DEF_OV_TYPE entry.  */
++};
++
++#define MAX_OV_OPERANDS 6
++
++extern tree s390_builtin_types[BT_MAX];
++extern tree s390_builtin_fn_types[BT_FN_MAX];
++
++  /* Enumeration of all builtins defined with B_DEF in s390-builtins.def.  */
++
++enum s390_builtins {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(NAME, ...) S390_BUILTIN_##NAME,
++#define OB_DEF(...)  /* Overloaded builtins are skipped in this enum.  */
++#define OB_DEF_VAR(...)  /* Overloaded variants are skipped in this enum.  */
++
++#include "s390-builtins.def"
++  S390_BUILTIN_MAX  /* Sentinel after the last B_DEF entry.  */
++};
++
++
++/* Generate an enumeration of all overloaded builtins defined with
++   OB_DEF in s390-builtins.def.  */
++enum s390_overloaded_builtins {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(NAME, ...) S390_OVERLOADED_BUILTIN_##NAME,
++#define OB_DEF_VAR(...)
++#include "s390-builtins.def"
++S390_OVERLOADED_BUILTIN_MAX  /* Sentinel after the last OB_DEF entry.  */
++};
++
++/* Generate an enumeration of all variants of overloaded builtins
++   defined with OB_DEF_VAR in s390-builtins.def.  */
++enum s390_overloaded_builtin_vars {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(...)
++#define OB_DEF_VAR(NAME, ...) S390_OVERLOADED_BUILTIN_VAR_##NAME,
++#include "s390-builtins.def"
++S390_OVERLOADED_BUILTIN_VAR_MAX  /* Sentinel after the last OB_DEF_VAR entry.  */
++};
++
++#define S390_OVERLOADED_BUILTIN_OFFSET S390_BUILTIN_MAX
++#define S390_OVERLOADED_BUILTIN_VAR_OFFSET \
++  (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX)
++#define S390_ALL_BUILTIN_MAX				\
++  (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX +	\
++   S390_OVERLOADED_BUILTIN_VAR_MAX)
++
++extern const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1];
++extern const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1];
++
++extern const unsigned int
++  bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1];
++extern const unsigned int
++  opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1];
++
++static inline unsigned int
++bflags_for_builtin (int fcode)  /* Return the bflags table entry for builtin code FCODE.  */
++{
++  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET)
++    gcc_unreachable ();  /* This header declares no bflags table for variant codes.  */
++  else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET)
++    return bflags_overloaded_builtin[fcode - S390_BUILTIN_MAX];  /* Rebase: the overloaded offset is S390_BUILTIN_MAX.  */
++  else
++    return bflags_builtin[fcode];  /* Below S390_BUILTIN_MAX: FCODE indexes the table directly.  */
++}
++
++static inline unsigned int
++opflags_for_builtin (int fcode)  /* Return the opflags table entry for builtin code FCODE.  */
++{
++  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET)
++    return opflags_overloaded_builtin_var[fcode -
++					  S390_OVERLOADED_BUILTIN_VAR_OFFSET];  /* Rebased to a variant-table index.  */
++  else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET)
++    gcc_unreachable ();  /* This header declares no opflags table for this range.  */
++  else
++    return opflags_builtin[fcode];  /* Below S390_BUILTIN_MAX: FCODE indexes the table directly.  */
++}
++
++extern tree s390_builtin_decls[S390_BUILTIN_MAX +
++			       S390_OVERLOADED_BUILTIN_MAX +
++			       S390_OVERLOADED_BUILTIN_VAR_MAX];
+--- gcc/config/s390/s390-builtin-types.def	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/s390-builtin-types.def	2016-05-11 17:53:39.000000000 +0200
+@@ -0,0 +1,755 @@
++/* Builtin type definitions for IBM S/390 and zSeries
++   Copyright (C) 2015 Free Software Foundation, Inc.
++
++   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.  */
++
++#define DEF_FN_TYPE_1(FN_TYPE, FLAGS, T1)	\
++  DEF_FN_TYPE (FN_TYPE,				\
++	       FLAGS,				\
++	       s390_builtin_types[T1])  /* DEF_FN_TYPE_<N>: forward to DEF_FN_TYPE, replacing each index T1..TN by s390_builtin_types[Ti].  */
++#define DEF_FN_TYPE_2(FN_TYPE, FLAGS, T1, T2)	\
++  DEF_FN_TYPE (FN_TYPE,				\
++	       FLAGS,				\
++	       s390_builtin_types[T1],		\
++	       s390_builtin_types[T2])
++#define DEF_FN_TYPE_3(FN_TYPE, FLAGS, T1, T2, T3)	\
++  DEF_FN_TYPE (FN_TYPE,					\
++	       FLAGS,					\
++	       s390_builtin_types[T1],			\
++	       s390_builtin_types[T2],			\
++	       s390_builtin_types[T3])
++#define DEF_FN_TYPE_4(FN_TYPE, FLAGS, T1, T2, T3, T4)	\
++  DEF_FN_TYPE (FN_TYPE,					\
++	       FLAGS,					\
++	       s390_builtin_types[T1],			\
++	       s390_builtin_types[T2],			\
++	       s390_builtin_types[T3],			\
++	       s390_builtin_types[T4])
++#define DEF_FN_TYPE_5(FN_TYPE, FLAGS, T1, T2, T3, T4, T5)	\
++  DEF_FN_TYPE (FN_TYPE,						\
++	       FLAGS,						\
++	       s390_builtin_types[T1],				\
++	       s390_builtin_types[T2],				\
++	       s390_builtin_types[T3],				\
++	       s390_builtin_types[T4],				\
++	       s390_builtin_types[T5])
++#define DEF_FN_TYPE_6(FN_TYPE, FLAGS, T1, T2, T3, T4, T5, T6)	\
++  DEF_FN_TYPE (FN_TYPE,						\
++	       FLAGS,						\
++	       s390_builtin_types[T1],				\
++	       s390_builtin_types[T2],				\
++	       s390_builtin_types[T3],				\
++	       s390_builtin_types[T4],				\
++	       s390_builtin_types[T5],				\
++	       s390_builtin_types[T6])
++DEF_TYPE (BT_INT, B_HTM | B_VX, integer_type_node, 0)
++DEF_TYPE (BT_VOID, 0, void_type_node, 0)
++DEF_TYPE (BT_FLTCONST, B_VX, float_type_node, 1)
++DEF_TYPE (BT_UINT64, B_HTM, c_uint64_type_node, 0)
++DEF_TYPE (BT_FLT, B_VX, float_type_node, 0)
++DEF_TYPE (BT_UINT, 0, unsigned_type_node, 0)
++DEF_TYPE (BT_VOIDCONST, B_VX, void_type_node, 1)
++DEF_TYPE (BT_ULONG, B_VX, long_unsigned_type_node, 0)
++DEF_TYPE (BT_USHORTCONST, B_VX, short_unsigned_type_node, 1)
++DEF_TYPE (BT_SHORTCONST, B_VX, short_integer_type_node, 1)
++DEF_TYPE (BT_INTCONST, B_VX, integer_type_node, 1)
++DEF_TYPE (BT_UCHARCONST, B_VX, unsigned_char_type_node, 1)
++DEF_TYPE (BT_UCHAR, B_VX, unsigned_char_type_node, 0)
++DEF_TYPE (BT_SCHARCONST, B_VX, signed_char_type_node, 1)
++DEF_TYPE (BT_SHORT, B_VX, short_integer_type_node, 0)
++DEF_TYPE (BT_LONG, B_VX, long_integer_type_node, 0)
++DEF_TYPE (BT_SCHAR, B_VX, signed_char_type_node, 0)
++DEF_TYPE (BT_ULONGLONGCONST, B_VX, long_long_unsigned_type_node, 1)
++DEF_TYPE (BT_USHORT, B_VX, short_unsigned_type_node, 0)
++DEF_TYPE (BT_LONGLONG, B_VX, long_long_integer_type_node, 0)
++DEF_TYPE (BT_DBLCONST, B_VX, double_type_node, 1)
++DEF_TYPE (BT_ULONGLONG, B_VX, long_long_unsigned_type_node, 0)
++DEF_TYPE (BT_DBL, B_VX, double_type_node, 0)
++DEF_TYPE (BT_LONGLONGCONST, B_VX, long_long_integer_type_node, 1)
++DEF_TYPE (BT_UINTCONST, B_VX, unsigned_type_node, 1)
++DEF_VECTOR_TYPE (BT_UV2DI, B_VX, BT_ULONGLONG, 2)
++DEF_VECTOR_TYPE (BT_V4SI, B_VX, BT_INT, 4)
++DEF_VECTOR_TYPE (BT_V8HI, B_VX, BT_SHORT, 8)
++DEF_VECTOR_TYPE (BT_UV4SI, B_VX, BT_UINT, 4)
++DEF_VECTOR_TYPE (BT_V16QI, B_VX, BT_SCHAR, 16)
++DEF_VECTOR_TYPE (BT_V2DF, B_VX, BT_DBL, 2)
++DEF_VECTOR_TYPE (BT_V2DI, B_VX, BT_LONGLONG, 2)
++DEF_VECTOR_TYPE (BT_UV8HI, B_VX, BT_USHORT, 8)
++DEF_VECTOR_TYPE (BT_UV16QI, B_VX, BT_UCHAR, 16)
++DEF_POINTER_TYPE (BT_UCHARPTR, B_VX, BT_UCHAR)
++DEF_POINTER_TYPE (BT_DBLCONSTPTR, B_VX, BT_DBLCONST)
++DEF_POINTER_TYPE (BT_VOIDPTR, B_HTM | B_VX, BT_VOID)
++DEF_POINTER_TYPE (BT_FLTPTR, B_VX, BT_FLT)
++DEF_POINTER_TYPE (BT_UINT64PTR, B_HTM, BT_UINT64)
++DEF_POINTER_TYPE (BT_SCHARPTR, B_VX, BT_SCHAR)
++DEF_POINTER_TYPE (BT_UINTCONSTPTR, B_VX, BT_UINTCONST)
++DEF_POINTER_TYPE (BT_ULONGLONGCONSTPTR, B_VX, BT_ULONGLONGCONST)
++DEF_POINTER_TYPE (BT_LONGLONGCONSTPTR, B_VX, BT_LONGLONGCONST)
++DEF_POINTER_TYPE (BT_SHORTPTR, B_VX, BT_SHORT)
++DEF_POINTER_TYPE (BT_USHORTPTR, B_VX, BT_USHORT)
++DEF_POINTER_TYPE (BT_INTPTR, B_VX, BT_INT)
++DEF_POINTER_TYPE (BT_INTCONSTPTR, B_VX, BT_INTCONST)
++DEF_POINTER_TYPE (BT_LONGLONGPTR, B_VX, BT_LONGLONG)
++DEF_POINTER_TYPE (BT_ULONGLONGPTR, B_VX, BT_ULONGLONG)
++DEF_POINTER_TYPE (BT_DBLPTR, B_VX, BT_DBL)
++DEF_POINTER_TYPE (BT_VOIDCONSTPTR, B_VX, BT_VOIDCONST)
++DEF_POINTER_TYPE (BT_USHORTCONSTPTR, B_VX, BT_USHORTCONST)
++DEF_POINTER_TYPE (BT_SHORTCONSTPTR, B_VX, BT_SHORTCONST)
++DEF_POINTER_TYPE (BT_UCHARCONSTPTR, B_VX, BT_UCHARCONST)
++DEF_POINTER_TYPE (BT_FLTCONSTPTR, B_VX, BT_FLTCONST)
++DEF_POINTER_TYPE (BT_SCHARCONSTPTR, B_VX, BT_SCHARCONST)
++DEF_POINTER_TYPE (BT_UINTPTR, B_VX, BT_UINT)
++DEF_DISTINCT_TYPE (BT_BLONGLONG, B_VX, BT_ULONGLONG)
++DEF_DISTINCT_TYPE (BT_BINT, B_VX, BT_UINT)
++DEF_DISTINCT_TYPE (BT_BSHORT, B_VX, BT_USHORT)
++DEF_DISTINCT_TYPE (BT_BCHAR, B_VX, BT_UCHAR)
++DEF_OPAQUE_VECTOR_TYPE (BT_OV2DI, B_VX, BT_LONGLONG, 2)
++DEF_OPAQUE_VECTOR_TYPE (BT_BV16QI, B_VX, BT_BCHAR, 16)
++DEF_OPAQUE_VECTOR_TYPE (BT_OV4SI, B_VX, BT_INT, 4)
++DEF_OPAQUE_VECTOR_TYPE (BT_OUV4SI, B_VX, BT_UINT, 4)
++DEF_OPAQUE_VECTOR_TYPE (BT_BV4SI, B_VX, BT_BINT, 4)
++DEF_OPAQUE_VECTOR_TYPE (BT_BV2DI, B_VX, BT_BLONGLONG, 2)
++DEF_OPAQUE_VECTOR_TYPE (BT_BV8HI, B_VX, BT_BSHORT, 8)
++DEF_FN_TYPE_1 (BT_FN_INT, B_HTM, BT_INT)
++DEF_FN_TYPE_1 (BT_FN_UINT, 0, BT_UINT)
++DEF_FN_TYPE_2 (BT_FN_INT_INT, B_VX, BT_INT, BT_INT)
++DEF_FN_TYPE_2 (BT_FN_INT_VOIDPTR, B_HTM, BT_INT, BT_VOIDPTR)
++DEF_FN_TYPE_2 (BT_FN_OV4SI_INT, B_VX, BT_OV4SI, BT_INT)
++DEF_FN_TYPE_2 (BT_FN_OV4SI_INTCONSTPTR, B_VX, BT_OV4SI, BT_INTCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI)
++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UCHAR)
++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHARCONSTPTR, B_VX, BT_UV16QI, BT_UCHARCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_UV16QI_USHORT, B_VX, BT_UV16QI, BT_USHORT)
++DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONG, B_VX, BT_UV2DI, BT_ULONGLONG)
++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONGCONSTPTR, B_VX, BT_UV2DI, BT_ULONGLONGCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_UV2DI_USHORT, B_VX, BT_UV2DI, BT_USHORT)
++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI)
++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINT, B_VX, BT_UV4SI, BT_UINT)
++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINTCONSTPTR, B_VX, BT_UV4SI, BT_UINTCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_UV4SI_USHORT, B_VX, BT_UV4SI, BT_USHORT)
++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI)
++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORT, B_VX, BT_UV8HI, BT_USHORT)
++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORTCONSTPTR, B_VX, BT_UV8HI, BT_USHORTCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI)
++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_2 (BT_FN_V16QI_SCHAR, B_VX, BT_V16QI, BT_SCHAR)
++DEF_FN_TYPE_2 (BT_FN_V16QI_UCHAR, B_VX, BT_V16QI, BT_UCHAR)
++DEF_FN_TYPE_2 (BT_FN_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI)
++DEF_FN_TYPE_2 (BT_FN_V2DF_DBL, B_VX, BT_V2DF, BT_DBL)
++DEF_FN_TYPE_2 (BT_FN_V2DF_FLTCONSTPTR, B_VX, BT_V2DF, BT_FLTCONSTPTR)
++DEF_FN_TYPE_2 (BT_FN_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF)
++DEF_FN_TYPE_2 (BT_FN_V2DI_SHORT, B_VX, BT_V2DI, BT_SHORT)
++DEF_FN_TYPE_2 (BT_FN_V2DI_V16QI, B_VX, BT_V2DI, BT_V16QI)
++DEF_FN_TYPE_2 (BT_FN_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI)
++DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI, B_VX, BT_V2DI, BT_V4SI)
++DEF_FN_TYPE_2 (BT_FN_V2DI_V8HI, B_VX, BT_V2DI, BT_V8HI)
++DEF_FN_TYPE_2 (BT_FN_V4SI_SHORT, B_VX, BT_V4SI, BT_SHORT)
++DEF_FN_TYPE_2 (BT_FN_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_2 (BT_FN_V4SI_V8HI, B_VX, BT_V4SI, BT_V8HI)
++DEF_FN_TYPE_2 (BT_FN_V8HI_SHORT, B_VX, BT_V8HI, BT_SHORT)
++DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI, B_VX, BT_V8HI, BT_V16QI)
++DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_2 (BT_FN_VOID_INT, B_HTM, BT_VOID, BT_INT)
++DEF_FN_TYPE_2 (BT_FN_VOID_UINT, 0, BT_VOID, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_DBL_V2DF_INT, B_VX, BT_DBL, BT_V2DF, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_INT, B_VX, BT_INT, BT_OV4SI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_OV4SI, B_VX, BT_INT, BT_OV4SI, BT_OV4SI)
++DEF_FN_TYPE_3 (BT_FN_INT_UV16QI_UV16QI, B_VX, BT_INT, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_3 (BT_FN_INT_UV2DI_UV2DI, B_VX, BT_INT, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_3 (BT_FN_INT_UV4SI_UV4SI, B_VX, BT_INT, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_INT_UV8HI_UV8HI, B_VX, BT_INT, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_INT_V16QI_V16QI, B_VX, BT_INT, BT_V16QI, BT_V16QI)
++DEF_FN_TYPE_3 (BT_FN_INT_V2DF_V2DF, B_VX, BT_INT, BT_V2DF, BT_V2DF)
++DEF_FN_TYPE_3 (BT_FN_INT_V2DI_V2DI, B_VX, BT_INT, BT_V2DI, BT_V2DI)
++DEF_FN_TYPE_3 (BT_FN_INT_V4SI_V4SI, B_VX, BT_INT, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_3 (BT_FN_INT_V8HI_V8HI, B_VX, BT_INT, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_3 (BT_FN_INT_VOIDPTR_INT, B_HTM, BT_INT, BT_VOIDPTR, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_OV2DI_LONGLONG_LONGLONG, B_VX, BT_OV2DI, BT_LONGLONG, BT_LONGLONG)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_INT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_UINT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_INT_INT, B_VX, BT_OV4SI, BT_INT, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_INTPTR)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_ULONG, B_VX, BT_OV4SI, BT_OV4SI, BT_ULONG)
++DEF_FN_TYPE_3 (BT_FN_UCHAR_UV16QI_INT, B_VX, BT_UCHAR, BT_UV16QI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UINT_UV4SI_INT, B_VX, BT_UINT, BT_UV4SI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UINT_VOIDCONSTPTR_INT, B_VX, BT_UINT, BT_VOIDCONSTPTR, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_ULONGLONG_UV2DI_INT, B_VX, BT_ULONGLONG, BT_UV2DI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_USHORT_UV8HI_INT, B_VX, BT_USHORT, BT_UV8HI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHARCONSTPTR_USHORT, B_VX, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UCHAR, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_UCHAR, B_VX, BT_UV16QI, BT_UCHAR, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UINT, B_VX, BT_UV16QI, BT_UV16QI, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV4SI_UV4SI, B_VX, BT_UV16QI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UCHAR_UCHAR, B_VX, BT_UV2DI, BT_UCHAR, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_ULONGLONG, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UINT, B_VX, BT_UV2DI, BT_UV2DI, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV4SI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV8HI_UV8HI, B_VX, BT_UV2DI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_UV2DI_V2DF_INT, B_VX, BT_UV2DI, BT_V2DF, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UCHAR_UCHAR, B_VX, BT_UV4SI, BT_UCHAR, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UINT, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV16QI_UV16QI, B_VX, BT_UV4SI, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV2DI_UV2DI, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UINT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV8HI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UCHAR_UCHAR, B_VX, BT_UV8HI, BT_UCHAR, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_USHORT, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV16QI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV4SI_UV4SI, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UCHAR, B_VX, BT_UV8HI, BT_UV8HI, BT_UCHAR)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UINT, B_VX, BT_UV8HI, BT_UV8HI, BT_UINT)
++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_V16QI_BV16QI_V16QI, B_VX, BT_V16QI, BT_BV16QI, BT_V16QI)
++DEF_FN_TYPE_3 (BT_FN_V16QI_UINT_VOIDCONSTPTR, B_VX, BT_V16QI, BT_UINT, BT_VOIDCONSTPTR)
++DEF_FN_TYPE_3 (BT_FN_V16QI_UV16QI_UV16QI, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_3 (BT_FN_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI)
++DEF_FN_TYPE_3 (BT_FN_V16QI_V8HI_V8HI, B_VX, BT_V16QI, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_3 (BT_FN_V2DF_DBL_INT, B_VX, BT_V2DF, BT_DBL, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_V2DF_UV2DI_INT, B_VX, BT_V2DF, BT_UV2DI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_V2DF_UV4SI_INT, B_VX, BT_V2DF, BT_UV4SI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF, BT_V2DF)
++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DI_INT, B_VX, BT_V2DF, BT_V2DI, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_V2DI_BV2DI_V2DI, B_VX, BT_V2DI, BT_BV2DI, BT_V2DI)
++DEF_FN_TYPE_3 (BT_FN_V2DI_UV2DI_UV2DI, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI)
++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_INT, B_VX, BT_V2DI, BT_V2DF, BT_INT)
++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_V2DF, B_VX, BT_V2DI, BT_V2DF, BT_V2DF)
++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI, BT_V2DI)
++DEF_FN_TYPE_3 (BT_FN_V2DI_V4SI_V4SI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_3 (BT_FN_V4SI_BV4SI_V4SI, B_VX, BT_V4SI, BT_BV4SI, BT_V4SI)
++DEF_FN_TYPE_3 (BT_FN_V4SI_INT_VOIDPTR, B_VX, BT_V4SI, BT_INT, BT_VOIDPTR)
++DEF_FN_TYPE_3 (BT_FN_V4SI_UV4SI_UV4SI, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_3 (BT_FN_V4SI_V2DI_V2DI, B_VX, BT_V4SI, BT_V2DI, BT_V2DI)
++DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_3 (BT_FN_V8HI_BV8HI_V8HI, B_VX, BT_V8HI, BT_BV8HI, BT_V8HI)
++DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI)
++DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI, B_VX, BT_V8HI, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_3 (BT_FN_VOID_UINT64PTR_UINT64, B_HTM, BT_VOID, BT_UINT64PTR, BT_UINT64)
++DEF_FN_TYPE_3 (BT_FN_VOID_V2DF_FLTPTR, B_VX, BT_VOID, BT_V2DF, BT_FLTPTR)
++DEF_FN_TYPE_4 (BT_FN_INT_OV4SI_OV4SI_INTPTR, B_VX, BT_INT, BT_OV4SI, BT_OV4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_INT_OV4SI_INT, B_VX, BT_OV4SI, BT_INT, BT_OV4SI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INT, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_UCHAR)
++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_ULONGLONG)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI)
++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_ULONGLONG, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV4SI_UV4SI_UV2DI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV2DI_UV2DI_INTPTR, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV8HI_UV8HI_UV4SI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV16QI_UV16QI_UV8HI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_USHORT, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_FN_TYPE_4 (BT_FN_V16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_INTPTR, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI)
++DEF_FN_TYPE_4 (BT_FN_V16QI_V8HI_V8HI_INTPTR, B_VX, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_DBL_INT, B_VX, BT_V2DF, BT_V2DF, BT_DBL, BT_INT)
++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_UCHAR_UCHAR, B_VX, BT_V2DF, BT_V2DF, BT_UCHAR, BT_UCHAR)
++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF, BT_V2DF, BT_V2DF)
++DEF_FN_TYPE_4 (BT_FN_V2DI_UV2DI_UV2DI_INTPTR, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_INT_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_V2DF_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_V2DF, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DI_V2DI_INTPTR, B_VX, BT_V2DI, BT_V2DI, BT_V2DI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V2DI_V4SI_V4SI_V2DI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI)
++DEF_FN_TYPE_4 (BT_FN_V4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V4SI_V2DI_V2DI_INTPTR, B_VX, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_INTPTR, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI)
++DEF_FN_TYPE_4 (BT_FN_V4SI_V8HI_V8HI_V4SI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
++DEF_FN_TYPE_4 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V8HI_V16QI_V16QI_V8HI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI)
++DEF_FN_TYPE_4 (BT_FN_V8HI_V4SI_V4SI_INTPTR, B_VX, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_INTPTR, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_INT_VOIDPTR, B_VX, BT_VOID, BT_OV4SI, BT_INT, BT_VOIDPTR)
++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_VOIDPTR_UINT, B_VX, BT_VOID, BT_OV4SI, BT_VOIDPTR, BT_UINT)
++DEF_FN_TYPE_4 (BT_FN_VOID_V16QI_UINT_VOIDPTR, B_VX, BT_VOID, BT_V16QI, BT_UINT, BT_VOIDPTR)
++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OUV4SI, BT_INTCONSTPTR, BT_UCHAR)
++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR)
++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT)
++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR)
++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT)
++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR)
++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT)
++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT)
++DEF_FN_TYPE_5 (BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, B_VX, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG)
++DEF_FN_TYPE_5 (BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, B_VX, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG)
++DEF_FN_TYPE_5 (BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG, B_VX, BT_VOID, BT_V4SI, BT_V4SI, BT_INTPTR, BT_ULONGLONG)
++DEF_FN_TYPE_6 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_6 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR)
++DEF_FN_TYPE_6 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UCHAR, BT_BV16QI, BT_BV16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV4SI, BT_BV16QI, BT_BV16QI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV8HI, BT_BV16QI, BT_BV16QI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV16QI_BV8HI_BV8HI, BT_BV16QI, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UCHAR, BT_BV2DI, BT_BV2DI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_BV2DI, BT_BV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV4SI, BT_BV2DI, BT_BV2DI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV8HI, BT_BV2DI, BT_BV2DI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV2DI_BV4SI, BT_BV2DI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_BV2DI_UV2DI_UV2DI, BT_BV2DI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_BV2DI_V2DF_V2DF, BT_BV2DI, BT_V2DF, BT_V2DF)
++DEF_OV_TYPE (BT_OV_BV2DI_V2DI_V2DI, BT_BV2DI, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV2DI_BV2DI, BT_BV4SI, BT_BV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UCHAR, BT_BV4SI, BT_BV4SI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_BV4SI, BT_BV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV8HI, BT_BV4SI, BT_BV4SI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV4SI_BV8HI, BT_BV4SI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI, BT_BV4SI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI_INTPTR, BT_BV4SI, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV8HI_BV16QI, BT_BV8HI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV4SI_BV4SI, BT_BV8HI, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UCHAR, BT_BV8HI, BT_BV8HI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV4SI, BT_BV8HI, BT_BV8HI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI, BT_BV8HI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI_INTPTR, BT_BV8HI, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_DBL_V2DF_INT, BT_DBL, BT_V2DF, BT_INT)
++DEF_OV_TYPE (BT_OV_INT_BV16QI_BV16QI, BT_INT, BT_BV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_INT_BV16QI_UV16QI, BT_INT, BT_BV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_INT_BV16QI_V16QI, BT_INT, BT_BV16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_INT_BV2DI_BV2DI, BT_INT, BT_BV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_INT_BV2DI_UV2DI, BT_INT, BT_BV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_INT_BV2DI_V2DI, BT_INT, BT_BV2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_INT_BV4SI_BV4SI, BT_INT, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_INT_BV4SI_UV4SI, BT_INT, BT_BV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_INT_BV4SI_V4SI, BT_INT, BT_BV4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_INT_BV8HI_BV8HI, BT_INT, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_INT_BV8HI_UV8HI, BT_INT, BT_BV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_INT_BV8HI_V8HI, BT_INT, BT_BV8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_INT_UV16QI_BV16QI, BT_INT, BT_UV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_INT_UV16QI_UV16QI, BT_INT, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_INT_UV2DI_BV2DI, BT_INT, BT_UV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_INT_UV2DI_UV2DI, BT_INT, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_INT_UV4SI_BV4SI, BT_INT, BT_UV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_INT_UV4SI_UV4SI, BT_INT, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_INT_UV8HI_BV8HI, BT_INT, BT_UV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_INT_UV8HI_UV8HI, BT_INT, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_INT_V16QI_BV16QI, BT_INT, BT_V16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_INT_V16QI_UV16QI, BT_INT, BT_V16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_INT_V16QI_V16QI, BT_INT, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_INT_V2DF_UV2DI, BT_INT, BT_V2DF, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_INT_V2DF_V2DF, BT_INT, BT_V2DF, BT_V2DF)
++DEF_OV_TYPE (BT_OV_INT_V2DI_BV2DI, BT_INT, BT_V2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_INT_V2DI_UV2DI, BT_INT, BT_V2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_INT_V2DI_V2DI, BT_INT, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_INT_V4SI_BV4SI, BT_INT, BT_V4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_INT_V4SI_INT, BT_INT, BT_V4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_INT_V4SI_UV4SI, BT_INT, BT_V4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_INT_V4SI_V4SI, BT_INT, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_INT_V8HI_BV8HI, BT_INT, BT_V8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_INT_V8HI_UV8HI, BT_INT, BT_V8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_INT_V8HI_V8HI, BT_INT, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_LONGLONG_V2DI_INT, BT_LONGLONG, BT_V2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_SCHAR_V16QI_INT, BT_SCHAR, BT_V16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_SHORT_V8HI_INT, BT_SHORT, BT_V8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_UCHAR_BV16QI_INT, BT_UCHAR, BT_BV16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_UCHAR_UV16QI_INT, BT_UCHAR, BT_UV16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_UINT_BV4SI_INT, BT_UINT, BT_BV4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_UINT_UV4SI_INT, BT_UINT, BT_UV4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_ULONGLONG_BV2DI_INT, BT_ULONGLONG, BT_BV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_ULONGLONG_UV2DI_INT, BT_ULONGLONG, BT_UV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_USHORT_BV8HI_INT, BT_USHORT, BT_BV8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_UINT, BT_UV16QI, BT_UCHARCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_USHORT, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_BV16QI_INT, BT_UV16QI, BT_UCHAR, BT_BV16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_INT, BT_UV16QI, BT_UCHAR, BT_INT)
++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_UV16QI_INT, BT_UV16QI, BT_UCHAR, BT_UV16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_ULONG, BT_UV16QI, BT_UV16QI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INT, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV4SI, BT_UV16QI, BT_UV16QI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_V16QI, BT_UV16QI, BT_UV16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV2DI_UV2DI, BT_UV16QI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV4SI_UV4SI, BT_UV16QI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI, BT_UV16QI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_BV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_BV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_ULONGLONG, BT_INT)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_ULONGLONG, BT_UV2DI, BT_ULONGLONG, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_UV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_UV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_ULONG, BT_UV2DI, BT_UV2DI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV4SI, BT_UV2DI, BT_UV2DI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV8HI, BT_UV2DI, BT_UV2DI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_V2DI, BT_UV2DI, BT_UV2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI, BT_UV2DI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI, BT_UV2DI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI_UV2DI, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV2DI_UV8HI_UV8HI, BT_UV2DI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT)
++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR_UINT, BT_UV4SI, BT_UINTCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR_USHORT, BT_UV4SI, BT_UINTCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_UV4SI_UINT_BV4SI_INT, BT_UV4SI, BT_UINT, BT_BV4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV4SI_UINT_INT, BT_UV4SI, BT_UINT, BT_INT)
++DEF_OV_TYPE (BT_OV_UV4SI_UINT_UV4SI_INT, BT_UV4SI, BT_UINT, BT_UV4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV4SI_UV16QI_UV16QI, BT_UV4SI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI, BT_UV4SI, BT_UV2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI_INTPTR, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_ULONG, BT_UV4SI, BT_UV4SI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INT, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV8HI, BT_UV4SI, BT_UV4SI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_V4SI, BT_UV4SI, BT_UV4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI, BT_UV4SI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI, BT_UV4SI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI_UV4SI, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV4SI_V2DI_V2DI, BT_UV4SI, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_UINT, BT_UV8HI, BT_USHORTCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_USHORT, BT_UV8HI, BT_USHORTCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_BV8HI_INT, BT_UV8HI, BT_USHORT, BT_BV8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_INT, BT_UV8HI, BT_USHORT, BT_INT)
++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_UV8HI_INT, BT_UV8HI, BT_USHORT, BT_UV8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI, BT_UV8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI, BT_UV8HI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI_UV8HI, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV4SI_UV4SI, BT_UV8HI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV4SI_UV4SI_INTPTR, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_ULONG, BT_UV8HI, BT_UV8HI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INT, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_UV8HI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_UV8HI_V8HI, BT_UV8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V16QI_BV16QI_V16QI, BT_V16QI, BT_BV16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARPTR, BT_V16QI, BT_LONG, BT_SCHARPTR)
++DEF_OV_TYPE (BT_OV_V16QI_SCHAR, BT_V16QI, BT_SCHAR)
++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR, BT_V16QI, BT_SCHARCONSTPTR)
++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_UINT, BT_V16QI, BT_SCHARCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_USHORT, BT_V16QI, BT_SCHARCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_V16QI_SCHAR_INT, BT_V16QI, BT_SCHAR, BT_INT)
++DEF_OV_TYPE (BT_OV_V16QI_SCHAR_V16QI_INT, BT_V16QI, BT_SCHAR, BT_V16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_V16QI_UV16QI_V16QI_V16QI, BT_V16QI, BT_UV16QI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_ULONG, BT_V16QI, BT_V16QI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV4SI, BT_V16QI, BT_V16QI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV8HI, BT_V16QI, BT_V16QI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_BV16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INT, BT_V16QI, BT_V16QI, BT_V16QI, BT_INT)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_ULONGLONG, BT_V16QI, BT_V16QI, BT_V16QI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V16QI_V8HI_V8HI, BT_V16QI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V16QI_V8HI_V8HI_INTPTR, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V2DF_BV2DI_V2DF, BT_V2DF, BT_BV2DI, BT_V2DF)
++DEF_OV_TYPE (BT_OV_V2DF_DBL, BT_V2DF, BT_DBL)
++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR, BT_V2DF, BT_DBLCONSTPTR)
++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_UINT, BT_V2DF, BT_DBLCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_USHORT, BT_V2DF, BT_DBLCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_V2DF_DBL_INT, BT_V2DF, BT_DBL, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DF_DBL_V2DF_INT, BT_V2DF, BT_DBL, BT_V2DF, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLPTR, BT_V2DF, BT_LONG, BT_DBLPTR)
++DEF_OV_TYPE (BT_OV_V2DF_UV2DI_INT, BT_V2DF, BT_UV2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF, BT_V2DF, BT_V2DF)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR, BT_V2DF, BT_V2DF, BT_UV2DI, BT_DBLCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_INT, BT_V2DF, BT_V2DF, BT_V2DF, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_ULONGLONG, BT_V2DF, BT_V2DF, BT_V2DF, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV16QI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DI, BT_V2DF, BT_V2DF, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V2DF_V2DI_INT, BT_V2DF, BT_V2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DI_BV2DI_V2DI, BT_V2DI, BT_BV2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG, BT_V2DI, BT_LONGLONG)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR, BT_V2DI, BT_LONGLONGCONSTPTR)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_UINT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_USHORT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_INT, BT_V2DI, BT_LONGLONG, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_LONGLONG, BT_V2DI, BT_LONGLONG, BT_LONGLONG)
++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_V2DI_INT, BT_V2DI, BT_LONGLONG, BT_V2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGPTR, BT_V2DI, BT_LONG, BT_LONGLONGPTR)
++DEF_OV_TYPE (BT_OV_V2DI_V16QI, BT_V2DI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_ULONG, BT_V2DI, BT_V2DI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_LONGLONGCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV4SI, BT_V2DI, BT_V2DI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV8HI, BT_V2DI, BT_V2DI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI, BT_V2DI, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_BV2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_INT, BT_V2DI, BT_V2DI, BT_V2DI, BT_INT)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_ULONGLONG, BT_V2DI, BT_V2DI, BT_V2DI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V4SI, BT_V2DI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI_V2DI, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V2DI_V8HI, BT_V2DI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V4SI_INT, BT_V4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR, BT_V4SI, BT_INTCONSTPTR)
++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_UINT, BT_V4SI, BT_INTCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_USHORT, BT_V4SI, BT_INTCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_V4SI_INT_INT, BT_V4SI, BT_INT, BT_INT)
++DEF_OV_TYPE (BT_OV_V4SI_INT_V4SI_INT, BT_V4SI, BT_INT, BT_V4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_V4SI_LONG_INTPTR, BT_V4SI, BT_LONG, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V4SI_UV4SI_V4SI_V4SI, BT_V4SI, BT_UV4SI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI)
++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI_INTPTR, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_ULONG, BT_V4SI, BT_V4SI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_INTCONSTPTR, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV8HI, BT_V4SI, BT_V4SI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_BV4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INT, BT_V4SI, BT_V4SI, BT_V4SI, BT_INT)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_ULONGLONG, BT_V4SI, BT_V4SI, BT_V4SI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V4SI_V8HI, BT_V4SI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTPTR, BT_V8HI, BT_LONG, BT_SHORTPTR)
++DEF_OV_TYPE (BT_OV_V8HI_SHORT, BT_V8HI, BT_SHORT)
++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR, BT_V8HI, BT_SHORTCONSTPTR)
++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_UINT, BT_V8HI, BT_SHORTCONSTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_USHORT, BT_V8HI, BT_SHORTCONSTPTR, BT_USHORT)
++DEF_OV_TYPE (BT_OV_V8HI_SHORT_INT, BT_V8HI, BT_SHORT, BT_INT)
++DEF_OV_TYPE (BT_OV_V8HI_SHORT_V8HI_INT, BT_V8HI, BT_SHORT, BT_V8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_V8HI_UV8HI_V8HI_V8HI, BT_V8HI, BT_UV8HI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V16QI, BT_V8HI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI)
++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI)
++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_ULONG, BT_V8HI, BT_V8HI, BT_ULONG)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV4SI, BT_V8HI, BT_V8HI, BT_UV4SI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UCHAR)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_BV8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INT, BT_V8HI, BT_V8HI, BT_V8HI, BT_INT)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_ULONGLONG, BT_V8HI, BT_V8HI, BT_V8HI, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV16QI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV8HI)
++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI)
++DEF_OV_TYPE (BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_BV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_BV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_UV16QI_LONG_UCHARPTR, BT_VOID, BT_UV16QI, BT_LONG, BT_UCHARPTR)
++DEF_OV_TYPE (BT_OV_VOID_UV16QI_UCHARPTR_UINT, BT_VOID, BT_UV16QI, BT_UCHARPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR, BT_VOID, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR)
++DEF_OV_TYPE (BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT, BT_VOID, BT_UV2DI, BT_ULONGLONGPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_UINTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_UINTPTR)
++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UINTPTR_UINT, BT_VOID, BT_UV4SI, BT_UINTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_UV8HI_LONG_USHORTPTR, BT_VOID, BT_UV8HI, BT_LONG, BT_USHORTPTR)
++DEF_OV_TYPE (BT_OV_VOID_UV8HI_USHORTPTR_UINT, BT_VOID, BT_UV8HI, BT_USHORTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_V16QI_LONG_SCHARPTR, BT_VOID, BT_V16QI, BT_LONG, BT_SCHARPTR)
++DEF_OV_TYPE (BT_OV_VOID_V16QI_SCHARPTR_UINT, BT_VOID, BT_V16QI, BT_SCHARPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_V2DF_DBLPTR_UINT, BT_VOID, BT_V2DF, BT_DBLPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_V2DF_LONG_DBLPTR, BT_VOID, BT_V2DF, BT_LONG, BT_DBLPTR)
++DEF_OV_TYPE (BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG, BT_VOID, BT_V2DF, BT_UV2DI, BT_DBLPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONGLONGPTR_UINT, BT_VOID, BT_V2DI, BT_LONGLONGPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONG_LONGLONGPTR, BT_VOID, BT_V2DI, BT_LONG, BT_LONGLONGPTR)
++DEF_OV_TYPE (BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG, BT_VOID, BT_V2DI, BT_UV2DI, BT_LONGLONGPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_V4SI_INTPTR_UINT, BT_VOID, BT_V4SI, BT_INTPTR, BT_UINT)
++DEF_OV_TYPE (BT_OV_VOID_V4SI_LONG_INTPTR, BT_VOID, BT_V4SI, BT_LONG, BT_INTPTR)
++DEF_OV_TYPE (BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG, BT_VOID, BT_V4SI, BT_UV4SI, BT_INTPTR, BT_ULONGLONG)
++DEF_OV_TYPE (BT_OV_VOID_V8HI_LONG_SHORTPTR, BT_VOID, BT_V8HI, BT_LONG, BT_SHORTPTR)
++DEF_OV_TYPE (BT_OV_VOID_V8HI_SHORTPTR_UINT, BT_VOID, BT_V8HI, BT_SHORTPTR, BT_UINT)
+--- gcc/config/s390/s390.c	2015-06-18 16:33:04.000000000 +0200
++++ gcc/config/s390/s390.c	2016-05-11 19:11:44.333028400 +0200
+@@ -52,6 +52,10 @@ along with GCC; see the file COPYING3.
+ #include "params.h"
+ #include "cfgloop.h"
+ #include "opts.h"
++#include "intl.h"
++#include "plugin-api.h"
++#include "cgraph.h"
++#include "tm-constrs.h"
+ 
+ /* Define the specific costs for a given cpu.  */
+ 
+@@ -288,6 +292,19 @@ extern int reload_completed;
+ 
+ /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
+ static rtx last_scheduled_insn;
++#define MAX_SCHED_UNITS 3
++static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
++
++/* The maximum score added for an instruction whose unit hasn't been
++   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
++   give instruction mix scheduling more priority over instruction
++   grouping.  */
++#define MAX_SCHED_MIX_SCORE      8
++
++/* The maximum distance up to which individual scores will be
++   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
++   Increase this with the OOO window size of the machine.  */
++#define MAX_SCHED_MIX_DISTANCE 100
+ 
+ /* Structure used to hold the components of a S/390 memory
+    address.  A legitimate address on S/390 is of the general
+@@ -387,6 +404,7 @@ struct GTY(()) machine_function
+ /* Number of GPRs and FPRs used for argument passing.  */
+ #define GP_ARG_NUM_REG 5
+ #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
++#define VEC_ARG_NUM_REG 8
+ 
+ /* A couple of shortcuts.  */
+ #define CONST_OK_FOR_J(x) \
+@@ -407,6 +425,539 @@ struct GTY(()) machine_function
+    bytes on a z10 (or higher) CPU.  */
+ #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
+ 
++
++/* Indicate which ABI has been used for passing vector args.
++   0 - no vector type arguments have been passed where the ABI is relevant
++   1 - the old ABI has been used
++   2 - a vector type argument has been passed either in a vector register
++       or on the stack by value  */
++static int s390_vector_abi = 0;
++
++/* Set the vector ABI marker if TYPE is subject to the vector ABI
++   switch.  The vector ABI affects only vector data types.  There are
++   two aspects of the vector ABI relevant here:
++
++   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
++   ABI and natural alignment with the old.
++
++   2. vectors <= 16 bytes are passed in VRs or by value on the stack
++   with the new ABI but by reference on the stack with the old.
++
++   If ARG_P is true TYPE is used for a function argument or return
++   value.  The ABI marker then is set for all vector data types.  If
++   ARG_P is false only type 1 vectors are being checked.  */
++
++static void
++s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
++{
++  static htab_t visited_types_hash
++    = htab_create (37, htab_hash_pointer, htab_eq_pointer, free);
++  void **slot;
++
++  if (s390_vector_abi)
++    return;
++
++  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
++    return;
++
++  slot = htab_find_slot (visited_types_hash, type, INSERT);
++  if (*slot)
++    return;
++
++  *slot = CONST_CAST_TREE (type);
++
++  if (TREE_CODE (type) == VECTOR_TYPE)
++    {
++      int type_size = int_size_in_bytes (type);
++
++      /* Outside arguments only the alignment is changing and this
++	 only happens for vector types >= 16 bytes.  */
++      if (!arg_p && type_size < 16)
++	return;
++
++      /* In arguments vector types > 16 are passed as before (GCC
++	 never enforced the bigger alignment for arguments which was
++	 required by the old vector ABI).  However, it might still be
++	 ABI relevant due to the changed alignment if it is a struct
++	 member.  */
++      if (arg_p && type_size > 16 && !in_struct_p)
++	return;
++
++      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
++    }
++  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
++    {
++      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
++	 natural alignment there will never be ABI dependent padding
++	 in an array type.  That's why we do not set in_struct_p to
++	 true here.  */
++      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
++    }
++  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
++    {
++      tree arg_chain;
++
++      /* Check the return type.  */
++      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
++
++      for (arg_chain = TYPE_ARG_TYPES (type);
++	   arg_chain;
++	   arg_chain = TREE_CHAIN (arg_chain))
++	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
++    }
++  else if (RECORD_OR_UNION_TYPE_P (type))
++    {
++      tree field;
++
++      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++	{
++	  if (TREE_CODE (field) != FIELD_DECL)
++	    continue;
++
++	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
++	}
++    }
++}
++
++
++/* System z builtins.  */
++
++#include "s390-builtins.h"
++
++const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
++  { /* BFLAGS of each B_DEF entry in .def order, then a trailing 0.  */
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
++#define OB_DEF(...)
++#define OB_DEF_VAR(...)
++#include "s390-builtins.def"
++    0
++  };
++
++const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
++  { /* OPFLAGS of each B_DEF entry in .def order, then a trailing 0.  */
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
++#define OB_DEF(...)
++#define OB_DEF_VAR(...)
++#include "s390-builtins.def"
++    0
++  };
++
++const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
++  { /* BFLAGS of each OB_DEF entry in .def order, then a trailing 0.  */
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
++#define OB_DEF_VAR(...)
++#include "s390-builtins.def"
++    0
++  };
++
++const unsigned int
++opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
++  { /* FLAGS of each OB_DEF_VAR entry in .def order, then a trailing 0.  */
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(...)
++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
++#include "s390-builtins.def"
++    0
++  };
++
++tree s390_builtin_types[BT_MAX];
++tree s390_builtin_fn_types[BT_FN_MAX];
++tree s390_builtin_decls[S390_BUILTIN_MAX +
++			S390_OVERLOADED_BUILTIN_MAX +
++			S390_OVERLOADED_BUILTIN_VAR_MAX];
++
++static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
++#define OB_DEF(...)
++#define OB_DEF_VAR(...)
++
++#include "s390-builtins.def"
++  CODE_FOR_nothing /* Trailing entry after the B_DEF insn codes.  */
++};
++
++/* Set up the builtin types and register the s390 builtin functions.  */
++static void
++s390_init_builtins (void)
++{
++  /* These definitions are being used in s390-builtins.def.  */
++  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
++				       NULL, NULL);
++  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
++  tree c_uint64_type_node;
++  unsigned int bflags_mask = (BFLAGS_MASK_INIT);
++
++  bflags_mask |= (TARGET_VX)  ? B_VX  : 0;
++  bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
++
++  /* The uint64_type_node from tree.c is not compatible to the C99
++     uint64_t data type.  What we want is c_uint64_type_node from
++     c-common.c.  But since backend code is not supposed to interface
++     with the frontend we recreate it here.  */
++  if (TARGET_64BIT)
++    c_uint64_type_node = long_unsigned_type_node;
++  else
++    c_uint64_type_node = long_long_unsigned_type_node;
++  /* Each DEF_* below acts only if BFLAGS is 0 or overlaps bflags_mask.  */
++#undef DEF_TYPE
++#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P)		\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))	\
++    s390_builtin_types[INDEX] = (!CONST_P) ?		\
++      (NODE) : build_type_variant ((NODE), 1, 0);
++
++#undef DEF_POINTER_TYPE
++#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE)			\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
++    s390_builtin_types[INDEX] =						\
++      build_pointer_type (s390_builtin_types[INDEX_BASE]);
++
++#undef DEF_DISTINCT_TYPE
++#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE)			\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
++    s390_builtin_types[INDEX] =						\
++      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
++
++#undef DEF_VECTOR_TYPE
++#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)		\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
++    s390_builtin_types[INDEX] =						\
++      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
++
++#undef DEF_OPAQUE_VECTOR_TYPE
++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS)	\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))			\
++    s390_builtin_types[INDEX] =						\
++      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
++
++#undef DEF_FN_TYPE
++#define DEF_FN_TYPE(INDEX, BFLAGS, args...)			\
++  if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask))		\
++    s390_builtin_fn_types[INDEX] =				\
++    build_function_type_list (args, NULL_TREE);
++#undef DEF_OV_TYPE
++#define DEF_OV_TYPE(...)
++#include "s390-builtin-types.def"
++  /* Register only builtins whose BFLAGS are all set in bflags_mask.  */
++#undef B_DEF
++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
++  if (((BFLAGS) & ~bflags_mask) == 0)					\
++    s390_builtin_decls[S390_BUILTIN_##NAME] =				\
++      add_builtin_function ("__builtin_" #NAME,				\
++			    s390_builtin_fn_types[FNTYPE],		\
++			    S390_BUILTIN_##NAME,			\
++			    BUILT_IN_MD,				\
++			    NULL,					\
++			    ATTRS);
++#undef OB_DEF
++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
++  if (((BFLAGS) & ~bflags_mask) == 0)					\
++    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
++      add_builtin_function ("__builtin_" #NAME,				\
++			    s390_builtin_fn_types[FNTYPE],		\
++			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
++			    BUILT_IN_MD,				\
++			    NULL,					\
++			    0);
++#undef OB_DEF_VAR
++#define OB_DEF_VAR(...)
++#include "s390-builtins.def"
++}
++
++/* Return true if ARG is appropriate as argument number ARGNUM of
++   builtin DECL.  The operand flags from s390-builtins.def have to be
++   passed as OP_FLAGS.  An error is issued if ARG is out of range.  */
++bool
++s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
++{
++  if (O_UIMM_P (op_flags))
++    {
++      int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
++      int bitwidth = bitwidths[op_flags - O_U1];
++
++      if (!host_integerp (arg, 1)
++	  || ((unsigned HOST_WIDE_INT) tree_low_cst (arg, 1)
++	      > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1))
++	{
++	  error("constant argument %d for builtin %qF is out of range (0.."
++		HOST_WIDE_INT_PRINT_UNSIGNED ")",
++		argnum, decl,
++		((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
++	  return false;
++	}
++    }
++
++  if (O_SIMM_P (op_flags))
++    {
++      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
++      int bitwidth = bitwidths[op_flags - O_S2];
++
++      if (!host_integerp (arg, 0)
++	  || tree_low_cst (arg, 0) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
++	  || (tree_low_cst (arg, 0)
++	      > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1)))
++	{
++	  error("constant argument %d for builtin %qF is out of range ("
++		HOST_WIDE_INT_PRINT_DEC ".."
++		HOST_WIDE_INT_PRINT_DEC ")",
++		argnum, decl,
++		-((HOST_WIDE_INT)1 << (bitwidth - 1)),
++		((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Expand an expression EXP that calls a built-in function,
++   with result going to TARGET if that's convenient
++   (and in mode MODE if that's convenient).
++   SUBTARGET may be used as the target for computing one of EXP's operands.
++   IGNORE is nonzero if the value is to be ignored.  */
++
++static rtx
++s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
++		     enum machine_mode mode ATTRIBUTE_UNUSED,
++		     int ignore ATTRIBUTE_UNUSED)
++{
++  /* Must match the highest arity handled by the switch below.  */
++#define MAX_ARGS 6
++  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
++  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
++  enum insn_code icode;
++  rtx op[MAX_ARGS], pat;
++  int arity;
++  bool nonvoid;
++  tree arg;
++  call_expr_arg_iterator iter;
++  unsigned int all_op_flags = opflags_for_builtin (fcode);
++  enum machine_mode last_vec_mode = VOIDmode;
++
++  if (TARGET_DEBUG_ARG)
++    {
++      fprintf (stderr,
++	       "s390_expand_builtin, code = %4d, %s\n",
++	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
++    }
++
++  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
++      && fcode < S390_ALL_BUILTIN_MAX)
++    {
++      gcc_unreachable ();
++    }
++  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
++    {
++      icode = code_for_builtin[fcode];
++      /* Set a flag in the machine specific cfun part in order to support
++	 saving/restoring of FPRs.  */
++      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
++	cfun->machine->tbegin_p = true;
++    }
++  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
++    {
++      error ("Unresolved overloaded builtin");
++      return const0_rtx;
++    }
++  else
++    internal_error ("bad builtin fcode");
++
++  if (icode == 0)
++    internal_error ("bad builtin icode");
++
++  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
++
++  if (nonvoid)
++    {
++      enum machine_mode tmode = insn_data[icode].operand[0].mode;
++      if (!target
++	  || GET_MODE (target) != tmode
++	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
++	target = gen_reg_rtx (tmode);
++
++      /* There are builtins (e.g. vec_promote) with no vector
++	 arguments but an element selector.  So we have to also look
++	 at the vector return type when emitting the modulo
++	 operation.  */
++      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
++	last_vec_mode = insn_data[icode].operand[0].mode;
++    }
++
++  arity = 0;
++  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
++    {
++      const struct insn_operand_data *insn_op;
++      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
++
++      all_op_flags = all_op_flags >> O_SHIFT;
++
++      if (arg == error_mark_node)
++	return NULL_RTX;
++      if (arity >= MAX_ARGS)
++	return NULL_RTX;
++
++      if (O_IMM_P (op_flags)
++	  && TREE_CODE (arg) != INTEGER_CST)
++	{
++	  error ("constant value required for builtin %qF argument %d",
++		 fndecl, arity + 1);
++	  return const0_rtx;
++	}
++
++      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
++	return const0_rtx;
++
++      insn_op = &insn_data[icode].operand[arity + nonvoid];
++      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
++
++      /* expand_expr truncates constants to the target mode only if it
++	 is "convenient".  However, our checks below rely on this
++	 being done.  */
++      if (CONST_INT_P (op[arity])
++	  && SCALAR_INT_MODE_P (insn_op->mode)
++	  && GET_MODE (op[arity]) != insn_op->mode)
++	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
++						 insn_op->mode));
++
++      /* Wrap the expanded RTX for pointer types into a MEM expr with
++	 the proper mode.  This allows us to use e.g. (match_operand
++	 "memory_operand"..) in the insn patterns instead of (mem
++	 (match_operand "address_operand)).  This is helpful for
++	 patterns not just accepting MEMs.  */
++      if (POINTER_TYPE_P (TREE_TYPE (arg))
++	  && insn_op->predicate != address_operand)
++	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
++
++      /* Expand the modulo operation required on element selectors.  */
++      if (op_flags == O_ELEM)
++	{
++	  gcc_assert (last_vec_mode != VOIDmode);
++	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
++					     op[arity],
++					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
++					     NULL_RTX, 1, OPTAB_DIRECT);
++	}
++
++      /* Record the vector mode used for an element selector.  This assumes:
++	 1. There is no builtin with two different vector modes and an element selector
++	 2. The element selector comes after the vector type it is referring to.
++	 This is currently true for all the builtins but FIXME we
++	 should better check for that.  */
++      if (VECTOR_MODE_P (insn_op->mode))
++	last_vec_mode = insn_op->mode;
++
++      if (insn_op->predicate (op[arity], insn_op->mode))
++	{
++	  arity++;
++	  continue;
++	}
++
++      if (MEM_P (op[arity])
++	  && insn_op->predicate == memory_operand
++	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
++	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
++	{
++	  op[arity] = replace_equiv_address (op[arity],
++					     copy_to_mode_reg (Pmode,
++					       XEXP (op[arity], 0)));
++	}
++      else if (GET_MODE (op[arity]) == insn_op->mode
++	       || GET_MODE (op[arity]) == VOIDmode
++	       || (insn_op->predicate == address_operand
++		   && GET_MODE (op[arity]) == Pmode))
++	{
++	  /* An address_operand usually has VOIDmode in the expander
++	     so we cannot use this.  */
++	  enum machine_mode target_mode =
++	    (insn_op->predicate == address_operand
++	     ? Pmode : insn_op->mode);
++	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
++	}
++
++      if (!insn_op->predicate (op[arity], insn_op->mode))
++	{
++	  error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
++	  return const0_rtx;
++	}
++      arity++;
++    }
++
++  if (last_vec_mode != VOIDmode && !TARGET_VX)
++    {
++      error ("Vector type builtin %qF is not supported without -mvx "
++	     "(default with -march=z13).",
++	     fndecl);
++      return const0_rtx;
++    }
++
++  switch (arity)
++    {
++    case 0:
++      pat = GEN_FCN (icode) (target);
++      break;
++    case 1:
++      if (nonvoid)
++        pat = GEN_FCN (icode) (target, op[0]);
++      else
++	pat = GEN_FCN (icode) (op[0]);
++      break;
++    case 2:
++      if (nonvoid)
++	pat = GEN_FCN (icode) (target, op[0], op[1]);
++      else
++	pat = GEN_FCN (icode) (op[0], op[1]);
++      break;
++    case 3:
++      if (nonvoid)
++	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
++      else
++	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
++      break;
++    case 4:
++      if (nonvoid)
++	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
++      else
++	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
++      break;
++    case 5:
++      if (nonvoid)
++	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
++      else
++	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
++      break;
++    case 6:
++      if (nonvoid)
++	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
++      else
++	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
++      break;
++    default:
++      gcc_unreachable ();
++    }
++  if (!pat)
++    return NULL_RTX;
++  emit_insn (pat);
++
++  if (nonvoid)
++    return target;
++  else
++    return const0_rtx;
++}
++
++
+ static const int s390_hotpatch_hw_max = 1000000;
+ static int s390_hotpatch_hw_before_label = 0;
+ static int s390_hotpatch_hw_after_label = 0;
+@@ -458,9 +1009,43 @@ s390_handle_hotpatch_attribute (tree *no
+   return NULL_TREE;
+ }
+ 
++/* Expand the s390_vector_bool type attribute.  The innermost type
++   of *NODE is replaced by the BT_BV* entry of s390_builtin_types
++   selected by its mode; for any other mode *NODE is left unchanged.
++   The attribute itself is never kept (*NO_ADD_ATTRS is set).  */
++static tree
++s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
++				  tree args ATTRIBUTE_UNUSED,
++				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
++{
++  tree type = *node, result = NULL_TREE;
++  enum machine_mode mode;
++
++  while (POINTER_TYPE_P (type)
++	 || TREE_CODE (type) == FUNCTION_TYPE
++	 || TREE_CODE (type) == METHOD_TYPE
++	 || TREE_CODE (type) == ARRAY_TYPE)
++    type = TREE_TYPE (type);
++
++  mode = TYPE_MODE (type);
++  switch (mode)
++    {
++    case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
++    case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
++    case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
++    case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
++    default: break;
++    }
++
++  *no_add_attrs = true;  /* No need to hang on to the attribute.  */
++  if (result)
++    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
++  return NULL_TREE;
++}
++
+ static const struct attribute_spec s390_attribute_table[] = {
+-  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false
+-  },
++  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
++  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
+   /* End element.  */
+   { NULL,        0, 0, false, false, false, NULL, false }
+ };
+@@ -524,6 +1109,35 @@ s390_scalar_mode_supported_p (enum machi
+   return default_scalar_mode_supported_p (mode);
+ }
+ 
++/* Return true if the back end supports vector mode MODE: MODE has
++   to be a vector mode of at most 16 bytes, TARGET_VX has to be set
++   and the element mode has to be one of the modes accepted at the
++   end of this function.  */
++static bool
++s390_vector_mode_supported_p (enum machine_mode mode)
++{
++  enum machine_mode elt_mode;
++
++  if (!VECTOR_MODE_P (mode))
++    return false;
++  if (!TARGET_VX)
++    return false;
++  if (GET_MODE_SIZE (mode) > 16)
++    return false;
++
++  elt_mode = GET_MODE_INNER (mode);
++
++  /* Only these element modes are accepted.  */
++  return (elt_mode == QImode
++	  || elt_mode == HImode
++	  || elt_mode == SImode
++	  || elt_mode == DImode
++	  || elt_mode == TImode
++	  || elt_mode == SFmode
++	  || elt_mode == DFmode
++	  || elt_mode == TFmode);
++}
++
+ /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
+ 
+ void
+@@ -595,6 +1209,11 @@ s390_match_ccmode_set (rtx set, enum mac
+     case CCT1mode:
+     case CCT2mode:
+     case CCT3mode:
++    case CCVEQmode:
++    case CCVHmode:
++    case CCVHUmode:
++    case CCVFHmode:
++    case CCVFHEmode:
+       if (req_mode != set_mode)
+         return 0;
+       break;
+@@ -695,6 +1314,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool m
+ enum machine_mode
+ s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
+ {
++  if (TARGET_VX
++      && register_operand (op0, DFmode)
++      && register_operand (op1, DFmode))
++    {
++      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
++	 s390_emit_compare or s390_canonicalize_comparison will take
++	 care of it.  */
++      switch (code)
++	{
++	case EQ:
++	case NE:
++	  return CCVEQmode;
++	case GT:
++	case UNLE:
++	  return CCVFHmode;
++	case GE:
++	case UNLT:
++	  return CCVFHEmode;
++	default:
++	  ;
++	}
++    }
++
+   switch (code)
+     {
+       case EQ:
+@@ -972,8 +1614,73 @@ s390_canonicalize_comparison (int *code,
+       rtx tem = *op0; *op0 = *op1; *op1 = tem;
+       *code = (int)swap_condition ((enum rtx_code)*code);
+     }
++
++  /* Using the scalar variants of vector instructions for 64 bit FP
++     comparisons might require swapping the operands.  The code must
++     then become the swapped condition, e.g. a UNGT b == b UNLT a.  */
++  if (TARGET_VX
++      && register_operand (*op0, DFmode)
++      && register_operand (*op1, DFmode)
++      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
++    {
++      rtx tmp = *op0;
++      switch (*code)
++	{
++	case LT:   *code = GT; break;
++	case LE:   *code = GE; break;
++	case UNGT: *code = UNLT; break;
++	case UNGE: *code = UNLE; break;
++	default: ;
++	}
++      *op0 = *op1; *op1 = tmp;
++    }
++}
++
++/* Helper function for s390_emit_compare.  If possible emit a 64 bit
++   FP compare of CMP1 and CMP2 using the single element variant of
++   vector instructions.  Replace *CODE with the comparison code to be
++   used in the CC reg compare, store the condition code register RTX
++   in *CC and return true.  Return false if *CODE is not handled.  */
++static bool
++s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
++				rtx *cc)
++{
++  enum machine_mode cmp_mode;
++  bool swap_p = false;
++
++  switch (*code)
++    {
++    case EQ:   cmp_mode = CCVEQmode;  break;
++    case NE:   cmp_mode = CCVEQmode;  break;
++    case GT:   cmp_mode = CCVFHmode;  break;
++    case GE:   cmp_mode = CCVFHEmode; break;
++    case UNLE: cmp_mode = CCVFHmode;  break;
++    case UNLT: cmp_mode = CCVFHEmode; break;
++    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
++    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
++    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
++    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
++    default: return false;
++    }
++
++  if (swap_p)
++    {
++      rtx tmp = cmp2;
++      cmp2 = cmp1;
++      cmp1 = tmp;
++    }
++  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
++  emit_insn (gen_rtx_PARALLEL (VOIDmode,
++	       gen_rtvec (2,
++			  gen_rtx_SET (VOIDmode, *cc,
++				       gen_rtx_COMPARE (cmp_mode, cmp1,
++							cmp2)),
++			  gen_rtx_CLOBBER (VOIDmode,
++					   gen_rtx_SCRATCH (V2DImode)))));
++  return true;
++}
+ 
++
+ /* Emit a compare instruction suitable to implement the comparison
+    OP0 CODE OP1.  Return the correct condition RTL to be placed in
+    the IF_THEN_ELSE of the conditional branch testing the result.  */
+@@ -984,10 +1691,18 @@ s390_emit_compare (enum rtx_code code, r
+   enum machine_mode mode = s390_select_ccmode (code, op0, op1);
+   rtx cc;
+ 
+-  /* Do not output a redundant compare instruction if a compare_and_swap
+-     pattern already computed the result and the machine modes are compatible.  */
+-  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+-    {
++  if (TARGET_VX
++      && register_operand (op0, DFmode)
++      && register_operand (op1, DFmode)
++      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
++    {
++      /* Work has been done by s390_expand_vec_compare_scalar already.  */
++    }
++  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
++    {
++      /* Do not output a redundant compare instruction if a
++	 compare_and_swap pattern already computed the result and the
++	 machine modes are compatible.  */
+       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
+ 		  == GET_MODE (op0));
+       cc = op0;
+@@ -1222,6 +1937,93 @@ s390_branch_condition_mask (rtx code)
+         }
+       break;
+ 
++      /* Vector comparison modes.  */
++
++    case CCVEQmode:
++      switch (GET_CODE (code))
++	{
++	case EQ:        return CC0;
++	case NE:        return CC3;
++	default:        return -1;
++	}
++
++    case CCVEQANYmode:
++      switch (GET_CODE (code))
++	{
++	case EQ:        return CC0 | CC1;
++	case NE:        return CC3 | CC1;
++	default:        return -1;
++	}
++
++      /* Integer vector compare modes.  */
++
++    case CCVHmode:
++      switch (GET_CODE (code))
++	{
++	case GT:        return CC0;
++	case LE:        return CC3;
++	default:        return -1;
++	}
++
++    case CCVHANYmode:
++      switch (GET_CODE (code))
++	{
++	case GT:        return CC0 | CC1;
++	case LE:        return CC3 | CC1;
++	default:        return -1;
++	}
++
++    case CCVHUmode:
++      switch (GET_CODE (code))
++	{
++	case GTU:       return CC0;
++	case LEU:       return CC3;
++	default:        return -1;
++	}
++
++    case CCVHUANYmode:
++      switch (GET_CODE (code))
++	{
++	case GTU:       return CC0 | CC1;
++	case LEU:       return CC3 | CC1;
++	default:        return -1;
++	}
++
++      /* FP vector compare modes.  */
++
++    case CCVFHmode:
++      switch (GET_CODE (code))
++	{
++	case GT:        return CC0;
++	case UNLE:      return CC3;
++	default:        return -1;
++	}
++
++    case CCVFHANYmode:
++      switch (GET_CODE (code))
++	{
++	case GT:        return CC0 | CC1;
++	case UNLE:      return CC3 | CC1;
++	default:        return -1;
++	}
++
++    case CCVFHEmode:
++      switch (GET_CODE (code))
++	{
++	case GE:        return CC0;
++	case UNLT:      return CC3;
++	default:        return -1;
++	}
++
++    case CCVFHEANYmode:
++      switch (GET_CODE (code))
++	{
++	case GE:        return CC0 | CC1;
++	case UNLT:      return CC3 | CC1;
++	default:        return -1;
++	}
++
++
+     case CCRAWmode:
+       switch (GET_CODE (code))
+ 	{
+@@ -1421,6 +2223,9 @@ s390_contiguous_bitmask_p (unsigned HOST
+   /* Calculate a mask for all bits beyond the contiguous bits.  */
+   mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
+ 
++  if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
++    mask &= ((unsigned HOST_WIDE_INT) 1 << size) - 1;
++
+   if (mask & in)
+     return false;
+ 
+@@ -1436,6 +2241,128 @@ s390_contiguous_bitmask_p (unsigned HOST
+   return true;
+ }
+ 
++/* Check whether OP is a CONST_VECTOR of vector mode whose elements
++   all equal its first element, which has to be a CONST_INT.  */
++
++bool
++s390_const_vec_duplicate_p (rtx op)
++{
++  int nelts, idx;
++  rtx first;
++
++  if (!VECTOR_MODE_P (GET_MODE (op)) || GET_CODE (op) != CONST_VECTOR)
++    return false;
++
++  first = XVECEXP (op, 0, 0);
++  if (!CONST_INT_P (first))
++    return false;
++  nelts = GET_MODE_NUNITS (GET_MODE (op));
++  for (idx = 1; idx < nelts; ++idx)
++    if (!rtx_equal_p (XVECEXP (op, 0, idx), first))
++      return false;
++  return true;
++}
++
++/* Return true if OP contains the same contiguous bitfield in *all*
++   its elements.  START and END can be used to obtain the start and
++   end position of the bitfield.  Either of them may be NULL if the
++   caller is not interested in the value; they are only written if
++   true is returned.
++
++   START/END give the position of the first/last bit of the bitfield
++   counting from the lowest order bit starting with zero.  In order to
++   use these values for S/390 instructions this has to be converted to
++   "bits big endian" style.  */
++
++bool
++s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
++{
++  unsigned HOST_WIDE_INT mask;
++  int pos, length, size;
++
++  /* OP must be a constant integer vector with the same value in
++     all its elements.  */
++  if (!s390_const_vec_duplicate_p (op))
++    return false;
++
++  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
++
++  /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
++  if (size > 64)
++    return false;
++
++  mask = UINTVAL (XVECEXP (op, 0, 0));
++
++  /* Always compute position and length into locals so that the
++     results can be combined even if START or END is NULL.  */
++  if (s390_contiguous_bitmask_p (mask, size, &pos, &length))
++    {
++      if (start != NULL)
++	*start = pos;
++      if (end != NULL)
++	*end = pos + length - 1;
++      return true;
++    }
++
++  /* 0xff00000f style immediates can be covered by swapping start and
++     end indices in vgm.  Here the inverted mask is the contiguous
++     field, so the wrapped-around field starts right above it and
++     ends right below it.  */
++  if (s390_contiguous_bitmask_p (~mask, size, &pos, &length))
++    {
++      if (start != NULL)
++	*start = pos + length;
++      if (end != NULL)
++	*end = pos - 1;
++      return true;
++    }
++
++  return false;
++}
++
++/* Return true if all bytes of the constant vector OP are either 0 or
++   0xff.  If MASK is !=NULL a byte mask is generated which is
++   appropriate for the vector generate byte mask instruction.  */
++
++bool
++s390_bytemask_vector_p (rtx op, unsigned *mask)
++{
++  int i;
++  unsigned tmp_mask = 0;
++  int nunit, unit_size;
++
++  if (!VECTOR_MODE_P (GET_MODE (op))
++      || GET_CODE (op) != CONST_VECTOR
++      || !CONST_INT_P (XVECEXP (op, 0, 0)))
++    return false;
++
++  nunit = GET_MODE_NUNITS (GET_MODE (op));
++  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
++
++  for (i = 0; i < nunit; i++)
++    {
++      unsigned HOST_WIDE_INT c;
++      int j;
++
++      if (!CONST_INT_P (XVECEXP (op, 0, i)))
++	return false;
++
++      c = UINTVAL (XVECEXP (op, 0, i));
++      for (j = 0; j < unit_size; j++)
++	{
++	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
++	    return false;
++	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
++	  c = c >> BITS_PER_UNIT;
++	}
++    }
++
++  if (mask != NULL)
++    *mask = tmp_mask;
++
++  return true;
++}
++
+ /* Check whether a rotate of ROTL followed by an AND of CONTIG is
+    equivalent to a shift followed by the AND.  In particular, CONTIG
+    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
+@@ -1461,8 +2388,8 @@ s390_extzv_shift_ok (int bitsize, int ro
+ bool
+ s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
+ {
+-  /* Floating point registers cannot be split.  */
+-  if (FP_REG_P (src) || FP_REG_P (dst))
++  /* Floating point and vector registers cannot be split.  */
++  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
+     return false;
+ 
+   /* We don't need to split if operands are directly accessible.  */
+@@ -1752,6 +2679,22 @@ s390_option_override (void)
+   if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
+     target_flags |= MASK_OPT_HTM;
+ 
++  if (target_flags_explicit & MASK_OPT_VX)
++    {
++      if (TARGET_OPT_VX)
++	{
++	  if (!TARGET_CPU_VX)
++	    error ("hardware vector support not available on %s",
++		   s390_arch_string);
++	  if (TARGET_SOFT_FLOAT)
++	    error ("hardware vector support not available with -msoft-float");
++	}
++    }
++  else if (TARGET_CPU_VX)
++    /* Enable vector support if available and not explicitly disabled
++       by user.  E.g. with -m31 -march=z13 -mzarch */
++    target_flags |= MASK_OPT_VX;
++
+   if (TARGET_HARD_DFP && !TARGET_DFP)
+     {
+       if (target_flags_explicit & MASK_HARD_DFP)
+@@ -1791,6 +2734,7 @@ s390_option_override (void)
+       s390_cost = &z196_cost;
+       break;
+     case PROCESSOR_2827_ZEC12:
++    case PROCESSOR_2964_Z13:
+       s390_cost = &zEC12_cost;
+       break;
+     default:
+@@ -1818,7 +2762,8 @@ s390_option_override (void)
+ 
+   if (s390_tune == PROCESSOR_2097_Z10
+       || s390_tune == PROCESSOR_2817_Z196
+-      || s390_tune == PROCESSOR_2827_ZEC12)
++      || s390_tune == PROCESSOR_2827_ZEC12
++      || s390_tune == PROCESSOR_2964_Z13)
+     {
+       maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
+ 			     global_options.x_param_values,
+@@ -1882,16 +2827,20 @@ s390_option_override (void)
+ /* Map for smallest class containing reg regno.  */
+ 
+ const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
+-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+-  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
+-  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
+-  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,
+-  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
+-  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
+-  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
+-  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,
+-  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,
+-  ACCESS_REGS,	ACCESS_REGS
++{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
++  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
++  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
++  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
++  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
++  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
++  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
++  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
++  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
++  ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
++  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
++  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
++  VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
++  VEC_REGS, VEC_REGS                              /* 52 */
+ };
+ 
+ /* Return attribute type of insn.  */
+@@ -2933,6 +3882,19 @@ legitimate_pic_operand_p (rtx op)
+ static bool
+ s390_legitimate_constant_p (enum machine_mode mode, rtx op)
+ {
++  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
++    {
++      if (GET_MODE_SIZE (mode) != 16)
++	return 0;
++
++      if (!satisfies_constraint_j00 (op)
++	  && !satisfies_constraint_jm1 (op)
++	  && !satisfies_constraint_jKK (op)
++	  && !satisfies_constraint_jxx (op)
++	  && !satisfies_constraint_jyy (op))
++	return 0;
++    }
++
+   /* Accept all non-symbolic constants.  */
+   if (!SYMBOLIC_CONST (op))
+     return 1;
+@@ -2969,6 +3931,7 @@ s390_cannot_force_const_mem (enum machin
+     {
+     case CONST_INT:
+     case CONST_DOUBLE:
++    case CONST_VECTOR:
+       /* Accept all non-symbolic constants.  */
+       return false;
+ 
+@@ -3101,6 +4064,25 @@ legitimate_reload_fp_constant_p (rtx op)
+   return false;
+ }
+ 
++/* Returns true if the constant value OP is a legitimate vector operand
++   during and after reload.
++   This function accepts all constants which can be loaded directly
++   into a VR.  */
++
++static bool
++legitimate_reload_vector_constant_p (rtx op)
++{
++  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
++      && (satisfies_constraint_j00 (op)
++	  || satisfies_constraint_jm1 (op)
++	  || satisfies_constraint_jKK (op)
++	  || satisfies_constraint_jxx (op)
++	  || satisfies_constraint_jyy (op)))
++    return true;
++
++  return false;
++}
++
+ /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
+    return the class of reg to actually use.  */
+ 
+@@ -3111,6 +4093,7 @@ s390_preferred_reload_class (rtx op, reg
+     {
+       /* Constants we cannot reload into general registers
+ 	 must be forced into the literal pool.  */
++      case CONST_VECTOR:
+       case CONST_DOUBLE:
+       case CONST_INT:
+ 	if (reg_class_subset_p (GENERAL_REGS, rclass)
+@@ -3122,6 +4105,10 @@ s390_preferred_reload_class (rtx op, reg
+ 	else if (reg_class_subset_p (FP_REGS, rclass)
+ 		 && legitimate_reload_fp_constant_p (op))
+ 	  return FP_REGS;
++	else if (reg_class_subset_p (VEC_REGS, rclass)
++		 && legitimate_reload_vector_constant_p (op))
++	  return VEC_REGS;
++
+ 	return NO_REGS;
+ 
+       /* If a symbolic constant or a PLUS is reloaded,
+@@ -3245,6 +4232,7 @@ s390_reload_symref_address (rtx reg, rtx
+   /* Reload might have pulled a constant out of the literal pool.
+      Force it back in.  */
+   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
++      || GET_CODE (mem) == CONST_VECTOR
+       || GET_CODE (mem) == CONST)
+     mem = force_const_mem (GET_MODE (reg), mem);
+ 
+@@ -3284,6 +4272,30 @@ s390_secondary_reload (bool in_p, rtx x,
+   if (reg_classes_intersect_p (CC_REGS, rclass))
+     return GENERAL_REGS;
+ 
++  if (TARGET_VX)
++    {
++      /* The vst/vl vector move instructions allow only for short
++	 displacements.  */
++      if (MEM_P (x)
++	  && GET_CODE (XEXP (x, 0)) == PLUS
++	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
++	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
++	  && reg_class_subset_p (rclass, VEC_REGS)
++	  && (!reg_class_subset_p (rclass, FP_REGS)
++	      || (GET_MODE_SIZE (mode) > 8
++		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
++	{
++	  if (in_p)
++	    sri->icode = (TARGET_64BIT ?
++			  CODE_FOR_reloaddi_la_in :
++			  CODE_FOR_reloadsi_la_in);
++	  else
++	    sri->icode = (TARGET_64BIT ?
++			  CODE_FOR_reloaddi_la_out :
++			  CODE_FOR_reloadsi_la_out);
++	}
++    }
++
+   if (TARGET_Z10)
+     {
+       HOST_WIDE_INT offset;
+@@ -3299,17 +4311,15 @@ s390_secondary_reload (bool in_p, rtx x,
+ 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
+ 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
+ 
+-      /* On z10 we need a scratch register when moving QI, TI or floating
+-	 point mode values from or to a memory location with a SYMBOL_REF
+-	 or if the symref addend of a SI or DI move is not aligned to the
+-	 width of the access.  */
++      /* Handle all the (mem (symref)) accesses we cannot use the z10
++	 instructions for.  */
+       if (MEM_P (x)
+ 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
+-	  && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
+-	      || (!TARGET_ZARCH && mode == DImode)
+-	      || ((mode == HImode || mode == SImode || mode == DImode)
+-		  && (!s390_check_symref_alignment (XEXP (x, 0),
+-						    GET_MODE_SIZE (mode))))))
++	  && (mode == QImode
++	      || !reg_class_subset_p (rclass, GENERAL_REGS)
++	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
++	      || !s390_check_symref_alignment (XEXP (x, 0),
++					       GET_MODE_SIZE (mode))))
+ 	{
+ #define __SECONDARY_RELOAD_CASE(M,m)					\
+ 	  case M##mode:							\
+@@ -3334,7 +4344,27 @@ s390_secondary_reload (bool in_p, rtx x,
+ 	      __SECONDARY_RELOAD_CASE (SD, sd);
+ 	      __SECONDARY_RELOAD_CASE (DD, dd);
+ 	      __SECONDARY_RELOAD_CASE (TD, td);
+-
++	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
++	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
++	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
++	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
++	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
++	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
++	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
++	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
++	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
++	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
++	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
++	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
++	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
++	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
++	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
++	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
++	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
++	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
++	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
++	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
++	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
+ 	    default:
+ 	      gcc_unreachable ();
+ 	    }
+@@ -3371,12 +4401,12 @@ s390_secondary_reload (bool in_p, rtx x,
+ 	{
+ 	  if (in_p)
+ 	    sri->icode = (TARGET_64BIT ?
+-			  CODE_FOR_reloaddi_nonoffmem_in :
+-			  CODE_FOR_reloadsi_nonoffmem_in);
++			  CODE_FOR_reloaddi_la_in :
++			  CODE_FOR_reloadsi_la_in);
+ 	  else
+ 	    sri->icode = (TARGET_64BIT ?
+-			  CODE_FOR_reloaddi_nonoffmem_out :
+-			  CODE_FOR_reloadsi_nonoffmem_out);
++			  CODE_FOR_reloaddi_la_out :
++			  CODE_FOR_reloadsi_la_out);
+ 	}
+     }
+ 
+@@ -4610,6 +5640,256 @@ s390_expand_cmpmem (rtx target, rtx op0,
+   return true;
+ }
+ 
++/* Emit a conditional jump to LABEL for condition code mask MASK using
++   comparison operator COMPARISON.  Return the emitted jump insn.  */
++
++static rtx
++s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
++{
++  rtx temp;
++
++  gcc_assert (comparison == EQ || comparison == NE);
++  gcc_assert (mask > 0 && mask < 15);
++
++  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
++			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
++  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
++			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
++  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
++  return emit_jump_insn (temp);
++}
++
++/* Emit the instructions to implement strlen of STRING and store the
++   result in TARGET.  The string has the known ALIGNMENT.  This
++   version uses vector instructions and is therefore not appropriate
++   for targets prior to z13.  */
++
++void
++s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
++{
++  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
++  int very_likely = REG_BR_PROB_BASE - 1;
++  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
++  rtx str_reg = gen_reg_rtx (V16QImode);
++  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
++  rtx str_idx_reg = gen_reg_rtx (Pmode);
++  rtx result_reg = gen_reg_rtx (V16QImode);
++  rtx is_aligned_label = gen_label_rtx ();
++  rtx into_loop_label = NULL_RTX;
++  rtx loop_start_label = gen_label_rtx ();
++  rtx temp;
++  rtx len = gen_reg_rtx (QImode);
++  rtx cond;
++
++  s390_load_address (str_addr_base_reg, XEXP (string, 0));
++  emit_move_insn (str_idx_reg, const0_rtx);
++
++  if (INTVAL (alignment) < 16)
++    {
++      /* Check whether the address happens to be aligned properly so
++	 jump directly to the aligned loop.  */
++      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
++					    str_addr_base_reg, GEN_INT (15)),
++			       const0_rtx, EQ, NULL_RTX,
++			       Pmode, 1, is_aligned_label);
++
++      temp = gen_reg_rtx (Pmode);
++      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
++			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
++      gcc_assert (REG_P (temp));
++      highest_index_to_load_reg =
++	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
++		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
++      gcc_assert (REG_P (highest_index_to_load_reg));
++      emit_insn (gen_vllv16qi (str_reg,
++		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
++		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
++
++      into_loop_label = gen_label_rtx ();
++      s390_emit_jump (into_loop_label, NULL_RTX);
++      emit_barrier ();
++    }
++
++  emit_label (is_aligned_label);
++  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
++
++  /* Reaching this point we are only performing 16 bytes aligned
++     loads.  */
++  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
++
++  emit_label (loop_start_label);
++  LABEL_NUSES (loop_start_label) = 1;
++
++  /* Load 16 bytes of the string into VR.  */
++  emit_move_insn (str_reg,
++		  gen_rtx_MEM (V16QImode,
++			       gen_rtx_PLUS (Pmode, str_idx_reg,
++					     str_addr_base_reg)));
++  if (into_loop_label != NULL_RTX)
++    {
++      emit_label (into_loop_label);
++      LABEL_NUSES (into_loop_label) = 1;
++    }
++
++  /* Increment string index by 16 bytes.  */
++  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
++		str_idx_reg, 1, OPTAB_DIRECT);
++
++  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
++				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
++
++  add_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
++		REG_BR_PROB, GEN_INT (very_likely));
++  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
++
++  /* If the string pointer wasn't aligned we have loaded less than 16
++     bytes and the remaining bytes got filled with zeros (by vll).
++     Now we have to check whether the resulting index lies within the
++     bytes actually part of the string.  */
++
++  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
++			    highest_index_to_load_reg);
++  s390_load_address (highest_index_to_load_reg,
++		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
++				   const1_rtx));
++  if (TARGET_64BIT)
++    emit_insn (gen_movdicc (str_idx_reg, cond,
++			    highest_index_to_load_reg, str_idx_reg));
++  else
++    emit_insn (gen_movsicc (str_idx_reg, cond,
++			    highest_index_to_load_reg, str_idx_reg));
++
++  add_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
++		GEN_INT (very_unlikely));
++
++  expand_binop (Pmode, add_optab, str_idx_reg,
++		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
++  /* FIXME: len is already zero extended - so avoid the llgcr emitted
++     here.  */
++  temp = expand_binop (Pmode, add_optab, str_idx_reg,
++		       convert_to_mode (Pmode, len, 1),
++		       target, 1, OPTAB_DIRECT);
++  if (temp != target)
++    emit_move_insn (target, temp);
++}
++
++void
++s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
++{
++  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
++  rtx temp = gen_reg_rtx (Pmode);
++  rtx src_addr = XEXP (src, 0);
++  rtx dst_addr = XEXP (dst, 0);
++  rtx src_addr_reg = gen_reg_rtx (Pmode);
++  rtx dst_addr_reg = gen_reg_rtx (Pmode);
++  rtx offset = gen_reg_rtx (Pmode);
++  rtx vsrc = gen_reg_rtx (V16QImode);
++  rtx vpos = gen_reg_rtx (V16QImode);
++  rtx loadlen = gen_reg_rtx (SImode);
++  rtx gpos_qi = gen_reg_rtx(QImode);
++  rtx gpos = gen_reg_rtx (SImode);
++  rtx done_label = gen_label_rtx ();
++  rtx loop_label = gen_label_rtx ();
++  rtx exit_label = gen_label_rtx ();
++  rtx full_label = gen_label_rtx ();
++
++  /* Perform a quick check for string ending on the first up to 16
++     bytes and exit early if successful.  */
++
++  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
++  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
++  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
++  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
++  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
++  /* gpos is the byte index if a zero was found and 16 otherwise.
++     So if it is lower than the loaded bytes we have a hit.  */
++  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
++			   full_label);
++  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
++
++  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
++		      1, OPTAB_DIRECT);
++  emit_jump (exit_label);
++  emit_barrier ();
++
++  emit_label (full_label);
++  LABEL_NUSES (full_label) = 1;
++
++  /* Calculate `offset' so that src + offset points to the last byte
++     before 16 byte alignment.  */
++
++  /* temp = src_addr & 0xf */
++  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
++		      1, OPTAB_DIRECT);
++
++  /* offset = 0xf - temp */
++  emit_move_insn (offset, GEN_INT (15));
++  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
++		      1, OPTAB_DIRECT);
++
++  /* Store `offset' bytes in the destination string.  The quick check
++     has loaded at least `offset' bytes into vsrc.  */
++
++  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
++
++  /* Advance to the next byte to be loaded.  */
++  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
++		      1, OPTAB_DIRECT);
++
++  /* Make sure the addresses are single regs which can be used as a
++     base.  */
++  emit_move_insn (src_addr_reg, src_addr);
++  emit_move_insn (dst_addr_reg, dst_addr);
++
++  /* MAIN LOOP */
++
++  emit_label (loop_label);
++  LABEL_NUSES (loop_label) = 1;
++
++  emit_move_insn (vsrc,
++		  gen_rtx_MEM (V16QImode,
++			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
++
++  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
++				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
++  add_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
++		REG_BR_PROB, GEN_INT (very_unlikely));
++
++  emit_move_insn (gen_rtx_MEM (V16QImode,
++			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
++		  vsrc);
++  /* offset += 16 */
++  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
++		      offset,  1, OPTAB_DIRECT);
++
++  emit_jump (loop_label);
++  emit_barrier ();
++
++  /* REGULAR EXIT */
++
++  /* We are done.  Add the offset of the zero character to the dst_addr
++     pointer to get the result.  */
++
++  emit_label (done_label);
++  LABEL_NUSES (done_label) = 1;
++
++  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
++		      1, OPTAB_DIRECT);
++
++  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
++  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
++
++  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
++
++  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
++		      1, OPTAB_DIRECT);
++
++  /* EARLY EXIT */
++
++  emit_label (exit_label);
++  LABEL_NUSES (exit_label) = 1;
++}
++
+ 
+ /* Expand conditional increment or decrement using alc/slb instructions.
+    Should generate code setting DST to either SRC or SRC + INCREMENT,
+@@ -4964,6 +6244,304 @@ s390_expand_mask_and_shift (rtx val, enu
+ 			      NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ 
++/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
++   the result in TARGET.  */
++
++void
++s390_expand_vec_compare (rtx target, enum rtx_code cond,
++			 rtx cmp_op1, rtx cmp_op2)
++{
++  enum machine_mode mode = GET_MODE (target);
++  bool neg_p = false, swap_p = false;
++  rtx tmp;
++
++  if (GET_MODE (cmp_op1) == V2DFmode)
++    {
++      switch (cond)
++	{
++	  /* NE a != b -> !(a == b) */
++	case NE:   cond = EQ; neg_p = true;                break;
++	  /* UNGT a u> b -> !(b >= a) */
++	case UNGT: cond = GE; neg_p = true; swap_p = true; break;
++	  /* UNGE a u>= b -> !(b > a) */
++	case UNGE: cond = GT; neg_p = true; swap_p = true; break;
++	  /* LE: a <= b -> b >= a */
++	case LE:   cond = GE;               swap_p = true; break;
++	  /* UNLE: a u<= b -> !(a > b) */
++	case UNLE: cond = GT; neg_p = true;                break;
++	  /* LT: a < b -> b > a */
++	case LT:   cond = GT;               swap_p = true; break;
++	  /* UNLT: a u< b -> !(a >= b) */
++	case UNLT: cond = GE; neg_p = true;                break;
++	case UNEQ:
++	  emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
++	  return;
++	case LTGT:
++	  emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
++	  return;
++	case ORDERED:
++	  emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
++	  return;
++	case UNORDERED:
++	  emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
++	  return;
++	default: break;
++	}
++    }
++  else
++    {
++      switch (cond)
++	{
++	  /* NE: a != b -> !(a == b) */
++	case NE:  cond = EQ;  neg_p = true;                break;
++	  /* GE: a >= b -> !(b > a) */
++	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
++	  /* GEU: a >= b -> !(b > a) */
++	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
++	  /* LE: a <= b -> !(a > b) */
++	case LE:  cond = GT;  neg_p = true;                break;
++	  /* LEU: a <= b -> !(a > b) */
++	case LEU: cond = GTU; neg_p = true;                break;
++	  /* LT: a < b -> b > a */
++	case LT:  cond = GT;                swap_p = true; break;
++	  /* LTU: a < b -> b > a */
++	case LTU: cond = GTU;               swap_p = true; break;
++	default: break;
++	}
++    }
++
++  if (swap_p)
++    {
++      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
++    }
++
++  emit_insn (gen_rtx_SET (VOIDmode,
++			  target, gen_rtx_fmt_ee (cond,
++						  mode,
++						  cmp_op1, cmp_op2)));
++  if (neg_p)
++    emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_NOT (mode, target)));
++}
++
++/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
++   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
++   elements in CMP1 and CMP2 fulfill the comparison.  */
++void
++s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
++			    rtx cmp1, rtx cmp2, bool all_p)
++{
++  enum rtx_code new_code = code;
++  enum machine_mode cmp_mode, full_cmp_mode, scratch_mode;
++  rtx tmp_reg = gen_reg_rtx (SImode);
++  bool swap_p = false;
++
++  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
++    {
++      switch (code)
++	{
++	case EQ:  cmp_mode = CCVEQmode; break;
++	case NE:  cmp_mode = CCVEQmode; break;
++	case GT:  cmp_mode = CCVHmode;  break;
++	case GE:  cmp_mode = CCVHmode;  new_code = LE; swap_p = true; break;
++	case LT:  cmp_mode = CCVHmode;  new_code = GT; swap_p = true; break;
++	case LE:  cmp_mode = CCVHmode;  new_code = LE; break;
++	case GTU: cmp_mode = CCVHUmode; break;
++	case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
++	case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
++	case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
++	default: gcc_unreachable ();
++	}
++      scratch_mode = GET_MODE (cmp1);
++    }
++  else if (GET_MODE (cmp1) == V2DFmode)
++    {
++      switch (code)
++	{
++	case EQ:   cmp_mode = CCVEQmode;  break;
++	case NE:   cmp_mode = CCVEQmode;  break;
++	case GT:   cmp_mode = CCVFHmode;  break;
++	case GE:   cmp_mode = CCVFHEmode; break;
++	case UNLE: cmp_mode = CCVFHmode;  break;
++	case UNLT: cmp_mode = CCVFHEmode; break;
++	case LT:   cmp_mode = CCVFHmode;  new_code = GT; swap_p = true; break;
++	case LE:   cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
++	default: gcc_unreachable ();
++	}
++      scratch_mode = V2DImode;
++    }
++  else
++    gcc_unreachable ();
++
++  if (!all_p)
++    switch (cmp_mode)
++      {
++      case CCVEQmode:  full_cmp_mode = CCVEQANYmode;  break;
++      case CCVHmode:   full_cmp_mode = CCVHANYmode;   break;
++      case CCVHUmode:  full_cmp_mode = CCVHUANYmode;  break;
++      case CCVFHmode:  full_cmp_mode = CCVFHANYmode;  break;
++      case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
++      default: gcc_unreachable ();
++      }
++  else
++    /* The modes without ANY match the ALL modes.  */
++    full_cmp_mode = cmp_mode;
++
++  if (swap_p)
++    {
++      rtx tmp = cmp2;
++      cmp2 = cmp1;
++      cmp1 = tmp;
++    }
++
++  emit_insn (gen_rtx_PARALLEL (VOIDmode,
++	       gen_rtvec (2, gen_rtx_SET (
++			       VOIDmode,
++			       gen_rtx_REG (cmp_mode, CC_REGNUM),
++			       gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
++			  gen_rtx_CLOBBER (VOIDmode,
++					   gen_rtx_SCRATCH (scratch_mode)))));
++  emit_move_insn (target, const0_rtx);
++  emit_move_insn (tmp_reg, const1_rtx);
++
++  emit_move_insn (target,
++		  gen_rtx_IF_THEN_ELSE (SImode,
++		    gen_rtx_fmt_ee (new_code, VOIDmode,
++				    gen_rtx_REG (full_cmp_mode, CC_REGNUM),
++				    const0_rtx),
++		      target, tmp_reg));
++}
++
++/* Generate a vector comparison expression loading either elements of
++   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
++   and CMP_OP2.  */
++
++void
++s390_expand_vcond (rtx target, rtx then, rtx els,
++		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
++{
++  rtx tmp;
++  enum machine_mode result_mode;
++  rtx result_target;
++
++  /* We always use an integral type vector to hold the comparison
++     result.  */
++  result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
++  result_target = gen_reg_rtx (result_mode);
++
++  /* Alternatively this could be done by reload by lowering the cmp*
++     predicates.  But it appears to be better for scheduling etc. to
++     have that in early.  */
++  if (!REG_P (cmp_op1))
++    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
++
++  if (!REG_P (cmp_op2))
++    cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
++
++  s390_expand_vec_compare (result_target, cond,
++			   cmp_op1, cmp_op2);
++
++  /* If the results are supposed to be either -1 or 0 we are done
++     since this is what our compare instructions generate anyway.  */
++  if (all_ones_operand (then, GET_MODE (then))
++      && const0_operand (els, GET_MODE (els)))
++    {
++      emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
++					      result_target, 0));
++      return;
++    }
++
++  /* Otherwise we will do a vsel afterwards.  */
++  /* This gets triggered e.g.
++     with gcc.c-torture/compile/pr53410-1.c */
++  if (!REG_P (then))
++    then = force_reg (GET_MODE (target), then);
++
++  if (!REG_P (els))
++    els = force_reg (GET_MODE (target), els);
++
++  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
++			result_target,
++			CONST0_RTX (result_mode));
++
++  /* We compared the result against zero above so we have to swap then
++     and els here.  */
++  tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
++
++  gcc_assert (GET_MODE (target) == GET_MODE (then));
++  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
++}
++
++/* Emit the RTX necessary to initialize the vector TARGET with values
++   in VALS.  */
++void
++s390_expand_vec_init (rtx target, rtx vals)
++{
++  enum machine_mode mode = GET_MODE (target);
++  enum machine_mode inner_mode = GET_MODE_INNER (mode);
++  int n_elts = GET_MODE_NUNITS (mode);
++  bool all_same = true, all_regs = true, all_const_int = true;
++  rtx x;
++  int i;
++
++  for (i = 0; i < n_elts; ++i)
++    {
++      x = XVECEXP (vals, 0, i);
++
++      if (!CONST_INT_P (x))
++	all_const_int = false;
++
++      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
++	all_same = false;
++
++      if (!REG_P (x))
++	all_regs = false;
++    }
++
++  /* Use vector gen mask or vector gen byte mask if possible.  */
++  if (all_same && all_const_int
++      && (XVECEXP (vals, 0, 0) == const0_rtx
++	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
++					       NULL, NULL)
++	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
++    {
++      emit_insn (gen_rtx_SET (VOIDmode, target,
++			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
++      return;
++    }
++
++  if (all_same)
++    {
++      emit_insn (gen_rtx_SET (VOIDmode, target,
++			      gen_rtx_VEC_DUPLICATE (mode,
++						     XVECEXP (vals, 0, 0))));
++      return;
++    }
++
++  if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
++    {
++      /* Use vector load pair.  */
++      emit_insn (gen_rtx_SET (VOIDmode, target,
++			      gen_rtx_VEC_CONCAT (mode,
++						  XVECEXP (vals, 0, 0),
++						  XVECEXP (vals, 0, 1))));
++      return;
++    }
++
++  /* We are about to set the vector elements one by one.  Zero out the
++     full register first in order to help the data flow framework to
++     detect it as full VR set.  */
++  emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
++
++  /* Unfortunately the vec_init expander is not allowed to fail.  So
++     we have to implement the fallback ourselves.  */
++  for (i = 0; i < n_elts; i++)
++    emit_insn (gen_rtx_SET (VOIDmode, target,
++			    gen_rtx_UNSPEC (mode,
++					    gen_rtvec (3, XVECEXP (vals, 0, i),
++						       GEN_INT (i), target),
++					    UNSPEC_VEC_SET)));
++}
++
+ /* Structure to hold the initial parameters for a compare_and_swap operation
+    in HImode and QImode.  */
+ 
+@@ -5259,12 +6837,37 @@ s390_output_dwarf_dtprel (FILE *file, in
+   fputs ("@DTPOFF", file);
+ }
+ 
++/* Return the proper mode for REGNO being represented in the dwarf
++   unwind table.  */
++enum machine_mode
++s390_dwarf_frame_reg_mode (int regno)
++{
++  enum machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
++
++  /* The rightmost 64 bits of vector registers are call-clobbered.  */
++  if (GET_MODE_SIZE (save_mode) > 8)
++    save_mode = DImode;
++
++  return save_mode;
++}
++
+ #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+ /* Implement TARGET_MANGLE_TYPE.  */
+ 
+ static const char *
+ s390_mangle_type (const_tree type)
+ {
++  type = TYPE_MAIN_VARIANT (type);
++
++  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
++      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
++    return NULL;
++
++  if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
++  if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
++  if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
++  if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
++
+   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
+       && TARGET_LONG_DOUBLE_128)
+     return "g";
+@@ -5627,24 +7230,26 @@ print_operand_address (FILE *file, rtx a
+     'J': print tls_load/tls_gdcall/tls_ldcall suffix
+     'M': print the second word of a TImode operand.
+     'N': print the second word of a DImode operand.
+-    'O': print only the displacement of a memory reference.
+-    'R': print only the base register of a memory reference.
++    'O': print only the displacement of a memory reference or address.
++    'R': print only the base register of a memory reference or address.
+     'S': print S-type memory reference (base+displacement).
+     'Y': print shift count operand.
+ 
+     'b': print integer X as if it's an unsigned byte.
+     'c': print integer X as if it's an signed byte.
+-    'e': "end" of DImode contiguous bitmask X.
+-    'f': "end" of SImode contiguous bitmask X.
++    'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
++    'f': "end" of contiguous bitmask X in SImode.
+     'h': print integer X as if it's a signed halfword.
+     'i': print the first nonzero HImode part of X.
+     'j': print the first HImode part unequal to -1 of X.
+     'k': print the first nonzero SImode part of X.
+     'm': print the first SImode part unequal to -1 of X.
+     'o': print integer X as if it's an unsigned 32bit word.
+-    's': "start" of DImode contiguous bitmask X.
+-    't': "start" of SImode contiguous bitmask X.
++    's': "start" of contiguous bitmask X in either DImode or vector inner mode.
++    't': CONST_INT: "start" of contiguous bitmask X in SImode.
++         CONST_VECTOR: Generate a bitmask for vgbm instruction.
+     'x': print integer X as if it's an unsigned halfword.
++    'v': print register number as vector register (v1 instead of f1).
+ */
+ 
+ void
+@@ -5701,14 +7306,7 @@ print_operand (FILE *file, rtx x, int co
+         struct s390_address ad;
+ 	int ret;
+ 
+-	if (!MEM_P (x))
+-	  {
+-	    output_operand_lossage ("memory reference expected for "
+-				    "'O' output modifier");
+-	    return;
+-	  }
+-
+-	ret = s390_decompose_address (XEXP (x, 0), &ad);
++	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
+ 
+ 	if (!ret
+ 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+@@ -5730,14 +7328,7 @@ print_operand (FILE *file, rtx x, int co
+         struct s390_address ad;
+ 	int ret;
+ 
+-	if (!MEM_P (x))
+-	  {
+-	    output_operand_lossage ("memory reference expected for "
+-				    "'R' output modifier");
+-	    return;
+-	  }
+-
+-	ret = s390_decompose_address (XEXP (x, 0), &ad);
++	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
+ 
+ 	if (!ret
+ 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+@@ -5815,7 +7406,17 @@ print_operand (FILE *file, rtx x, int co
+   switch (GET_CODE (x))
+     {
+     case REG:
+-      fprintf (file, "%s", reg_names[REGNO (x)]);
++      /* Print FP regs as fx instead of vx when they are accessed
++	 through non-vector mode.  */
++      if (code == 'v'
++	  || VECTOR_NOFP_REG_P (x)
++	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
++	  || (VECTOR_REG_P (x)
++	      && (GET_MODE_SIZE (GET_MODE (x)) /
++		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
++	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
++      else
++	fprintf (file, "%s", reg_names[REGNO (x)]);
+       break;
+ 
+     case MEM:
+@@ -5902,6 +7503,44 @@ print_operand (FILE *file, rtx x, int co
+ 				    code);
+ 	}
+       break;
++    case CONST_VECTOR:
++      switch (code)
++	{
++	case 'h':
++	  gcc_assert (s390_const_vec_duplicate_p (x));
++	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
++		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
++	  break;
++	case 'e':
++	case 's':
++	  {
++	    int start, stop, inner_len;
++	    bool ok;
++
++	    inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
++	    ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
++	    gcc_assert (ok);
++	    if (code == 's' || code == 't')
++	      ival = inner_len - stop - 1;
++	    else
++	      ival = inner_len - start - 1;
++	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
++	  }
++	  break;
++	case 't':
++	  {
++	    unsigned mask;
++	    bool ok = s390_bytemask_vector_p (x, &mask);
++	    gcc_assert (ok);
++	    fprintf (file, "%u", mask);
++	  }
++	  break;
++
++	default:
++	  output_operand_lossage ("invalid constant vector for output "
++				  "modifier '%c'", code);
++	}
++      break;
+ 
+     default:
+       if (code == 0)
+@@ -6051,7 +7690,8 @@ s390_adjust_priority (rtx insn ATTRIBUTE
+       && s390_tune != PROCESSOR_2094_Z9_109
+       && s390_tune != PROCESSOR_2097_Z10
+       && s390_tune != PROCESSOR_2817_Z196
+-      && s390_tune != PROCESSOR_2827_ZEC12)
++      && s390_tune != PROCESSOR_2827_ZEC12
++      && s390_tune != PROCESSOR_2964_Z13)
+     return priority;
+ 
+   switch (s390_safe_attr_type (insn))
+@@ -6459,14 +8099,20 @@ replace_ltrel_base (rtx *x)
+ /* We keep a list of constants which we have to add to internal
+    constant tables in the middle of large functions.  */
+ 
+-#define NR_C_MODES 11
++#define NR_C_MODES 32
+ enum machine_mode constant_modes[NR_C_MODES] =
+ {
+   TFmode, TImode, TDmode,
++  V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
++  V4SFmode, V2DFmode, V1TFmode,
+   DFmode, DImode, DDmode,
++  V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
+   SFmode, SImode, SDmode,
++  V4QImode, V2HImode, V1SImode,  V1SFmode,
+   HImode,
+-  QImode
++  V2QImode, V1HImode,
++  QImode,
++  V1QImode
+ };
+ 
+ struct constant
+@@ -7490,6 +9136,23 @@ s390_output_pool_entry (rtx exp, enum ma
+       mark_symbol_refs_as_used (exp);
+       break;
+ 
++    case MODE_VECTOR_INT:
++    case MODE_VECTOR_FLOAT:
++      {
++	int i;
++	enum machine_mode inner_mode;
++	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
++
++	inner_mode = GET_MODE_INNER (GET_MODE (exp));
++	for (i = 0; i < XVECLEN (exp, 0); i++)
++	  s390_output_pool_entry (XVECEXP (exp, 0, i),
++				  inner_mode,
++				  i == 0
++				  ? align
++				  : GET_MODE_BITSIZE (inner_mode));
++      }
++      break;
++
+     default:
+       gcc_unreachable ();
+     }
+@@ -8205,9 +9868,25 @@ s390_update_frame_layout (void)
+ bool
+ s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+ {
++  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
++    return false;
++
+   switch (REGNO_REG_CLASS (regno))
+     {
++    case VEC_REGS:
++      return ((GET_MODE_CLASS (mode) == MODE_INT
++	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
++	      || mode == DFmode
++	      || s390_vector_mode_supported_p (mode));
++      break;
+     case FP_REGS:
++      if (TARGET_VX
++	  && ((GET_MODE_CLASS (mode) == MODE_INT
++	       && s390_class_max_nregs (FP_REGS, mode) == 1)
++	      || mode == DFmode
++	      || s390_vector_mode_supported_p (mode)))
++	return true;
++
+       if (REGNO_PAIR_OK (regno, mode))
+ 	{
+ 	  if (mode == SImode || mode == DImode)
+@@ -8269,19 +9948,86 @@ s390_hard_regno_rename_ok (unsigned int
+ int
+ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
+ {
++  int reg_size;
++  bool reg_pair_required_p = false;
++
+   switch (rclass)
+     {
+     case FP_REGS:
++    case VEC_REGS:
++      reg_size = TARGET_VX ? 16 : 8;
++
++      /* TF and TD modes would fit into a VR but we put them into a
++	 register pair since we do not have 128bit FP instructions on
++	 full VRs.  */
++      if (TARGET_VX
++	  && SCALAR_FLOAT_MODE_P (mode)
++	  && GET_MODE_SIZE (mode) >= 16)
++	reg_pair_required_p = true;
++
++      /* Even if complex types would fit into a single FPR/VR we force
++	 them into a register pair to deal with the parts more easily.
++	 (FIXME: What about complex ints?)  */
+       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+-	return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
+-      else
+-	return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
++	reg_pair_required_p = true;
++      break;
+     case ACCESS_REGS:
+-      return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
++      reg_size = 4;
++      break;
+     default:
++      reg_size = UNITS_PER_WORD;
+       break;
+     }
+-  return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
++
++  if (reg_pair_required_p)
++    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
++
++  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
++}
++
++/* Return nonzero if changing mode from FROM_MODE to TO_MODE must not be
++   allowed for register class RCLASS.  Same-size changes are always OK.  */
++
++int
++s390_cannot_change_mode_class (enum machine_mode from_mode,
++			       enum machine_mode to_mode,
++			       enum reg_class rclass)
++{
++  enum machine_mode small_mode;
++  enum machine_mode big_mode;
++
++  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
++    return 0;
++
++  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
++    {
++      small_mode = from_mode;
++      big_mode = to_mode;
++    }
++  else
++    {
++      small_mode = to_mode;
++      big_mode = from_mode;
++    }
++
++  /* Values residing in VRs are little-endian style.  All modes are
++     placed left-aligned in a VR.  This means that we cannot allow
++     switching between modes with differing sizes.  Also if the vector
++     facility is available we still place TFmode values in VR register
++     pairs, since the only instructions we have operating on TFmodes
++     only deal with register pairs.  Therefore we have to allow DFmode
++     subregs of TFmodes to enable the TFmode splitters.  */
++  if (reg_classes_intersect_p (VEC_REGS, rclass)
++      && (GET_MODE_SIZE (small_mode) < 8
++	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
++    return 1;
++
++  /* Likewise for access registers, since they have only half the
++     word size on 64-bit.  */
++  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
++    return 1;
++
++  return 0;
+ }
+ 
+ /* Return true if register FROM can be eliminated via register TO.  */
+@@ -9112,6 +10858,23 @@ s390_emit_epilogue (bool sibcall)
+ }
+ 
+ 
++/* The VX ABI differs for vararg functions, so the callee's prototype is
++   needed when passing vector values.  With TARGET_VX_ABI, return a message
++   for vector VAL if TYPELIST is 0 (BUILT_IN_MD callees exempt), else NULL.  */
++static const char *
++s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
++{
++  return ((TARGET_VX_ABI
++	   && typelist == 0
++	   && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
++	   && (funcdecl == NULL_TREE
++	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
++		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
++	  ? N_("Vector argument passed to unprototyped function")
++	  : NULL);
++}
++
++
+ /* Return the size in bytes of a function argument of
+    type TYPE and/or mode MODE.  At least one of TYPE or
+    MODE must be specified.  */
+@@ -9126,8 +10889,57 @@ s390_function_arg_size (enum machine_mod
+   if (mode != BLKmode)
+     return GET_MODE_SIZE (mode);
+ 
+-  /* If we have neither type nor mode, abort */
+-  gcc_unreachable ();
++  /* If we have neither type nor mode, abort */
++  gcc_unreachable ();
++}
++
++/* Return true if an argument of type TYPE and mode MODE (VX ABI only, at
++   most 16 bytes) is to be passed in a vector register, if available.  */
++
++bool
++s390_function_arg_vector (enum machine_mode mode, const_tree type)
++{
++  if (!TARGET_VX_ABI)
++    return false;
++
++  if (s390_function_arg_size (mode, type) > 16)
++    return false;
++
++  /* No type info available for some library calls ...  */
++  if (!type)
++    return VECTOR_MODE_P (mode);
++
++  /* The ABI says that record types with a single member are treated
++     just like that member would be; peel such wrappers repeatedly.  */
++  while (TREE_CODE (type) == RECORD_TYPE)
++    {
++      tree field, single = NULL_TREE;
++
++      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++	{
++	  if (TREE_CODE (field) != FIELD_DECL)
++	    continue;
++
++	  if (single == NULL_TREE)
++	    single = TREE_TYPE (field);
++	  else
++	    return false;
++	}
++
++      if (single == NULL_TREE)
++	return false;
++      else
++	{
++	  /* If the field declaration adds extra bytes due to
++	     e.g. padding this is not accepted as vector type.  */
++	  if (int_size_in_bytes (single) <= 0
++	      || int_size_in_bytes (single) != int_size_in_bytes (type))
++	    return false;
++	  type = single;
++	}
++    }
++
++  return TREE_CODE (type) == VECTOR_TYPE;
+ }
+ 
+ /* Return true if a function argument of type TYPE and mode MODE
+@@ -9136,8 +10948,7 @@ s390_function_arg_size (enum machine_mod
+ static bool
+ s390_function_arg_float (enum machine_mode mode, const_tree type)
+ {
+-  int size = s390_function_arg_size (mode, type);
+-  if (size > 8)
++  if (s390_function_arg_size (mode, type) > 8)
+     return false;
+ 
+   /* Soft-float changes the ABI: no floating-point registers are used.  */
+@@ -9220,20 +11031,24 @@ s390_pass_by_reference (cumulative_args_
+ 			bool named ATTRIBUTE_UNUSED)
+ {
+   int size = s390_function_arg_size (mode, type);
++
++  if (s390_function_arg_vector (mode, type))
++    return false;
++
+   if (size > 8)
+     return true;
+ 
+   if (type)
+     {
+       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
+-        return 1;
++        return true;
+ 
+       if (TREE_CODE (type) == COMPLEX_TYPE
+ 	  || TREE_CODE (type) == VECTOR_TYPE)
+-        return 1;
++	return true;
+     }
+ 
+-  return 0;
++  return false;
+ }
+ 
+ /* Update the data in CUM to advance over an argument of mode MODE and
+@@ -9244,11 +11059,21 @@ s390_pass_by_reference (cumulative_args_
+ 
+ static void
+ s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
+-			   const_tree type, bool named ATTRIBUTE_UNUSED)
++			   const_tree type, bool named)
+ {
+   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ 
+-  if (s390_function_arg_float (mode, type))
++  if (s390_function_arg_vector (mode, type))
++    {
++      /* We are called for unnamed vector stdarg arguments which are
++	 passed on the stack.  In this case this hook does not have to
++	 do anything since stack arguments are tracked by common
++	 code.  */
++      if (!named)
++	return;
++      cum->vrs += 1;
++    }
++  else if (s390_function_arg_float (mode, type))
+     {
+       cum->fprs += 1;
+     }
+@@ -9282,14 +11107,26 @@ s390_function_arg_advance (cumulative_ar
+ 
+ static rtx
+ s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
+-		   const_tree type, bool named ATTRIBUTE_UNUSED)
++		   const_tree type, bool named)
+ {
+   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ 
+-  if (s390_function_arg_float (mode, type))
++  if (!named)
++    s390_check_type_for_vector_abi (type, true, false);
++
++  if (s390_function_arg_vector (mode, type))
++    {
++      /* Vector arguments being part of the ellipsis are passed on the
++	 stack.  */
++      if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
++	return NULL_RTX;
++
++      return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
++    }
++  else if (s390_function_arg_float (mode, type))
+     {
+       if (cum->fprs + 1 > FP_ARG_NUM_REG)
+-	return 0;
++	return NULL_RTX;
+       else
+ 	return gen_rtx_REG (mode, cum->fprs + 16);
+     }
+@@ -9299,7 +11136,7 @@ s390_function_arg (cumulative_args_t cum
+       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
+ 
+       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
+-	return 0;
++	return NULL_RTX;
+       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
+ 	return gen_rtx_REG (mode, cum->gprs + 2);
+       else if (n_gprs == 2)
+@@ -9342,6 +11179,12 @@ s390_return_in_memory (const_tree type,
+       || TREE_CODE (type) == REAL_TYPE)
+     return int_size_in_bytes (type) > 8;
+ 
++  /* vector types which fit into a VR.  */
++  if (TARGET_VX_ABI
++      && TREE_CODE (type) == VECTOR_TYPE
++      && int_size_in_bytes (type) <= 16)
++    return false;
++
+   /* Aggregates and similar constructs are always returned
+      in memory.  */
+   if (AGGREGATE_TYPE_P (type)
+@@ -9384,6 +11227,12 @@ s390_function_and_libcall_value (enum ma
+ 				 const_tree fntype_or_decl,
+ 				 bool outgoing ATTRIBUTE_UNUSED)
+ {
++  /* For vector return types it is important to use the RET_TYPE
++     argument whenever available since the middle-end might have
++     changed the mode to a scalar mode.  */
++  bool vector_ret_type_p = ((ret_type && TREE_CODE (ret_type) == VECTOR_TYPE)
++			    || (!ret_type && VECTOR_MODE_P (mode)));
++
+   /* For normal functions perform the promotion as
+      promote_function_mode would do.  */
+   if (ret_type)
+@@ -9393,10 +11242,14 @@ s390_function_and_libcall_value (enum ma
+ 				    fntype_or_decl, 1);
+     }
+ 
+-  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
+-  gcc_assert (GET_MODE_SIZE (mode) <= 8);
+-
+-  if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
++  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
++	      || SCALAR_FLOAT_MODE_P (mode)
++	      || (TARGET_VX_ABI && vector_ret_type_p));
++  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
++
++  if (TARGET_VX_ABI && vector_ret_type_p)
++    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
++  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
+     return gen_rtx_REG (mode, 16);
+   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
+ 	   || UNITS_PER_LONG == UNITS_PER_WORD)
+@@ -9560,9 +11413,13 @@ s390_va_start (tree valist, rtx nextarg
+       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+     }
+ 
+-  /* Find the overflow area.  */
++  /* Find the overflow area.
++     FIXME: This currently is too pessimistic when the vector ABI is
++     enabled.  In that case we *always* set up the overflow area
++     pointer.  */
+   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
+-      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
++      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
++      || TARGET_VX_ABI)
+     {
+       t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+ 
+@@ -9604,6 +11461,9 @@ s390_va_start (tree valist, rtx nextarg
+        ret = args.reg_save_area[args.gpr+8]
+      else
+        ret = *args.overflow_arg_area++;
++   } else if (vector value) {
++       ret = *args.overflow_arg_area;
++       args.overflow_arg_area += size / 8;
+    } else if (float value) {
+      if (args.fgpr < 2)
+        ret = args.reg_save_area[args.fpr+64]
+@@ -9623,7 +11483,10 @@ s390_gimplify_va_arg (tree valist, tree
+   tree f_gpr, f_fpr, f_ovf, f_sav;
+   tree gpr, fpr, ovf, sav, reg, t, u;
+   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
+-  tree lab_false, lab_over, addr;
++  tree lab_false, lab_over;
++  tree addr = create_tmp_var (ptr_type_node, "addr");
++  bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
++			a stack slot.  */
+ 
+   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+   f_fpr = DECL_CHAIN (f_gpr);
+@@ -9642,6 +11505,8 @@ s390_gimplify_va_arg (tree valist, tree
+ 
+   size = int_size_in_bytes (type);
+ 
++  s390_check_type_for_vector_abi (type, true, false);
++
+   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
+     {
+       if (TARGET_DEBUG_ARG)
+@@ -9662,6 +11527,23 @@ s390_gimplify_va_arg (tree valist, tree
+       sav_scale = UNITS_PER_LONG;
+       size = UNITS_PER_LONG;
+       max_reg = GP_ARG_NUM_REG - n_reg;
++      left_align_p = false;
++    }
++  else if (s390_function_arg_vector (TYPE_MODE (type), type))
++    {
++      if (TARGET_DEBUG_ARG)
++	{
++	  fprintf (stderr, "va_arg: vector type");
++	  debug_tree (type);
++	}
++
++      indirect_p = 0;
++      reg = NULL_TREE;
++      n_reg = 0;
++      sav_ofs = 0;
++      sav_scale = 8;
++      max_reg = 0;
++      left_align_p = true;
+     }
+   else if (s390_function_arg_float (TYPE_MODE (type), type))
+     {
+@@ -9678,6 +11560,7 @@ s390_gimplify_va_arg (tree valist, tree
+       sav_ofs = 16 * UNITS_PER_LONG;
+       sav_scale = 8;
+       max_reg = FP_ARG_NUM_REG - n_reg;
++      left_align_p = false;
+     }
+   else
+     {
+@@ -9702,53 +11585,74 @@ s390_gimplify_va_arg (tree valist, tree
+ 
+       sav_scale = UNITS_PER_LONG;
+       max_reg = GP_ARG_NUM_REG - n_reg;
++      left_align_p = false;
+     }
+ 
+   /* Pull the value out of the saved registers ...  */
+ 
+-  lab_false = create_artificial_label (UNKNOWN_LOCATION);
+-  lab_over = create_artificial_label (UNKNOWN_LOCATION);
+-  addr = create_tmp_var (ptr_type_node, "addr");
+-
+-  t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
+-  t = build2 (GT_EXPR, boolean_type_node, reg, t);
+-  u = build1 (GOTO_EXPR, void_type_node, lab_false);
+-  t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
+-  gimplify_and_add (t, pre_p);
+-
+-  t = fold_build_pointer_plus_hwi (sav, sav_ofs);
+-  u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
+-	      fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
+-  t = fold_build_pointer_plus (t, u);
++  if (reg != NULL_TREE)
++    {
++      /*
++	if (reg > ((typeof (reg))max_reg))
++          goto lab_false;
+ 
+-  gimplify_assign (addr, t, pre_p);
++        addr = sav + sav_ofs + reg * sav_scale;
++
++	goto lab_over;
++
++        lab_false:
++      */
+ 
+-  gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
++      lab_false = create_artificial_label (UNKNOWN_LOCATION);
++      lab_over = create_artificial_label (UNKNOWN_LOCATION);
+ 
+-  gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
++      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
++      t = build2 (GT_EXPR, boolean_type_node, reg, t);
++      u = build1 (GOTO_EXPR, void_type_node, lab_false);
++      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
++      gimplify_and_add (t, pre_p);
+ 
++      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
++      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
++		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
++      t = fold_build_pointer_plus (t, u);
++
++      gimplify_assign (addr, t, pre_p);
++
++      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
++
++      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
++    }
+ 
+   /* ... Otherwise out of the overflow area.  */
+ 
+   t = ovf;
+-  if (size < UNITS_PER_LONG)
++  if (size < UNITS_PER_LONG && !left_align_p)
+     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
+ 
+   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+ 
+   gimplify_assign (addr, t, pre_p);
+ 
+-  t = fold_build_pointer_plus_hwi (t, size);
++  if (size < UNITS_PER_LONG && left_align_p)
++    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
++  else
++    t = fold_build_pointer_plus_hwi (t, size);
++
+   gimplify_assign (ovf, t, pre_p);
+ 
+-  gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
++  if (reg != NULL_TREE)
++    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+ 
+ 
+   /* Increment register save count.  */
+ 
+-  u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
+-	      fold_convert (TREE_TYPE (reg), size_int (n_reg)));
+-  gimplify_and_add (u, pre_p);
++  if (n_reg > 0)
++    {
++      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
++		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
++      gimplify_and_add (u, pre_p);
++    }
+ 
+   if (indirect_p)
+     {
+@@ -9793,7 +11697,14 @@ s390_expand_tbegin (rtx dest, rtx tdb, r
+     }
+ 
+   if (clobber_fprs_p)
+-    emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
++    {
++      if (TARGET_VX)
++	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
++				     tdb));
++      else
++	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
++				 tdb));
++    }
+   else
+     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
+ 				     tdb));
+@@ -9831,210 +11742,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, r
+     }
+ }
+ 
+-/* Builtins.  */
+-
+-enum s390_builtin
+-{
+-  S390_BUILTIN_TBEGIN,
+-  S390_BUILTIN_TBEGIN_NOFLOAT,
+-  S390_BUILTIN_TBEGIN_RETRY,
+-  S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+-  S390_BUILTIN_TBEGINC,
+-  S390_BUILTIN_TEND,
+-  S390_BUILTIN_TABORT,
+-  S390_BUILTIN_NON_TX_STORE,
+-  S390_BUILTIN_TX_NESTING_DEPTH,
+-  S390_BUILTIN_TX_ASSIST,
+-
+-  S390_BUILTIN_max
+-};
+-
+-static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
+-  CODE_FOR_tbegin,
+-  CODE_FOR_tbegin_nofloat,
+-  CODE_FOR_tbegin_retry,
+-  CODE_FOR_tbegin_retry_nofloat,
+-  CODE_FOR_tbeginc,
+-  CODE_FOR_tend,
+-  CODE_FOR_tabort,
+-  CODE_FOR_ntstg,
+-  CODE_FOR_etnd,
+-  CODE_FOR_tx_assist
+-};
+-
+-static void
+-s390_init_builtins (void)
+-{
+-  tree ftype, uint64_type;
+-  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
+-				       NULL, NULL);
+-  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
+-
+-  /* void foo (void) */
+-  ftype = build_function_type_list (void_type_node, NULL_TREE);
+-  add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-
+-  /* void foo (int) */
+-  ftype = build_function_type_list (void_type_node, integer_type_node,
+-				    NULL_TREE);
+-  add_builtin_function ("__builtin_tabort", ftype,
+-			S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr);
+-  add_builtin_function ("__builtin_tx_assist", ftype,
+-			S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
+-
+-  /* int foo (void *) */
+-  ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
+-  add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
+-			BUILT_IN_MD, NULL, returns_twice_attr);
+-  add_builtin_function ("__builtin_tbegin_nofloat", ftype,
+-			S390_BUILTIN_TBEGIN_NOFLOAT,
+-			BUILT_IN_MD, NULL, returns_twice_attr);
+-
+-  /* int foo (void *, int) */
+-  ftype = build_function_type_list (integer_type_node, ptr_type_node,
+-				    integer_type_node, NULL_TREE);
+-  add_builtin_function ("__builtin_tbegin_retry", ftype,
+-			S390_BUILTIN_TBEGIN_RETRY,
+-			BUILT_IN_MD,
+-			NULL, returns_twice_attr);
+-  add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
+-			S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+-			BUILT_IN_MD,
+-			NULL, returns_twice_attr);
+-
+-  /* int foo (void) */
+-  ftype = build_function_type_list (integer_type_node, NULL_TREE);
+-  add_builtin_function ("__builtin_tx_nesting_depth", ftype,
+-			S390_BUILTIN_TX_NESTING_DEPTH,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-  add_builtin_function ("__builtin_tend", ftype,
+-			S390_BUILTIN_TEND, BUILT_IN_MD,	NULL, NULL_TREE);
+-
+-  /* void foo (uint64_t *, uint64_t) */
+-  if (TARGET_64BIT)
+-    uint64_type = long_unsigned_type_node;
+-  else
+-    uint64_type = long_long_unsigned_type_node;
+-
+-   ftype = build_function_type_list (void_type_node,
+- 				    build_pointer_type (uint64_type),
+-				    uint64_type, NULL_TREE);
+-  add_builtin_function ("__builtin_non_tx_store", ftype,
+-			S390_BUILTIN_NON_TX_STORE,
+-			BUILT_IN_MD, NULL, NULL_TREE);
+-}
+-
+-/* Expand an expression EXP that calls a built-in function,
+-   with result going to TARGET if that's convenient
+-   (and in mode MODE if that's convenient).
+-   SUBTARGET may be used as the target for computing one of EXP's operands.
+-   IGNORE is nonzero if the value is to be ignored.  */
+-
+-static rtx
+-s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+-		     enum machine_mode mode ATTRIBUTE_UNUSED,
+-		     int ignore ATTRIBUTE_UNUSED)
+-{
+-#define MAX_ARGS 2
+-
+-  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+-  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+-  enum insn_code icode;
+-  rtx op[MAX_ARGS], pat;
+-  int arity;
+-  bool nonvoid;
+-  tree arg;
+-  call_expr_arg_iterator iter;
+-
+-  if (fcode >= S390_BUILTIN_max)
+-    internal_error ("bad builtin fcode");
+-  icode = code_for_builtin[fcode];
+-  if (icode == 0)
+-    internal_error ("bad builtin fcode");
+-
+-  if (!TARGET_HTM)
+-    error ("Transactional execution builtins not enabled (-mhtm)\n");
+-
+-  /* Set a flag in the machine specific cfun part in order to support
+-     saving/restoring of FPRs.  */
+-  if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
+-    cfun->machine->tbegin_p = true;
+-
+-  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+-
+-  arity = 0;
+-  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+-    {
+-      const struct insn_operand_data *insn_op;
+-
+-      if (arg == error_mark_node)
+-	return NULL_RTX;
+-      if (arity >= MAX_ARGS)
+-	return NULL_RTX;
+-
+-      insn_op = &insn_data[icode].operand[arity + nonvoid];
+-
+-      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+-
+-      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
+-	{
+-	  if (insn_op->predicate == memory_operand)
+-	    {
+-	      /* Don't move a NULL pointer into a register. Otherwise
+-		 we have to rely on combine being able to move it back
+-		 in order to get an immediate 0 in the instruction.  */
+-	      if (op[arity] != const0_rtx)
+-		op[arity] = copy_to_mode_reg (Pmode, op[arity]);
+-	      op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
+-	    }
+-	  else
+-	    op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+-	}
+-
+-      arity++;
+-    }
+-
+-  if (nonvoid)
+-    {
+-      enum machine_mode tmode = insn_data[icode].operand[0].mode;
+-      if (!target
+-	  || GET_MODE (target) != tmode
+-	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+-	target = gen_reg_rtx (tmode);
+-    }
+-
+-  switch (arity)
+-    {
+-    case 0:
+-      pat = GEN_FCN (icode) (target);
+-      break;
+-    case 1:
+-      if (nonvoid)
+-        pat = GEN_FCN (icode) (target, op[0]);
+-      else
+-	pat = GEN_FCN (icode) (op[0]);
+-      break;
+-    case 2:
+-      if (nonvoid)
+-	pat = GEN_FCN (icode) (target, op[0], op[1]);
+-      else
+-	pat = GEN_FCN (icode) (op[0], op[1]);
+-      break;
+-    default:
+-      gcc_unreachable ();
+-    }
+-  if (!pat)
+-    return NULL_RTX;
+-  emit_insn (pat);
+-
+-  if (nonvoid)
+-    return target;
+-  else
+-    return const0_rtx;
+-}
+-
+-
+ /* Output assembly code for the trampoline template to
+    stdio stream FILE.
+ 
+@@ -10496,15 +12203,18 @@ s390_call_saved_register_used (tree call
+       mode = TYPE_MODE (type);
+       gcc_assert (mode);
+ 
++      /* We assume that in the target function all parameters are
++	 named.  This only has an impact on vector argument register
++	 usage none of which is call-saved.  */
+       if (pass_by_reference (&cum_v, mode, type, true))
+  	{
+  	  mode = Pmode;
+  	  type = build_pointer_type (type);
+  	}
+ 
+-       parm_rtx = s390_function_arg (cum, mode, type, 0);
++       parm_rtx = s390_function_arg (cum, mode, type, true);
+ 
+-       s390_function_arg_advance (cum, mode, type, 0);
++       s390_function_arg_advance (cum, mode, type, true);
+ 
+        if (!parm_rtx)
+ 	 continue;
+@@ -10711,6 +12421,13 @@ s390_conditional_register_usage (void)
+       for (i = 16; i < 32; i++)
+ 	call_used_regs[i] = fixed_regs[i] = 1;
+     }
++
++  /* Disable v16 - v31 for non-vector target.  */
++  if (!TARGET_VX)
++    {
++      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
++	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
++    }
+ }
+ 
+ /* Corresponding function to eh_return expander.  */
+@@ -11232,7 +12949,8 @@ s390_reorg (void)
+   /* Walk over the insns and do some >=z10 specific changes.  */
+   if (s390_tune == PROCESSOR_2097_Z10
+       || s390_tune == PROCESSOR_2817_Z196
+-      || s390_tune == PROCESSOR_2827_ZEC12)
++      || s390_tune == PROCESSOR_2827_ZEC12
++      || s390_tune == PROCESSOR_2964_Z13)
+     {
+       rtx insn;
+       bool insn_added_p = false;
+@@ -11392,27 +13110,66 @@ s390_z10_prevent_earlyload_conflicts (rt
+ 
+ static int s390_sched_state;
+ 
+-#define S390_OOO_SCHED_STATE_NORMAL  3
+-#define S390_OOO_SCHED_STATE_CRACKED 4
++#define S390_SCHED_STATE_NORMAL  3
++#define S390_SCHED_STATE_CRACKED 4
+ 
+-#define S390_OOO_SCHED_ATTR_MASK_CRACKED    0x1
+-#define S390_OOO_SCHED_ATTR_MASK_EXPANDED   0x2
+-#define S390_OOO_SCHED_ATTR_MASK_ENDGROUP   0x4
+-#define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
++#define S390_SCHED_ATTR_MASK_CRACKED    0x1
++#define S390_SCHED_ATTR_MASK_EXPANDED   0x2
++#define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
++#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
+ 
+ static unsigned int
+ s390_get_sched_attrmask (rtx insn)
+ {
+   unsigned int mask = 0;
+ 
+-  if (get_attr_ooo_cracked (insn))
+-    mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
+-  if (get_attr_ooo_expanded (insn))
+-    mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
+-  if (get_attr_ooo_endgroup (insn))
+-    mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
+-  if (get_attr_ooo_groupalone (insn))
+-    mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
++  switch (s390_tune)
++    {
++    case PROCESSOR_2827_ZEC12:
++      if (get_attr_zEC12_cracked (insn))
++	mask |= S390_SCHED_ATTR_MASK_CRACKED;
++      if (get_attr_zEC12_expanded (insn))
++	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
++      if (get_attr_zEC12_endgroup (insn))
++	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
++      if (get_attr_zEC12_groupalone (insn))
++	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
++      break;
++    case PROCESSOR_2964_Z13:
++      if (get_attr_z13_cracked (insn))
++	mask |= S390_SCHED_ATTR_MASK_CRACKED;
++      if (get_attr_z13_expanded (insn))
++	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
++      if (get_attr_z13_endgroup (insn))
++	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
++      if (get_attr_z13_groupalone (insn))
++	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
++      break;
++    default:
++      gcc_unreachable ();
++    }
++  return mask;
++}
++
++static unsigned int
++s390_get_unit_mask (rtx insn, int *units)
++{
++  unsigned int mask = 0;	/* One bit per unit attribute set on INSN.  */
++
++  switch (s390_tune)
++    {
++    case PROCESSOR_2964_Z13:
++      *units = 3;	/* Number of mask bits assigned below.  */
++      if (get_attr_z13_unit_lsu (insn))
++	mask |= 1 << 0;
++      if (get_attr_z13_unit_fxu (insn))
++	mask |= 1 << 1;
++      if (get_attr_z13_unit_vfu (insn))
++	mask |= 1 << 2;
++      break;
++    default:
++      gcc_unreachable ();	/* Only z13 tuning is handled here.  */
++    }
+   return mask;
+ }
+ 
+@@ -11430,48 +13187,66 @@ s390_sched_score (rtx insn)
+     case 0:
+       /* Try to put insns into the first slot which would otherwise
+ 	 break a group.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+-	  || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
++	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
+ 	score += 5;
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
++      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
+ 	score += 10;
+     case 1:
+       /* Prefer not cracked insns while trying to put together a
+ 	 group.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+-	  && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
+-	  && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
++	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
++	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
+ 	score += 10;
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
++      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
+ 	score += 5;
+       break;
+     case 2:
+       /* Prefer not cracked insns while trying to put together a
+ 	 group.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+-	  && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
+-	  && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
++	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
++	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
+ 	score += 10;
+       /* Prefer endgroup insns in the last slot.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
++      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
+ 	score += 10;
+       break;
+-    case S390_OOO_SCHED_STATE_NORMAL:
++    case S390_SCHED_STATE_NORMAL:
+       /* Prefer not cracked insns if the last was not cracked.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+-	  && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
++	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
+ 	score += 5;
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
++      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
+ 	score += 10;
+       break;
+-    case S390_OOO_SCHED_STATE_CRACKED:
++    case S390_SCHED_STATE_CRACKED:
+       /* Try to keep cracked insns together to prevent them from
+ 	 interrupting groups.  */
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+-	  || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
++	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
+ 	score += 5;
+       break;
+     }
++
++  if (s390_tune == PROCESSOR_2964_Z13)
++    {
++      int units, i;
++      unsigned unit_mask, m = 1;
++
++      unit_mask = s390_get_unit_mask (insn, &units);
++      gcc_assert (units <= MAX_SCHED_UNITS);
++
++      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
++	 ago the last insn of this unit type got scheduled.  This is
++	 supposed to help provide a proper instruction mix to the
++	 CPU.  */
++      for (i = 0; i < units; i++, m <<= 1)
++	if (m & unit_mask)
++	  score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
++		    MAX_SCHED_MIX_DISTANCE);
++    }
+   return score;
+ }
+ 
+@@ -11487,7 +13262,8 @@ s390_sched_reorder (FILE *file, int verb
+     if (reload_completed && *nreadyp > 1)
+       s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
+ 
+-  if (s390_tune == PROCESSOR_2827_ZEC12
++  if ((s390_tune == PROCESSOR_2827_ZEC12
++       || s390_tune == PROCESSOR_2964_Z13)
+       && reload_completed
+       && *nreadyp > 1)
+     {
+@@ -11526,12 +13302,12 @@ s390_sched_reorder (FILE *file, int verb
+ 
+ 	      if (verbose > 5)
+ 		fprintf (file,
+-			 "move insn %d to the top of list\n",
++			 ";;\t\tBACKEND: move insn %d to the top of list\n",
+ 			 INSN_UID (ready[last_index]));
+ 	    }
+ 	  else if (verbose > 5)
+ 	    fprintf (file,
+-		     "best insn %d already on top\n",
++		     ";;\t\tBACKEND: best insn %d already on top\n",
+ 		     INSN_UID (ready[last_index]));
+ 	}
+ 
+@@ -11542,16 +13318,35 @@ s390_sched_reorder (FILE *file, int verb
+ 
+ 	  for (i = last_index; i >= 0; i--)
+ 	    {
+-	      if (recog_memoized (ready[i]) < 0)
++	      unsigned int sched_mask;
++	      rtx insn = ready[i];
++
++	      if (recog_memoized (insn) < 0)
+ 		continue;
+-	      fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
+-		       s390_sched_score (ready[i]));
+-#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
+-	      PRINT_OOO_ATTR (ooo_cracked);
+-	      PRINT_OOO_ATTR (ooo_expanded);
+-	      PRINT_OOO_ATTR (ooo_endgroup);
+-	      PRINT_OOO_ATTR (ooo_groupalone);
+-#undef PRINT_OOO_ATTR
++
++	      sched_mask = s390_get_sched_attrmask (insn);
++	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
++		       INSN_UID (insn),
++		       s390_sched_score (insn));
++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
++					   ((M) & sched_mask) ? #ATTR : "");
++	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
++	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
++	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
++	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
++#undef PRINT_SCHED_ATTR
++	      if (s390_tune == PROCESSOR_2964_Z13)
++		{
++		  unsigned int unit_mask, m = 1;
++		  int units, j;
++
++		  unit_mask  = s390_get_unit_mask (insn, &units);
++		  fprintf (file, "(units:");
++		  for (j = 0; j < units; j++, m <<= 1)
++		    if (m & unit_mask)
++		      fprintf (file, " u%d", j);
++		  fprintf (file, ")");
++		}
+ 	      fprintf (file, "\n");
+ 	    }
+ 	}
+@@ -11570,18 +13365,19 @@ s390_sched_variable_issue (FILE *file, i
+ {
+   last_scheduled_insn = insn;
+ 
+-  if (s390_tune == PROCESSOR_2827_ZEC12
++  if ((s390_tune == PROCESSOR_2827_ZEC12
++       || s390_tune == PROCESSOR_2964_Z13)
+       && reload_completed
+       && recog_memoized (insn) >= 0)
+     {
+       unsigned int mask = s390_get_sched_attrmask (insn);
+ 
+-      if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+-	  || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
+-	s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
+-      else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
+-	       || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
+-	s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
++      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
++	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
++	s390_sched_state = S390_SCHED_STATE_CRACKED;
++      else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
++	       || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
++	s390_sched_state = S390_SCHED_STATE_NORMAL;
+       else
+ 	{
+ 	  /* Only normal insns are left (mask == 0).  */
+@@ -11590,30 +13386,73 @@ s390_sched_variable_issue (FILE *file, i
+ 	    case 0:
+ 	    case 1:
+ 	    case 2:
+-	    case S390_OOO_SCHED_STATE_NORMAL:
+-	      if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
++	    case S390_SCHED_STATE_NORMAL:
++	      if (s390_sched_state == S390_SCHED_STATE_NORMAL)
+ 		s390_sched_state = 1;
+ 	      else
+ 		s390_sched_state++;
+ 
+ 	      break;
+-	    case S390_OOO_SCHED_STATE_CRACKED:
+-	      s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
++	    case S390_SCHED_STATE_CRACKED:
++	      s390_sched_state = S390_SCHED_STATE_NORMAL;
+ 	      break;
+ 	    }
+ 	}
++
++      if (s390_tune == PROCESSOR_2964_Z13)
++	{
++	  int units, i;
++	  unsigned unit_mask, m = 1;
++
++	  unit_mask = s390_get_unit_mask (insn, &units);
++	  gcc_assert (units <= MAX_SCHED_UNITS);
++
++	  for (i = 0; i < units; i++, m <<= 1)
++	    if (m & unit_mask)
++	      last_scheduled_unit_distance[i] = 0;
++	    else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
++	      last_scheduled_unit_distance[i]++;
++	}
++
+       if (verbose > 5)
+ 	{
+-	  fprintf (file, "insn %d: ", INSN_UID (insn));
+-#define PRINT_OOO_ATTR(ATTR)						\
+-	  fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
+-	  PRINT_OOO_ATTR (ooo_cracked);
+-	  PRINT_OOO_ATTR (ooo_expanded);
+-	  PRINT_OOO_ATTR (ooo_endgroup);
+-	  PRINT_OOO_ATTR (ooo_groupalone);
+-#undef PRINT_OOO_ATTR
+-	  fprintf (file, "\n");
+-	  fprintf (file, "sched state: %d\n", s390_sched_state);
++	  unsigned int sched_mask;
++
++	  sched_mask = s390_get_sched_attrmask (insn);
++
++	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
++	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
++	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
++	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
++	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
++#undef PRINT_SCHED_ATTR
++
++	  if (s390_tune == PROCESSOR_2964_Z13)
++	    {
++	      unsigned int unit_mask, m = 1;
++	      int units, j;
++
++	      unit_mask  = s390_get_unit_mask (insn, &units);
++	      fprintf (file, "(units:");
++	      for (j = 0; j < units; j++, m <<= 1)
++		if (m & unit_mask)
++		  fprintf (file, " %d", j);
++	      fprintf (file, ")");
++	    }
++	  fprintf (file, " sched state: %d\n", s390_sched_state);
++
++	  if (s390_tune == PROCESSOR_2964_Z13)
++	    {
++	      int units, j;
++
++	      s390_get_unit_mask (insn, &units);
++
++	      fprintf (file, ";;\t\tBACKEND: units unused for: ");
++	      for (j = 0; j < units; j++)
++		fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
++	      fprintf (file, "\n");
++	    }
+ 	}
+     }
+ 
+@@ -11630,6 +13469,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UN
+ 		 int max_ready ATTRIBUTE_UNUSED)
+ {
+   last_scheduled_insn = NULL_RTX;
++  memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
+   s390_sched_state = 0;
+ }
+ 
+@@ -11663,7 +13503,8 @@ s390_loop_unroll_adjust (unsigned nunrol
+ 
+   if (s390_tune != PROCESSOR_2097_Z10
+       && s390_tune != PROCESSOR_2817_Z196
+-      && s390_tune != PROCESSOR_2827_ZEC12)
++      && s390_tune != PROCESSOR_2827_ZEC12
++      && s390_tune != PROCESSOR_2964_Z13)
+     return nunroll;
+ 
+   /* Count the number of memory references within the loop body.  */
+@@ -11691,6 +13532,84 @@ s390_loop_unroll_adjust (unsigned nunrol
+     }
+ }
+ 
++/* Return the vector mode to be used for inner mode MODE when doing
++   vectorization.  */
++static enum machine_mode
++s390_preferred_simd_mode (enum machine_mode mode)
++{
++  if (TARGET_VX)
++    switch (mode)
++      {
++      case DFmode:
++	return V2DFmode;
++      case DImode:
++	return V2DImode;
++      case SImode:
++	return V4SImode;
++      case HImode:
++	return V8HImode;
++      case QImode:
++	return V16QImode;
++      default:;
++      }
++  return word_mode;
++}
++
++/* Our hardware does not require vectors to be strictly aligned.  */
++static bool
++s390_support_vector_misalignment (enum machine_mode mode ATTRIBUTE_UNUSED,
++				  const_tree type ATTRIBUTE_UNUSED,
++				  int misalignment ATTRIBUTE_UNUSED,
++				  bool is_packed ATTRIBUTE_UNUSED)
++{
++  if (TARGET_VX)
++    return true;
++
++  return default_builtin_support_vector_misalignment (mode, type, misalignment,
++						      is_packed);
++}
++
++/* The vector ABI requires vector types to be aligned on an 8 byte
++   boundary (our stack alignment).  However, we allow this to be
++   overridden by the user, although this definitely breaks the ABI.  */
++static HOST_WIDE_INT
++s390_vector_alignment (const_tree type)
++{
++  if (!TARGET_VX_ABI)
++    return default_vector_alignment (type);
++
++  if (TYPE_USER_ALIGN (type))
++    return TYPE_ALIGN (type);
++
++  return MIN (64, tree_low_cst (TYPE_SIZE (type), 0));
++}
++
++/* Implement TARGET_ASM_FILE_END.  */
++static void
++s390_asm_file_end (void)
++{
++#ifdef HAVE_AS_GNU_ATTRIBUTE
++  varpool_node *vnode;
++  cgraph_node *cnode;
++
++  FOR_EACH_VARIABLE (vnode)
++    if (TREE_PUBLIC (vnode->symbol.decl))
++      s390_check_type_for_vector_abi (TREE_TYPE (vnode->symbol.decl),
++				      false, false);
++
++  FOR_EACH_FUNCTION (cnode)
++    if (TREE_PUBLIC (cnode->symbol.decl))
++      s390_check_type_for_vector_abi (TREE_TYPE (cnode->symbol.decl),
++				      false, false);
++
++
++  if (s390_vector_abi != 0)
++    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
++	     s390_vector_abi);
++#endif
++  file_end_indicate_exec_stack ();
++}
++
+ /* Initialize GCC target structure.  */
+ 
+ #undef  TARGET_ASM_ALIGNED_HI_OP
+@@ -11797,6 +13716,8 @@ s390_loop_unroll_adjust (unsigned nunrol
+ #define TARGET_FUNCTION_VALUE s390_function_value
+ #undef TARGET_LIBCALL_VALUE
+ #define TARGET_LIBCALL_VALUE s390_libcall_value
++#undef TARGET_STRICT_ARGUMENT_NAMING
++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+ 
+ #undef TARGET_FIXED_CONDITION_CODE_REGS
+ #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
+@@ -11812,6 +13733,9 @@ s390_loop_unroll_adjust (unsigned nunrol
+ #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
+ #endif
+ 
++#undef TARGET_DWARF_FRAME_REG_MODE
++#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
++
+ #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+ #undef TARGET_MANGLE_TYPE
+ #define TARGET_MANGLE_TYPE s390_mangle_type
+@@ -11820,6 +13744,9 @@ s390_loop_unroll_adjust (unsigned nunrol
+ #undef TARGET_SCALAR_MODE_SUPPORTED_P
+ #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
+ 
++#undef TARGET_VECTOR_MODE_SUPPORTED_P
++#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
++
+ #undef  TARGET_PREFERRED_RELOAD_CLASS
+ #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
+ 
+@@ -11864,6 +13791,21 @@ s390_loop_unroll_adjust (unsigned nunrol
+ #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+ #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
+ 
++#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
++#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
++
++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
++
++#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
++#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
++
++#undef TARGET_VECTOR_ALIGNMENT
++#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
++
++#undef TARGET_ASM_FILE_END
++#define TARGET_ASM_FILE_END s390_asm_file_end
++
+ struct gcc_target targetm = TARGET_INITIALIZER;
+ 
+ #include "gt-s390.h"
+--- gcc/config/s390/s390-c.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/s390-c.c	2016-05-11 19:20:42.792826040 +0200
+@@ -0,0 +1,903 @@
++/* Language specific subroutines used for code generation on IBM S/390
++   and zSeries
++   Copyright (C) 2015 Free Software Foundation, Inc.
++
++   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published by
++   the Free Software Foundation; either version 3, or (at your option)
++   any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with GCC; see the file COPYING3.  If not see
++   <http://www.gnu.org/licenses/>.
++
++   Based on gcc/config/rs6000/rs6000-c.c.
++
++   In GCC terms this file belongs to the frontend.  It will be
++   compiled with -DIN_GCC_FRONTEND.  With that rtl.h cannot be
++   included anymore - a mechanism supposed to avoid adding frontend -
++   backend dependencies.  */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "cpplib.h"
++#include "machmode.h"
++#include "vec.h"
++#include "double-int.h"
++#include "input.h"
++#include "alias.h"
++#include "symtab.h"
++#include "tree.h"
++#include "c-family/c-common.h"
++#include "c-family/c-pragma.h"
++#include "diagnostic-core.h"
++#include "tm_p.h"
++#include "target.h"
++#include "langhooks.h"
++#include "tree-pretty-print.h"
++#include "c/c-tree.h"
++
++#include "s390-builtins.h"
++
++static GTY(()) tree __vector_keyword;
++static GTY(()) tree vector_keyword;
++static GTY(()) tree __bool_keyword;
++static GTY(()) tree bool_keyword;
++static GTY(()) tree _Bool_keyword;
++
++
++/* Generate an array holding all the descriptions of variants of
++   overloaded builtins defined with OB_DEF_VAR in
++   s390-builtins.def.  */
++static enum s390_builtin_ov_type_index
++type_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
++  {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(...)
++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FNTYPE,
++#include "s390-builtins.def"
++    BT_OV_MAX
++  };
++
++
++/* Generate an array indexed by an overloaded builtin index returning
++   the first index in desc_for_overloaded_builtin_var where the
++   variants for the builtin can be found.  */
++static enum s390_overloaded_builtin_vars
++desc_start_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
++  {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(NAME, FIRST_VAR_NAME,...)	\
++    S390_OVERLOADED_BUILTIN_VAR_##FIRST_VAR_NAME,
++#define OB_DEF_VAR(...)
++    #include "s390-builtins.def"
++    S390_OVERLOADED_BUILTIN_VAR_MAX
++  };
++
++/* Generate an array indexed by an overloaded builtin index returning
++   the last index in desc_for_overloaded_builtin_var where the
++   variants for the builtin can be found.  */
++static enum s390_overloaded_builtin_vars
++desc_end_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
++  {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME,...)	\
++    S390_OVERLOADED_BUILTIN_VAR_##LAST_VAR_NAME,
++#define OB_DEF_VAR(...)
++    #include "s390-builtins.def"
++    S390_OVERLOADED_BUILTIN_VAR_MAX
++  };
++
++static enum s390_builtin_type_index
++s390_builtin_ov_types[BT_OV_MAX][MAX_OV_OPERANDS] =
++  {
++#undef DEF_TYPE
++#undef DEF_POINTER_TYPE
++#undef DEF_DISTINCT_TYPE
++#undef DEF_VECTOR_TYPE
++#undef DEF_OPAQUE_VECTOR_TYPE
++#undef DEF_FN_TYPE
++#undef DEF_OV_TYPE
++#define DEF_TYPE(...)
++#define DEF_POINTER_TYPE(...)
++#define DEF_DISTINCT_TYPE(...)
++#define DEF_VECTOR_TYPE(...)
++#define DEF_OPAQUE_VECTOR_TYPE(...)
++#define DEF_FN_TYPE(...)
++#define DEF_OV_TYPE(INDEX, args...) { args },
++#include "s390-builtin-types.def"
++  };
++
++static const enum s390_builtins
++bt_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX] = {
++#undef B_DEF
++#undef OB_DEF
++#undef OB_DEF_VAR
++#define B_DEF(...)
++#define OB_DEF(...)
++#define OB_DEF_VAR(NAME, BT, ...) S390_BUILTIN_##BT,
++
++#include "s390-builtins.def"
++  };
++
++/* In addition to calling fold_convert for EXPR of type TYPE, also
++   call c_fully_fold to remove any C_MAYBE_CONST_EXPRs that could be
++   hiding there (PR47197).  */
++tree
++fully_fold_convert (tree type, tree expr)
++{
++  tree result = fold_convert (type, expr);
++  bool maybe_const = true;
++
++  if (!c_dialect_cxx ())
++    result = c_fully_fold (result, false, &maybe_const);
++
++  return result;
++}
++
++/* Unify the different variants to the same nodes in order to keep the
++   code working with it simple.  */
++static cpp_hashnode *
++s390_categorize_keyword (const cpp_token *tok)
++{
++  if (tok->type == CPP_NAME)
++    {
++      cpp_hashnode *ident = tok->val.node.node;
++
++      if (ident == C_CPP_HASHNODE (vector_keyword))
++	return C_CPP_HASHNODE (__vector_keyword);
++
++      if (ident == C_CPP_HASHNODE (bool_keyword))
++	return C_CPP_HASHNODE (__bool_keyword);
++
++      if (ident == C_CPP_HASHNODE (_Bool_keyword))
++	return C_CPP_HASHNODE (__bool_keyword);
++      return ident;
++    }
++
++  return 0;
++}
++
++
++/* Called to decide whether a conditional macro should be expanded.
++   The conditional macros are the vector and bool keyword variants;
++   TOK is examined together with the tokens following it.  */
++
++static cpp_hashnode *
++s390_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
++{
++  cpp_hashnode *expand_this = tok->val.node.node;
++  cpp_hashnode *ident;
++  static bool expand_bool_p = false;
++  int idx = 0;
++  enum rid rid_code;
++
++  /* The vector keyword is only expanded if the machine actually
++     provides hardware support.  */
++  if (!TARGET_ZVECTOR)
++    return NULL;
++
++  ident = s390_categorize_keyword (tok);
++
++  /* Triggered when we picked a different variant in
++     s390_categorize_keyword.  */
++  if (ident != expand_this)
++    expand_this = NULL;
++
++  /* The vector keyword has been found already and we remembered to
++     expand the next bool.  */
++  if (expand_bool_p && ident == C_CPP_HASHNODE (__bool_keyword))
++    {
++      expand_bool_p = false;
++      return ident;
++    }
++
++  if (ident != C_CPP_HASHNODE (__vector_keyword))
++    return expand_this;
++
++  do
++    tok = cpp_peek_token (pfile, idx++);
++  while (tok->type == CPP_PADDING);
++  ident = s390_categorize_keyword (tok);
++
++  if (!ident)
++    return expand_this;
++
++  /* vector bool - remember to expand the next bool. */
++  if (ident == C_CPP_HASHNODE (__bool_keyword))
++    {
++      expand_bool_p = true;
++      return C_CPP_HASHNODE (__vector_keyword);
++    }
++
++  /* The boost libraries have code with Iterator::vector vector in it.
++     If we allow the normal handling, this module will be called
++     recursively, and the vector will be skipped.  */
++  if (ident == C_CPP_HASHNODE (__vector_keyword))
++    return expand_this;
++
++  rid_code = (enum rid)(ident->rid_code);
++
++  if (ident->type == NT_MACRO)
++    {
++      /* Now actually fetch the tokens we "peeked" before and do a
++	 lookahead for the next.  */
++      do
++	(void) cpp_get_token (pfile);
++      while (--idx > 0);
++      do
++	tok = cpp_peek_token (pfile, idx++);
++      while (tok->type == CPP_PADDING);
++      ident = s390_categorize_keyword (tok);
++
++      if (ident == C_CPP_HASHNODE (__bool_keyword))
++	{
++	  expand_bool_p = true;
++	  return C_CPP_HASHNODE (__vector_keyword);
++	}
++      else if (ident)
++	rid_code = (enum rid)(ident->rid_code);
++    }
++
++  /* vector keyword followed by type identifier: vector unsigned,
++     vector long, ...
++     Types consisting of more than one identifier are not supported by
++     zvector e.g. long long, long double, unsigned long int.  */
++  if (rid_code == RID_UNSIGNED || rid_code == RID_LONG
++      || rid_code == RID_SHORT || rid_code == RID_SIGNED
++      || rid_code == RID_INT || rid_code == RID_CHAR
++      || rid_code == RID_DOUBLE)
++    {
++      expand_this = C_CPP_HASHNODE (__vector_keyword);
++      /* If the next keyword is bool, it will need to be expanded as
++	 well.  */
++      do
++	tok = cpp_peek_token (pfile, idx++);
++      while (tok->type == CPP_PADDING);
++      ident = s390_categorize_keyword (tok);
++
++      /* __vector long __bool a; */
++      if (ident == C_CPP_HASHNODE (__bool_keyword))
++	expand_bool_p = true;
++      else
++	{
++	  /* Triggered with: __vector long long __bool a; */
++	  do
++	    tok = cpp_peek_token (pfile, idx++);
++	  while (tok->type == CPP_PADDING);
++	  ident = s390_categorize_keyword (tok);
++
++	  if (ident == C_CPP_HASHNODE (__bool_keyword))
++	    expand_bool_p = true;
++	}
++    }
++
++  return expand_this;
++}
++
++/* Define platform dependent macros.  */
++void
++s390_cpu_cpp_builtins (cpp_reader *pfile)
++{
++  cpp_assert (pfile, "cpu=s390");
++  cpp_assert (pfile, "machine=s390");
++  cpp_define (pfile, "__s390__");
++  if (TARGET_ZARCH)
++    cpp_define (pfile, "__zarch__");
++  if (TARGET_64BIT)
++    cpp_define (pfile, "__s390x__");
++  if (TARGET_LONG_DOUBLE_128)
++    cpp_define (pfile, "__LONG_DOUBLE_128__");
++  if (TARGET_HTM)
++    cpp_define (pfile, "__HTM__");
++  if (TARGET_ZVECTOR)
++    {
++      cpp_define (pfile, "__VEC__=10301");
++      cpp_define (pfile, "__vector=__attribute__((vector_size(16)))");
++      cpp_define (pfile, "__bool=__attribute__((s390_vector_bool)) unsigned");
++
++      if (!flag_iso)
++	{
++	  cpp_define (pfile, "__VECTOR_KEYWORD_SUPPORTED__");
++	  cpp_define (pfile, "vector=vector");
++	  cpp_define (pfile, "bool=bool");
++
++	  __vector_keyword = get_identifier ("__vector");
++	  C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL;
++
++	  vector_keyword = get_identifier ("vector");
++	  C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL;
++
++	  __bool_keyword = get_identifier ("__bool");
++	  C_CPP_HASHNODE (__bool_keyword)->flags |= NODE_CONDITIONAL;
++
++	  bool_keyword = get_identifier ("bool");
++	  C_CPP_HASHNODE (bool_keyword)->flags |= NODE_CONDITIONAL;
++
++	  _Bool_keyword = get_identifier ("_Bool");
++	  C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL;
++
++	  /* Enable context-sensitive macros.  */
++	  cpp_get_callbacks (pfile)->macro_to_expand = s390_macro_to_expand;
++	}
++    }
++}
++
++/* Expand builtins which can directly be mapped to tree expressions.
++   LOC - location information
++   FCODE - function code of the builtin
++   ARGLIST - values to be passed as arguments
++   RETURN_TYPE - expected return type of the builtin.  */
++static tree
++s390_expand_overloaded_builtin (location_t loc,
++				unsigned fcode,
++				vec<tree, va_gc> *arglist,
++				tree return_type)
++{
++  switch (fcode)
++    {
++    case S390_OVERLOADED_BUILTIN_s390_vec_step:
++      if (TREE_CODE (TREE_TYPE ((*arglist)[0])) != VECTOR_TYPE)
++	{
++	  error_at (loc, "Builtin vec_step can only be used on vector types.");
++	  return error_mark_node;
++	}
++      return build_int_cst (NULL_TREE,
++			    TYPE_VECTOR_SUBPARTS (TREE_TYPE ((*arglist)[0])));
++    case S390_OVERLOADED_BUILTIN_s390_vec_xld2:
++    case S390_OVERLOADED_BUILTIN_s390_vec_xlw4:
++      return build2 (MEM_REF, return_type,
++		     fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]),
++		     build_int_cst (TREE_TYPE ((*arglist)[1]), 0));
++    case S390_OVERLOADED_BUILTIN_s390_vec_xstd2:
++    case S390_OVERLOADED_BUILTIN_s390_vec_xstw4:
++      return build2 (MODIFY_EXPR, TREE_TYPE((*arglist)[0]),
++		     build1 (INDIRECT_REF, TREE_TYPE((*arglist)[0]),
++			     fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])),
++		     (*arglist)[0]);
++    case S390_OVERLOADED_BUILTIN_s390_vec_load_pair:
++      {
++	vec<constructor_elt, va_gc> *v;
++	constructor_elt elt1 = { NULL_TREE , (*arglist)[0] };
++	constructor_elt elt2 = { NULL_TREE , (*arglist)[1] };
++
++	vec_alloc (v, 2);
++	v->quick_push (elt1);
++	v->quick_push (elt2);
++	return build_constructor (return_type, v);
++      }
++    default:
++      gcc_unreachable ();
++    }
++}
++
++/* invert result */
++#define __VSTRING_FLAG_IN         8
++/* result type */
++#define __VSTRING_FLAG_RT         4
++/* zero search */
++#define __VSTRING_FLAG_ZS         2
++/* set condition code */
++#define __VSTRING_FLAG_CS         1
++
++/* Return the flags value to be used for string low-level builtins
++   when expanded from overloaded builtin OB_FCODE.  */
++static unsigned int
++s390_get_vstring_flags (int ob_fcode)
++{
++  unsigned int flags = 0;
++
++  switch (ob_fcode)
++    {
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc:
++      flags |= __VSTRING_FLAG_IN;
++      break;
++    default:
++      break;
++    }
++  switch (ob_fcode)
++    {
++
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc:
++      flags |= __VSTRING_FLAG_RT;
++      break;
++    default:
++      break;
++    }
++  switch (ob_fcode)
++    {
++
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc:
++      flags |= __VSTRING_FLAG_ZS;
++      break;
++    default:
++      break;
++    }
++  switch (ob_fcode)
++    {
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc:
++    case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc:
++      flags |= __VSTRING_FLAG_CS;
++      break;
++    default:
++      break;
++    }
++  return flags;
++}
++#undef __VSTRING_FLAG_IN
++#undef __VSTRING_FLAG_RT
++#undef __VSTRING_FLAG_ZS
++#undef __VSTRING_FLAG_CS
++
++/* For several overloaded builtins the argument lists do not match
++   exactly the signature of a low-level builtin.  This function
++   adjusts the argument list ARGLIST for the overloaded builtin
++   OB_FCODE to the signature of the low-level builtin given by
++   DECL.  */
++static void
++s390_adjust_builtin_arglist (unsigned int ob_fcode, tree decl,
++			     vec<tree, va_gc> **arglist)
++{
++  tree arg_chain;
++  int src_arg_index, dest_arg_index;
++  vec<tree, va_gc> *folded_args = NULL;
++
++  /* We at most add one more operand to the list.  */
++  vec_alloc (folded_args, (*arglist)->allocated () + 1);
++  for (arg_chain = TYPE_ARG_TYPES (TREE_TYPE (decl)),
++	 src_arg_index = 0, dest_arg_index = 0;
++       !VOID_TYPE_P (TREE_VALUE (arg_chain));
++       arg_chain = TREE_CHAIN (arg_chain), dest_arg_index++)
++    {
++      bool arg_assigned_p = false;
++      switch (ob_fcode)
++	{
++	  /* For all these the low level builtin needs an additional flags parameter.  */
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc:
++	  if (dest_arg_index == 2)
++	    {
++	      folded_args->quick_push (build_int_cst (integer_type_node,
++				       s390_get_vstring_flags (ob_fcode)));
++	      arg_assigned_p = true;
++	    }
++	  break;
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc:
++	case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc:
++	  if (dest_arg_index == 3)
++	    {
++	      folded_args->quick_push (build_int_cst (integer_type_node,
++				       s390_get_vstring_flags (ob_fcode)));
++	      arg_assigned_p = true;
++	    }
++	  break;
++	case S390_OVERLOADED_BUILTIN_s390_vec_sel:
++	case S390_OVERLOADED_BUILTIN_s390_vec_insert:
++	case S390_OVERLOADED_BUILTIN_s390_vec_load_len:
++	  /* Swap the first two arguments.  It is better to do it here
++	     instead of the header file to avoid operand checking
++	     throwing error messages for a weird operand index.  */
++	  if (dest_arg_index < 2)
++	    {
++	      folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain),
++					 (**arglist)[1 - dest_arg_index]));
++	      src_arg_index++;
++	      arg_assigned_p = true;
++	    }
++	  break;
++	case S390_OVERLOADED_BUILTIN_s390_vec_store_len:
++	  if (dest_arg_index == 1 || dest_arg_index == 2)
++	    {
++	      folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain),
++					 (**arglist)[3 - dest_arg_index]));
++	      src_arg_index++;
++	      arg_assigned_p = true;
++	    }
++	  break;
++
++	case S390_OVERLOADED_BUILTIN_s390_vec_load_bndry:
++	  {
++	    int code;
++
++	    if (dest_arg_index == 1)
++	      {
++		switch (tree_low_cst ((**arglist)[src_arg_index], 1))
++		  {
++		  case 64: code = 0; break;
++		  case 128: code = 1; break;
++		  case 256: code = 2; break;
++		  case 512: code = 3; break;
++		  case 1024: code = 4; break;
++		  case 2048: code = 5; break;
++		  case 4096: code = 6; break;
++		  default:
++		    error ("valid values for builtin %qF argument %d are 64, "
++			   "128, 256, 512, 1024, 2048, and 4096", decl,
++			   src_arg_index + 1);
++		    return;
++		  }
++		folded_args->quick_push (build_int_cst (integer_type_node,
++							code));
++		src_arg_index++;
++		arg_assigned_p = true;
++	      }
++	  }
++	  break;
++	case S390_OVERLOADED_BUILTIN_s390_vec_rl_mask:
++	  /* Duplicate the first src arg.  */
++	  if (dest_arg_index == 0)
++	    {
++	      folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain),
++					   (**arglist)[src_arg_index]));
++	      arg_assigned_p = true;
++	    }
++	  break;
++	default:
++	  break;
++	}
++      if (!arg_assigned_p)
++	{
++	  folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain),
++						 (**arglist)[src_arg_index]));
++	  src_arg_index++;
++	}
++    }
++  *arglist = folded_args;
++}
++
++/* Check whether the arguments in ARGLIST match the function type
++   selected by TYPEINDEX.  Return the number of arguments which needed
++   a conversion/promotion in order to match.
++   0 stands for a perfect match - all operand types match without changes.
++   INT_MAX stands for a mismatch.  */
++static int
++s390_fn_types_compatible (enum s390_builtin_ov_type_index typeindex,
++			  vec<tree, va_gc> *arglist)
++{
++  unsigned int i;
++  int match_type = 0;
++
++  for (i = 0; i < vec_safe_length (arglist); i++)
++    {
++      tree b_arg_type = s390_builtin_types[s390_builtin_ov_types[typeindex][i + 1]];
++      tree in_arg = (*arglist)[i];
++      tree in_type = TREE_TYPE (in_arg);
++
++      if (TREE_CODE (b_arg_type) == VECTOR_TYPE)
++	{
++	  /* Vector types have to match precisely.  */
++	  if (b_arg_type != in_type
++	      && TYPE_MAIN_VARIANT (b_arg_type) != TYPE_MAIN_VARIANT (in_type))
++	    goto mismatch;
++	}
++
++      if (lang_hooks.types_compatible_p (in_type, b_arg_type))
++	continue;
++
++      if (lang_hooks.types_compatible_p (
++	    lang_hooks.types.type_promotes_to (in_type),
++	    lang_hooks.types.type_promotes_to (b_arg_type)))
++	{
++	  match_type++;
++	  continue;
++	}
++
++      /* At this stage the C++ frontend would go ahead trying to find
++	 implicit conversion chains for the argument to match the
++	 target type.  We will mimic this here only for our limited
++	 subset of argument types.  */
++      if (TREE_CODE (b_arg_type) == INTEGER_TYPE
++	  && TREE_CODE (in_type) == INTEGER_TYPE)
++	{
++	  match_type++;
++	  continue;
++	}
++
++      /* If the builtin pointer argument only adds qualifiers to the
++	 incoming pointer's target type it can be an imperfect match.  */
++      if (POINTER_TYPE_P (b_arg_type) && POINTER_TYPE_P (in_type)
++	  && !(TYPE_QUALS (TREE_TYPE (in_type))
++	       & ~TYPE_QUALS (TREE_TYPE (b_arg_type)))
++	  && (TYPE_QUALS (TREE_TYPE (b_arg_type))
++	      & ~TYPE_QUALS (TREE_TYPE (in_type))))
++	{
++	  tree qual_in_type =
++	    build_qualified_type (TREE_TYPE (in_type),
++				  TYPE_QUALS (TREE_TYPE (b_arg_type)));
++
++	  if (lang_hooks.types_compatible_p (qual_in_type,
++					     TREE_TYPE (b_arg_type)))
++	    {
++	      match_type++;
++	      continue;
++	    }
++	}
++
++    mismatch:
++      if (TARGET_DEBUG_ARG)
++	fprintf (stderr, " mismatch in operand: %d\n", i + 1);
++      return INT_MAX;
++    }
++
++  return match_type;
++}
++
++/* Return the element count shared by the vector return type and all
++   vector arguments of FNDECL, -1 if they differ or none is a vector.  */
++static int
++s390_vec_n_elem (tree fndecl)
++{
++  tree b_arg_chain;
++  int n_elem = -1;
++
++  if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) == VECTOR_TYPE)
++    n_elem = TYPE_VECTOR_SUBPARTS (TREE_TYPE (TREE_TYPE ((fndecl))));
++
++  for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
++       !VOID_TYPE_P (TREE_VALUE (b_arg_chain));
++       b_arg_chain = TREE_CHAIN (b_arg_chain))
++    {
++      int tmp_n_elem;
++      if (TREE_CODE (TREE_VALUE (b_arg_chain)) != VECTOR_TYPE)
++	continue;
++      tmp_n_elem = TYPE_VECTOR_SUBPARTS (TREE_VALUE (b_arg_chain));
++      if (n_elem != -1 && n_elem != tmp_n_elem)
++	return -1;
++      n_elem = tmp_n_elem;
++    }
++  return n_elem;
++}
++
++
++/* Return a tree for a call to the overloaded builtin OB_FNDECL at LOC with
++   PASSED_ARGLIST; error_mark_node on error, NULL_TREE if not overloaded.  */
++tree
++s390_resolve_overloaded_builtin (location_t loc,
++				 tree ob_fndecl,
++				 void *passed_arglist)
++{
++  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
++  unsigned int in_args_num = vec_safe_length (arglist);
++  unsigned int ob_args_num = 0;
++  unsigned int ob_fcode = DECL_FUNCTION_CODE (ob_fndecl);
++  enum s390_overloaded_builtin_vars bindex;
++  unsigned int i;
++  int last_match_type = INT_MAX;
++  int last_match_index = -1;
++  unsigned int all_op_flags;
++  int num_matches = 0;
++  tree target_builtin_decl, b_arg_chain, return_type;
++  enum s390_builtin_ov_type_index last_match_fntype_index;
++
++  if (TARGET_DEBUG_ARG)
++    fprintf (stderr,
++      "s390_resolve_overloaded_builtin, code = %4d, %s - %s overloaded\n",
++      (int)ob_fcode, IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)),
++     ob_fcode < S390_BUILTIN_MAX ? "not" : "");
++
++  /* 0...S390_BUILTIN_MAX-1 is for non-overloaded builtins.  */
++  if (ob_fcode < S390_BUILTIN_MAX)
++    {
++      if (bflags_for_builtin(ob_fcode) & B_INT)
++	{
++	  error_at (loc,
++		    "Builtin %qF is for GCC internal use only.",
++		    ob_fndecl);
++	  return error_mark_node;
++	}
++      return NULL_TREE;
++    }
++
++  ob_fcode -= S390_BUILTIN_MAX;
++
++  for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (ob_fndecl));
++       !VOID_TYPE_P (TREE_VALUE (b_arg_chain));
++       b_arg_chain = TREE_CHAIN (b_arg_chain))
++    ob_args_num++;
++
++  if (ob_args_num != in_args_num)
++    {
++      error_at (loc,
++		"Mismatch in number of arguments for builtin %qF. "
++		"Expected: %d got %d", ob_fndecl,
++		ob_args_num, in_args_num);
++      return error_mark_node;
++    }
++
++  for (i = 0; i < in_args_num; i++)
++    if ((*arglist)[i] == error_mark_node)
++      return error_mark_node;
++
++  /* Overloaded builtins without any variants are directly expanded here.  */
++  if (desc_start_for_overloaded_builtin[ob_fcode] ==
++      S390_OVERLOADED_BUILTIN_VAR_MAX)
++    return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, NULL_TREE);
++
++  for (bindex = desc_start_for_overloaded_builtin[ob_fcode];
++       bindex <= desc_end_for_overloaded_builtin[ob_fcode];
++       bindex = (enum s390_overloaded_builtin_vars)((int)bindex + 1))
++  {
++    int match_type;
++    enum s390_builtin_ov_type_index type_index =
++      type_for_overloaded_builtin_var[bindex];
++
++    if (TARGET_DEBUG_ARG)
++      fprintf (stderr, "checking variant number: %d", (int)bindex);
++
++    match_type = s390_fn_types_compatible (type_index, arglist);
++
++    if (match_type == INT_MAX)
++      continue;
++
++    if (TARGET_DEBUG_ARG)
++      fprintf (stderr,
++	       " %s match score: %d\n", match_type == 0 ? "perfect" : "imperfect",
++	       match_type);
++
++    if (match_type < last_match_type)
++      {
++	num_matches = 1;
++	last_match_type = match_type;
++	last_match_fntype_index = type_index;
++	last_match_index = bindex;
++      }
++    else if (match_type == last_match_type)
++      num_matches++;
++  }
++
++  if (last_match_type == INT_MAX)
++    {
++      error_at (loc, "invalid parameter combination for intrinsic %qs",
++		IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)));
++      return error_mark_node;
++    }
++  else if (num_matches > 1)
++    {
++      error_at (loc, "ambiguous overload for intrinsic %qs",
++		IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)));
++      return error_mark_node;
++    }
++
++  if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX)
++    target_builtin_decl = ob_fndecl;
++  else
++    target_builtin_decl = s390_builtin_decls[bt_for_overloaded_builtin_var[last_match_index]];
++
++  all_op_flags = opflags_overloaded_builtin_var[last_match_index];
++  return_type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][0]];
++
++  /* Check for the operand flags in the overloaded builtin variant.  */
++  for (i = 0; i < ob_args_num; i++)
++    {
++      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
++      tree arg = (*arglist)[i];
++      tree type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][i + 1]];
++
++      all_op_flags = all_op_flags >> O_SHIFT;
++
++      if (op_flags == O_ELEM)
++	{
++	  int n_elem = s390_vec_n_elem (target_builtin_decl);
++	  gcc_assert (n_elem > 0);
++	  gcc_assert (type == integer_type_node);
++	  (*arglist)[i] = build2 (BIT_AND_EXPR, integer_type_node,
++				  fold_convert (integer_type_node, arg),
++				  build_int_cst (NULL_TREE, n_elem - 1));
++	}
++
++      if (TREE_CODE (arg) != INTEGER_CST || !O_IMM_P (op_flags))
++	continue;
++
++      if ((TYPE_UNSIGNED (type)
++	   && !int_fits_type_p (arg, c_common_unsigned_type (type)))
++	  || (!TYPE_UNSIGNED (type)
++	      && !int_fits_type_p (arg, c_common_signed_type (type))))
++	{
++	  error("constant argument %d for builtin %qF is out "
++		"of range for target type",
++		i + 1, target_builtin_decl);
++	  return error_mark_node;
++	}
++
++      if (TREE_CODE (arg) == INTEGER_CST
++	  && !s390_const_operand_ok (arg, i + 1, op_flags, target_builtin_decl))
++	return error_mark_node;
++    }
++
++  /* Handle builtins we expand directly - without mapping them to a low
++     level builtin.  */
++  if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX)
++    return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, return_type);
++
++  s390_adjust_builtin_arglist (ob_fcode, target_builtin_decl, &arglist);
++
++  if (VOID_TYPE_P (return_type))
++    return build_function_call_vec (loc, target_builtin_decl,
++				    arglist, NULL);
++  else
++    return fully_fold_convert (return_type,
++			       build_function_call_vec (loc, target_builtin_decl,
++							arglist, NULL));
++}
++
++/* Hook up the overload resolver; used by REGISTER_TARGET_PRAGMAS (s390.h).  */
++void
++s390_register_target_pragmas (void)
++{
++  targetm.resolve_overloaded_builtin = s390_resolve_overloaded_builtin;
++}
+--- gcc/config/s390/s390.h	2016-05-11 14:46:08.219982746 +0200
++++ gcc/config/s390/s390.h	2016-05-11 17:12:39.000000000 +0200
+@@ -35,7 +35,9 @@ enum processor_flags
+   PF_Z10 = 32,
+   PF_Z196 = 64,
+   PF_ZEC12 = 128,
+-  PF_TX = 256
++  PF_TX = 256,
++  PF_Z13 = 512,
++  PF_VX = 1024
+ };
+ 
+ /* This is necessary to avoid a warning about comparing different enum
+@@ -64,6 +66,10 @@ enum processor_flags
+  	(s390_arch_flags & PF_ZEC12)
+ #define TARGET_CPU_HTM \
+  	(s390_arch_flags & PF_TX)
++#define TARGET_CPU_Z13 \
++        (s390_arch_flags & PF_Z13)
++#define TARGET_CPU_VX \
++        (s390_arch_flags & PF_VX)
+ 
+ /* These flags indicate that the generated code should run on a cpu
+    providing the respective hardware facility when run in
+@@ -82,7 +88,15 @@ enum processor_flags
+ #define TARGET_ZEC12 \
+        (TARGET_ZARCH && TARGET_CPU_ZEC12)
+ #define TARGET_HTM (TARGET_OPT_HTM)
+-
++#define TARGET_Z13 \
++       (TARGET_ZARCH && TARGET_CPU_Z13)
++#define TARGET_VX \
++       (TARGET_ZARCH && TARGET_CPU_VX && TARGET_OPT_VX && TARGET_HARD_FLOAT)
++
++/* Use the ABI introduced with IBM z13:
++   - pass vector arguments <= 16 bytes in VRs
++   - align *all* vector types to 8 bytes  */
++#define TARGET_VX_ABI TARGET_VX
+ 
+ #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
+ 
+@@ -97,25 +111,11 @@ enum processor_flags
+ #define TARGET_TPF 0
+ 
+ /* Target CPU builtins.  */
+-#define TARGET_CPU_CPP_BUILTINS()					\
+-  do									\
+-    {									\
+-      builtin_assert ("cpu=s390");					\
+-      builtin_assert ("machine=s390");					\
+-      builtin_define ("__s390__");					\
+-      if (TARGET_ZARCH)							\
+-	builtin_define ("__zarch__");					\
+-      if (TARGET_64BIT)							\
+-        builtin_define ("__s390x__");					\
+-      if (TARGET_LONG_DOUBLE_128)					\
+-        builtin_define ("__LONG_DOUBLE_128__");				\
+-      if (TARGET_HTM)							\
+-	builtin_define ("__HTM__");					\
+-    }									\
+-  while (0)
++#define TARGET_CPU_CPP_BUILTINS() s390_cpu_cpp_builtins (pfile)
+ 
+ #ifdef DEFAULT_TARGET_64BIT
+-#define TARGET_DEFAULT             (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM)
++#define TARGET_DEFAULT     (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP	\
++                            | MASK_OPT_HTM | MASK_OPT_VX)
+ #else
+ #define TARGET_DEFAULT             0
+ #endif
+@@ -184,6 +184,13 @@ enum processor_flags
+ 
+ #define STACK_SIZE_MODE (Pmode)
+ 
++/* Vector arguments are left-justified when placed on the stack during
++   parameter passing.  */
++#define FUNCTION_ARG_PADDING(MODE, TYPE)			\
++  (s390_function_arg_vector ((MODE), (TYPE))			\
++   ? upward							\
++   : DEFAULT_FUNCTION_ARG_PADDING ((MODE), (TYPE)))
++
+ #ifndef IN_LIBGCC2
+ 
+ /* Width of a word, in units (bytes).  */
+@@ -289,9 +296,11 @@ enum processor_flags
+    Reg 35: Return address pointer
+ 
+    Registers 36 and 37 are mapped to access registers
+-   0 and 1, used to implement thread-local storage.  */
++   0 and 1, used to implement thread-local storage.
++
++   Reg 38-53: Vector registers v16-v31  */
+ 
+-#define FIRST_PSEUDO_REGISTER 38
++#define FIRST_PSEUDO_REGISTER 54
+ 
+ /* Standard register usage.  */
+ #define GENERAL_REGNO_P(N)	((int)(N) >= 0 && (N) < 16)
+@@ -300,6 +309,8 @@ enum processor_flags
+ #define CC_REGNO_P(N)		((N) == 33)
+ #define FRAME_REGNO_P(N)	((N) == 32 || (N) == 34 || (N) == 35)
+ #define ACCESS_REGNO_P(N)	((N) == 36 || (N) == 37)
++#define VECTOR_NOFP_REGNO_P(N)  ((N) >= 38 && (N) <= 53)
++#define VECTOR_REGNO_P(N)       (FP_REGNO_P (N) || VECTOR_NOFP_REGNO_P (N))
+ 
+ #define GENERAL_REG_P(X)	(REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
+ #define ADDR_REG_P(X)		(REG_P (X) && ADDR_REGNO_P (REGNO (X)))
+@@ -307,6 +318,8 @@ enum processor_flags
+ #define CC_REG_P(X)		(REG_P (X) && CC_REGNO_P (REGNO (X)))
+ #define FRAME_REG_P(X)		(REG_P (X) && FRAME_REGNO_P (REGNO (X)))
+ #define ACCESS_REG_P(X)		(REG_P (X) && ACCESS_REGNO_P (REGNO (X)))
++#define VECTOR_NOFP_REG_P(X)    (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X)))
++#define VECTOR_REG_P(X)         (REG_P (X) && VECTOR_REGNO_P (REGNO (X)))
+ 
+ /* Set up fixed registers and calling convention:
+ 
+@@ -321,7 +334,9 @@ enum processor_flags
+ 
+    On 31-bit, FPRs 18-19 are call-clobbered;
+    on 64-bit, FPRs 24-31 are call-clobbered.
+-   The remaining FPRs are call-saved.  */
++   The remaining FPRs are call-saved.
++
++   All non-FP vector registers (v16-v31) are call-clobbered.  */
+ 
+ #define FIXED_REGISTERS				\
+ { 0, 0, 0, 0, 					\
+@@ -333,7 +348,11 @@ enum processor_flags
+   0, 0, 0, 0, 					\
+   0, 0, 0, 0, 					\
+   1, 1, 1, 1,					\
+-  1, 1 }
++  1, 1,						\
++  0, 0, 0, 0, 					\
++  0, 0, 0, 0, 					\
++  0, 0, 0, 0, 					\
++  0, 0, 0, 0 }
+ 
+ #define CALL_USED_REGISTERS			\
+ { 1, 1, 1, 1, 					\
+@@ -345,26 +364,35 @@ enum processor_flags
+   1, 1, 1, 1, 					\
+   1, 1, 1, 1, 					\
+   1, 1, 1, 1,					\
+-  1, 1 }
++  1, 1,					        \
++  1, 1, 1, 1, 					\
++  1, 1, 1, 1,					\
++  1, 1, 1, 1, 					\
++  1, 1, 1, 1 }
+ 
+ #define CALL_REALLY_USED_REGISTERS		\
+-{ 1, 1, 1, 1, 					\
++{ 1, 1, 1, 1, 	/* r0 - r15 */			\
+   1, 1, 0, 0, 					\
+   0, 0, 0, 0, 					\
+   0, 0, 0, 0,					\
++  1, 1, 1, 1, 	/* f0 (16) - f15 (31) */	\
+   1, 1, 1, 1, 					\
+   1, 1, 1, 1, 					\
+   1, 1, 1, 1, 					\
+-  1, 1, 1, 1, 					\
++  1, 1, 1, 1,	/* arg, cc, fp, ret addr */	\
++  0, 0,		/* a0 (36), a1 (37) */	        \
++  1, 1, 1, 1, 	/* v16 (38) - v23 (45) */	\
+   1, 1, 1, 1,					\
+-  0, 0 }
++  1, 1, 1, 1, 	/* v24 (46) - v31 (53) */	\
++  1, 1, 1, 1 }
+ 
+ /* Preferred register allocation order.  */
+-#define REG_ALLOC_ORDER                                         \
+-{  1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13,            \
+-   16, 17, 18, 19, 20, 21, 22, 23,                              \
+-   24, 25, 26, 27, 28, 29, 30, 31,                              \
+-   15, 32, 33, 34, 35, 36, 37 }
++#define REG_ALLOC_ORDER							\
++  {  1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13,			\
++     16, 17, 18, 19, 20, 21, 22, 23,					\
++     24, 25, 26, 27, 28, 29, 30, 31,					\
++     38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 	\
++     15, 32, 33, 34, 35, 36, 37 }
+ 
+ 
+ /* Fitting values into registers.  */
+@@ -404,26 +432,22 @@ enum processor_flags
+    but conforms to the 31-bit ABI, GPRs can hold 8 bytes;
+    the ABI guarantees only that the lower 4 bytes are
+    saved across calls, however.  */
+-#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)		\
+-  (!TARGET_64BIT && TARGET_ZARCH				\
+-   && GET_MODE_SIZE (MODE) > 4					\
+-   && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32))
++#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)			\
++  ((!TARGET_64BIT && TARGET_ZARCH					\
++    && GET_MODE_SIZE (MODE) > 4						\
++    && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32))		\
++   || (TARGET_VX							\
++       && GET_MODE_SIZE (MODE) > 8					\
++       && (((TARGET_64BIT && (REGNO) >= 24 && (REGNO) <= 31))		\
++	   || (!TARGET_64BIT && ((REGNO) == 18 || (REGNO) == 19)))))
+ 
+ /* Maximum number of registers to represent a value of mode MODE
+    in a register of class CLASS.  */
+ #define CLASS_MAX_NREGS(CLASS, MODE)   					\
+   s390_class_max_nregs ((CLASS), (MODE))
+ 
+-/* If a 4-byte value is loaded into a FPR, it is placed into the
+-   *upper* half of the register, not the lower.  Therefore, we
+-   cannot use SUBREGs to switch between modes in FP registers.
+-   Likewise for access registers, since they have only half the
+-   word size on 64-bit.  */
+ #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		        \
+-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			        \
+-   ? ((reg_classes_intersect_p (FP_REGS, CLASS)				\
+-       && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8))		\
+-      || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
++  s390_cannot_change_mode_class ((FROM), (TO), (CLASS))
+ 
+ /* Register classes.  */
+ 
+@@ -451,6 +475,7 @@ enum reg_class
+   NO_REGS, CC_REGS, ADDR_REGS, GENERAL_REGS, ACCESS_REGS,
+   ADDR_CC_REGS, GENERAL_CC_REGS,
+   FP_REGS, ADDR_FP_REGS, GENERAL_FP_REGS,
++  VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS,
+   ALL_REGS, LIM_REG_CLASSES
+ };
+ #define N_REG_CLASSES (int) LIM_REG_CLASSES
+@@ -458,11 +483,13 @@ enum reg_class
+ #define REG_CLASS_NAMES							\
+ { "NO_REGS", "CC_REGS", "ADDR_REGS", "GENERAL_REGS", "ACCESS_REGS",	\
+   "ADDR_CC_REGS", "GENERAL_CC_REGS",					\
+-  "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", "ALL_REGS" }
++  "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS",				\
++  "VEC_REGS", "ADDR_VEC_REGS", "GENERAL_VEC_REGS",			\
++  "ALL_REGS" }
+ 
+ /* Class -> register mapping.  */
+-#define REG_CLASS_CONTENTS \
+-{				       			\
++#define REG_CLASS_CONTENTS				\
++{							\
+   { 0x00000000, 0x00000000 },	/* NO_REGS */		\
+   { 0x00000000, 0x00000002 },	/* CC_REGS */		\
+   { 0x0000fffe, 0x0000000d },	/* ADDR_REGS */		\
+@@ -473,7 +500,10 @@ enum reg_class
+   { 0xffff0000, 0x00000000 },	/* FP_REGS */		\
+   { 0xfffffffe, 0x0000000d },	/* ADDR_FP_REGS */	\
+   { 0xffffffff, 0x0000000d },	/* GENERAL_FP_REGS */	\
+-  { 0xffffffff, 0x0000003f },	/* ALL_REGS */		\
++  { 0xffff0000, 0x003fffc0 },	/* VEC_REGS */		\
++  { 0xfffffffe, 0x003fffcd },	/* ADDR_VEC_REGS */	\
++  { 0xffffffff, 0x003fffcd },	/* GENERAL_VEC_REGS */	\
++  { 0xffffffff, 0x003fffff },	/* ALL_REGS */		\
+ }
+ 
+ /* In some case register allocation order is not enough for IRA to
+@@ -504,14 +534,27 @@ extern const enum reg_class regclass_map
+ #define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO)
+ 
+ 
+-/* We need secondary memory to move data between GPRs and FPRs.  With
+-   DFP the ldgr lgdr instructions are available.  But these
+-   instructions do not handle GPR pairs so it is not possible for 31
+-   bit.  */
+-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+- ((CLASS1) != (CLASS2)                                \
+-  && ((CLASS1) == FP_REGS || (CLASS2) == FP_REGS)     \
+-  && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8))
++/* We need secondary memory to move data between GPRs and FPRs.
++
++   - With DFP the ldgr lgdr instructions are available.  Due to the
++     different alignment we cannot use them for SFmode.  For 31 bit a
++     64 bit value in GPR would be a register pair so here we still
++     need to go via memory.
++
++   - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
++     overlapping of FPRs and VRs we still disallow TF/TD modes to be
++     in full VRs so as before also on z13 we do these moves via
++     memory.
++
++     FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
++#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE)			\
++  (((reg_classes_intersect_p (CLASS1, VEC_REGS)				\
++     && reg_classes_intersect_p (CLASS2, GENERAL_REGS))			\
++    || (reg_classes_intersect_p (CLASS1, GENERAL_REGS)			\
++	&& reg_classes_intersect_p (CLASS2, VEC_REGS)))			\
++   && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8)	\
++   && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (MODE)			\
++			  && GET_MODE_SIZE (MODE) > 8)))
+ 
+ /* Get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
+    because the movsi and movsf patterns don't handle r/f moves.  */
+@@ -605,6 +648,11 @@ extern const enum reg_class regclass_map
+ /* Let the assembler generate debug line info.  */
+ #define DWARF2_ASM_LINE_DEBUG_INFO 1
+ 
++/* Define the dwarf register mapping.
++   v16-v31 (hard regs 38-53) -> 68-83
++   rX                        -> X      otherwise  */
++#define DBX_REGISTER_NUMBER(regno)			\
++  (((regno) >= 38 && (regno) <= 53) ? (regno) + 30 : (regno))
+ 
+ /* Frame registers.  */
+ 
+@@ -652,21 +700,29 @@ typedef struct s390_arg_structure
+ {
+   int gprs;			/* gpr so far */
+   int fprs;			/* fpr so far */
++  int vrs;                      /* vr so far */
+ }
+ CUMULATIVE_ARGS;
+ 
+ #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, NN, N_NAMED_ARGS) \
+-  ((CUM).gprs=0, (CUM).fprs=0)
++  ((CUM).gprs=0, (CUM).fprs=0, (CUM).vrs=0)
++
++#define FIRST_VEC_ARG_REGNO 46
++#define LAST_VEC_ARG_REGNO 53
+ 
+ /* Arguments can be placed in general registers 2 to 6, or in floating
+    point registers 0 and 2 for 31 bit and fprs 0, 2, 4 and 6 for 64
+    bit.  */
+-#define FUNCTION_ARG_REGNO_P(N) (((N) >=2 && (N) <7) || \
+-  (N) == 16 || (N) == 17 || (TARGET_64BIT && ((N) == 18 || (N) == 19)))
++#define FUNCTION_ARG_REGNO_P(N)						\
++  (((N) >=2 && (N) < 7) || (N) == 16 || (N) == 17			\
++   || (TARGET_64BIT && ((N) == 18 || (N) == 19))			\
++   || (TARGET_VX && ((N) >= FIRST_VEC_ARG_REGNO && (N) <= LAST_VEC_ARG_REGNO)))
+ 
+ 
+-/* Only gpr 2 and fpr 0 are ever used as return registers.  */
+-#define FUNCTION_VALUE_REGNO_P(N) ((N) == 2 || (N) == 16)
++/* Only gpr 2, fpr 0, and v24 are ever used as return registers.  */
++#define FUNCTION_VALUE_REGNO_P(N)		\
++  ((N) == 2 || (N) == 16			\
++   || (TARGET_VX && (N) == FIRST_VEC_ARG_REGNO))
+ 
+ 
+ /* Function entry and exit.  */
+@@ -844,12 +900,20 @@ do {									\
+ /* How to refer to registers in assembler output.  This sequence is
+    indexed by compiler's hard-register-number (see above).  */
+ #define REGISTER_NAMES							\
+-{ "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",	\
+-  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",	\
+-  "%f0",  "%f2",  "%f4",  "%f6",  "%f1",  "%f3",  "%f5",  "%f7",	\
+-  "%f8",  "%f10", "%f12", "%f14", "%f9",  "%f11", "%f13", "%f15",	\
+-  "%ap",  "%cc",  "%fp",  "%rp",  "%a0",  "%a1"				\
+-}
++  { "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",	\
++    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",	\
++    "%f0",  "%f2",  "%f4",  "%f6",  "%f1",  "%f3",  "%f5",  "%f7",	\
++    "%f8",  "%f10", "%f12", "%f14", "%f9",  "%f11", "%f13", "%f15",	\
++    "%ap",  "%cc",  "%fp",  "%rp",  "%a0",  "%a1",			\
++    "%v16", "%v18", "%v20", "%v22", "%v17", "%v19", "%v21", "%v23",	\
++    "%v24", "%v26", "%v28", "%v30", "%v25", "%v27", "%v29", "%v31"	\
++  }
++
++#define ADDITIONAL_REGISTER_NAMES					\
++  { { "v0", 16 }, { "v2",  17 }, { "v4",  18 }, { "v6",  19 },		\
++    { "v1", 20 }, { "v3",  21 }, { "v5",  22 }, { "v7",  23 },          \
++    { "v8", 24 }, { "v10", 25 }, { "v12", 26 }, { "v14", 27 },          \
++    { "v9", 28 }, { "v11", 29 }, { "v13", 30 }, { "v15", 31 } };
+ 
+ /* Print operand X (an rtx) in assembler syntax to file FILE.  */
+ #define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+@@ -915,13 +979,31 @@ do {									\
+ #define SYMBOL_REF_NOT_NATURALLY_ALIGNED_P(X) \
+   ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_NOT_NATURALLY_ALIGNED))
+ 
++/* Check whether integer displacement is in range for a short displacement.  */
++#define SHORT_DISP_IN_RANGE(d) ((d) >= 0 && (d) <= 4095)
++
+ /* Check whether integer displacement is in range.  */
+ #define DISP_IN_RANGE(d) \
+   (TARGET_LONG_DISPLACEMENT? ((d) >= -524288 && (d) <= 524287) \
+-                           : ((d) >= 0 && (d) <= 4095))
++                           : SHORT_DISP_IN_RANGE(d))
+ 
+ /* Reads can reuse write prefetches, used by tree-ssa-prefetch-loops.c.  */
+ #define READ_CAN_USE_WRITE_PREFETCH 1
+ 
+ extern const int processor_flags_table[];
+-#endif
++
++/* The truth element value for vector comparisons.  Our instructions
++   always generate -1 in that case.  */
++#define VECTOR_STORE_FLAG_VALUE(MODE) CONSTM1_RTX (GET_MODE_INNER (MODE))
++
++/* Target pragma.  */
++
++/* resolve_overloaded_builtin can not be defined the normal way since
++   it is defined in code which technically belongs to the
++   front-end.  */
++#define REGISTER_TARGET_PRAGMAS()		\
++  do {						\
++    s390_register_target_pragmas ();		\
++  } while (0)
++
++#endif /* S390_H */
+--- gcc/config/s390/s390intrin.h	2013-08-14 13:55:12.000000000 +0200
++++ gcc/config/s390/s390intrin.h	2016-05-11 17:12:39.000000000 +0200
+@@ -29,5 +29,8 @@ along with GCC; see the file COPYING3.
+ #include <htmintrin.h>
+ #endif
+ 
++#ifdef __VEC__
++#include <vecintrin.h>
++#endif
+ 
+ #endif /* _S390INTRIN_H*/
+--- gcc/config/s390/s390.md	2015-06-18 16:33:04.000000000 +0200
++++ gcc/config/s390/s390.md	2016-05-11 19:22:59.245881189 +0200
+@@ -125,7 +125,109 @@
+    UNSPEC_FPINT_CEIL
+    UNSPEC_FPINT_NEARBYINT
+    UNSPEC_FPINT_RINT
+- ])
++
++   UNSPEC_LCBB
++
++   ; Vector
++   UNSPEC_VEC_SMULT_HI
++   UNSPEC_VEC_UMULT_HI
++   UNSPEC_VEC_SMULT_LO
++   UNSPEC_VEC_SMULT_EVEN
++   UNSPEC_VEC_UMULT_EVEN
++   UNSPEC_VEC_SMULT_ODD
++   UNSPEC_VEC_UMULT_ODD
++
++   UNSPEC_VEC_VMAL
++   UNSPEC_VEC_VMAH
++   UNSPEC_VEC_VMALH
++   UNSPEC_VEC_VMAE
++   UNSPEC_VEC_VMALE
++   UNSPEC_VEC_VMAO
++   UNSPEC_VEC_VMALO
++
++   UNSPEC_VEC_GATHER
++   UNSPEC_VEC_EXTRACT
++   UNSPEC_VEC_INSERT_AND_ZERO
++   UNSPEC_VEC_LOAD_BNDRY
++   UNSPEC_VEC_LOAD_LEN
++   UNSPEC_VEC_MERGEH
++   UNSPEC_VEC_MERGEL
++   UNSPEC_VEC_PACK
++   UNSPEC_VEC_PACK_SATURATE
++   UNSPEC_VEC_PACK_SATURATE_CC
++   UNSPEC_VEC_PACK_SATURATE_GENCC
++   UNSPEC_VEC_PACK_UNSIGNED_SATURATE
++   UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC
++   UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC
++   UNSPEC_VEC_PERM
++   UNSPEC_VEC_PERMI
++   UNSPEC_VEC_EXTEND
++   UNSPEC_VEC_STORE_LEN
++   UNSPEC_VEC_UNPACKH
++   UNSPEC_VEC_UNPACKH_L
++   UNSPEC_VEC_UNPACKL
++   UNSPEC_VEC_UNPACKL_L
++   UNSPEC_VEC_ADDC
++   UNSPEC_VEC_ADDC_U128
++   UNSPEC_VEC_ADDE_U128
++   UNSPEC_VEC_ADDEC_U128
++   UNSPEC_VEC_AVG
++   UNSPEC_VEC_AVGU
++   UNSPEC_VEC_CHECKSUM
++   UNSPEC_VEC_GFMSUM
++   UNSPEC_VEC_GFMSUM_128
++   UNSPEC_VEC_GFMSUM_ACCUM
++   UNSPEC_VEC_GFMSUM_ACCUM_128
++   UNSPEC_VEC_SET
++
++   UNSPEC_VEC_VSUMG
++   UNSPEC_VEC_VSUMQ
++   UNSPEC_VEC_VSUM
++   UNSPEC_VEC_RL_MASK
++   UNSPEC_VEC_SLL
++   UNSPEC_VEC_SLB
++   UNSPEC_VEC_SLDB
++   UNSPEC_VEC_SRAL
++   UNSPEC_VEC_SRAB
++   UNSPEC_VEC_SRL
++   UNSPEC_VEC_SRLB
++
++   UNSPEC_VEC_SUB_U128
++   UNSPEC_VEC_SUBC
++   UNSPEC_VEC_SUBC_U128
++   UNSPEC_VEC_SUBE_U128
++   UNSPEC_VEC_SUBEC_U128
++
++   UNSPEC_VEC_TEST_MASK
++
++   UNSPEC_VEC_VFAE
++   UNSPEC_VEC_VFAECC
++
++   UNSPEC_VEC_VFEE
++   UNSPEC_VEC_VFEECC
++   UNSPEC_VEC_VFENE
++   UNSPEC_VEC_VFENECC
++
++   UNSPEC_VEC_VISTR
++   UNSPEC_VEC_VISTRCC
++
++   UNSPEC_VEC_VSTRC
++   UNSPEC_VEC_VSTRCCC
++
++   UNSPEC_VEC_VCDGB
++   UNSPEC_VEC_VCDLGB
++
++   UNSPEC_VEC_VCGDB
++   UNSPEC_VEC_VCLGDB
++
++   UNSPEC_VEC_VFIDB
++
++   UNSPEC_VEC_VLDEB
++   UNSPEC_VEC_VLEDB
++
++   UNSPEC_VEC_VFTCIDB
++   UNSPEC_VEC_VFTCIDBCC
++])
+ 
+ ;;
+ ;; UNSPEC_VOLATILE usage
+@@ -167,6 +269,10 @@
+    UNSPECV_ETND
+    UNSPECV_NTSTG
+    UNSPECV_PPA
++
++   ; Set and get floating point control register
++   UNSPECV_SFPC
++   UNSPECV_EFPC
+   ])
+ 
+ ;;
+@@ -198,6 +304,11 @@
+    ; Floating point registers.
+    (FPR0_REGNUM                 16)
+    (FPR2_REGNUM                 18)
++   (VR0_REGNUM                  16)
++   (VR16_REGNUM                 38)
++   (VR23_REGNUM                 45)
++   (VR24_REGNUM                 46)
++   (VR31_REGNUM                 53)
+   ])
+ 
+ ;;
+@@ -228,7 +339,7 @@
+ ;; Used to determine defaults for length and other attribute values.
+ 
+ (define_attr "op_type"
+-  "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS"
++  "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS,VRI,VRR,VRS,VRV,VRX"
+   (const_string "NN"))
+ 
+ ;; Instruction type attribute used for scheduling.
+@@ -306,10 +417,11 @@
+ ;; distinguish between g5 and g6, but there are differences between the two
+ ;; CPUs could in theory be modeled.
+ 
+-(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12"
++(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13"
+   (const (symbol_ref "s390_tune_attr")))
+ 
+-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12"
++(define_attr "cpu_facility"
++  "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12,vec"
+   (const_string "standard"))
+ 
+ (define_attr "enabled" ""
+@@ -346,6 +458,10 @@
+ 
+          (and (eq_attr "cpu_facility" "zEC12")
+               (match_test "TARGET_ZEC12"))
++	 (const_int 1)
++
++         (and (eq_attr "cpu_facility" "vec")
++              (match_test "TARGET_VX"))
+ 	 (const_int 1)]
+ 	(const_int 0)))
+ 
+@@ -365,6 +481,9 @@
+ ;; Pipeline description for zEC12
+ (include "2827.md")
+ 
++;; Pipeline description for z13
++(include "2964.md")
++
+ ;; Predicates
+ (include "predicates.md")
+ 
+@@ -376,12 +495,13 @@
+ 
+ ;; Iterators
+ 
++(define_mode_iterator ALL [TI DI SI HI QI TF DF SF TD DD SD V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1TI V1DF V2DF V1TF])
++
+ ;; These mode iterators allow floating point patterns to be generated from the
+ ;; same template.
+ (define_mode_iterator FP_ALL [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")
+                               (SD "TARGET_HARD_DFP")])
+ (define_mode_iterator FP [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")])
+-(define_mode_iterator FPALL [TF DF SF TD DD SD])
+ (define_mode_iterator BFP [TF DF SF])
+ (define_mode_iterator DFP [TD DD])
+ (define_mode_iterator DFP_ALL [TD DD SD])
+@@ -417,7 +537,6 @@
+ ;; This mode iterator allows the integer patterns to be defined from the
+ ;; same template.
+ (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
+-(define_mode_iterator INTALL [TI DI SI HI QI])
+ (define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI])
+ 
+ ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
+@@ -476,6 +595,14 @@
+ ;; first and the second operand match for bfp modes.
+ (define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (DD "f")])
+ 
++;; This attribute is used to merge the scalar vector instructions into
++;; the FP patterns.  For non-supported modes (all but DF) it expands
++;; to constraints which are supposed to be matched by an earlier
++;; variant.
++(define_mode_attr v0      [(TF "0") (DF "v") (SF "0") (TD "0") (DD "0") (DD "0") (TI "0") (DI "v") (SI "0")])
++(define_mode_attr vf      [(TF "f") (DF "v") (SF "f") (TD "f") (DD "f") (DD "f") (TI "f") (DI "v") (SI "f")])
++(define_mode_attr vd      [(TF "d") (DF "v") (SF "d") (TD "d") (DD "d") (DD "d") (TI "d") (DI "v") (SI "d")])
++
+ ;; This attribute is used in the operand list of the instruction to have an
+ ;; additional operand for the dfp instructions.
+ (define_mode_attr op1 [(TF "") (DF "") (SF "")
+@@ -584,6 +711,19 @@
+ ;; In place of GET_MODE_BITSIZE (<MODE>mode)
+ (define_mode_attr bitsize [(DI "64") (SI "32") (HI "16") (QI "8")])
+ 
++
++
++; Condition code modes generated by vector fp comparisons.  These will
++; be used also in single element mode.
++(define_mode_iterator VFCMP [CCVEQ CCVFH CCVFHE])
++; Used with VFCMP to expand part of the mnemonic
++; For fp we have a mismatch: eq in the insn name - e in asm
++(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
++(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVH "h") (CCVHU "hl") (CCVFH "h") (CCVFHE "he")])
++
++
++(include "vector.md")
++
+ ;;
+ ;;- Compare instructions.
+ ;;
+@@ -1091,6 +1231,15 @@
+    [(set_attr "op_type" "RRE,RXE")
+     (set_attr "type"  "fsimp<mode>")])
+ 
++; wfcedbs, wfchdbs, wfchedbs: DF compare that only sets CC; the V2DI result (operand 2) is a scratch.
++(define_insn "*vec_cmp<insn_cmp>df_cconly"
++  [(set (reg:VFCMP CC_REGNUM)
++	(compare:VFCMP (match_operand:DF 0 "register_operand" "v")
++		       (match_operand:DF 1 "register_operand" "v")))
++   (clobber (match_scratch:V2DI 2 "=v"))]
++  "TARGET_VX && TARGET_HARD_FLOAT"
++  "wfc<asm_fcmp>dbs\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
+ 
+ ; Compare and Branch instructions
+ 
+@@ -1216,17 +1365,27 @@
+ ; movti instruction pattern(s).
+ ;
+ 
++; FIXME: More constants are possible by enabling jxx, jyy constraints
++; for TImode (use double-int for the calculations)
+ (define_insn "movti"
+-  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,d,o")
+-        (match_operand:TI 1 "general_operand" "QS,d,dPRT,d"))]
++  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,v,  v,  v,v,d, v,QR,   d,o")
++        (match_operand:TI 1 "general_operand"      "QS, d,v,j00,jm1,d,v,QR, v,dPRT,d"))]
+   "TARGET_ZARCH"
+   "@
+    lmg\t%0,%N0,%S1
+    stmg\t%1,%N1,%S0
++   vlr\t%v0,%v1
++   vzero\t%v0
++   vone\t%v0
++   vlvgp\t%v0,%1,%N1
++   #
++   vl\t%v0,%1
++   vst\t%v1,%0
+    #
+    #"
+-  [(set_attr "op_type" "RSY,RSY,*,*")
+-   (set_attr "type" "lm,stm,*,*")])
++  [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*")
++   (set_attr "type" "lm,stm,*,*,*,*,*,*,*,*,*")
++   (set_attr "cpu_facility" "*,*,vec,vec,vec,vec,vec,vec,vec,*,*")])
+ 
+ (define_split
+   [(set (match_operand:TI 0 "nonimmediate_operand" "")
+@@ -1256,10 +1415,14 @@
+   operands[5] = operand_subword (operands[1], 0, 0, TImode);
+ })
+ 
++; Use part of the TImode target reg to perform the address
++; calculation.  If the TImode value is supposed to be copied into a VR
++; this splitter is not necessary.
+ (define_split
+   [(set (match_operand:TI 0 "register_operand" "")
+         (match_operand:TI 1 "memory_operand" ""))]
+   "TARGET_ZARCH && reload_completed
++   && !VECTOR_REG_P (operands[0])
+    && !s_operand (operands[1], VOIDmode)"
+   [(set (match_dup 0) (match_dup 1))]
+ {
+@@ -1270,6 +1433,25 @@
+ })
+ 
+ 
++; Split a VR -> GPR TImode move into two "vector load GR from VR element" steps.
++; The higher order doubleword is simply a DImode move from the VR, while the
++; second doubleword is a vec extract of element 1.  Both will end up as vlgvg.
++(define_split
++  [(set (match_operand:TI 0 "register_operand" "")
++        (match_operand:TI 1 "register_operand" ""))]
++  "TARGET_VX && reload_completed
++   && GENERAL_REG_P (operands[0])
++   && VECTOR_REG_P (operands[1])"
++  [(set (match_dup 2) (match_dup 4))
++   (set (match_dup 3) (unspec:DI [(match_dup 5) (const_int 1)]
++				 UNSPEC_VEC_EXTRACT))]
++{
++  operands[2] = operand_subword (operands[0], 0, 0, TImode);
++  operands[3] = operand_subword (operands[0], 1, 0, TImode);
++  operands[4] = gen_rtx_REG (DImode, REGNO (operands[1]));
++  operands[5] = gen_rtx_REG (V2DImode, REGNO (operands[1]));
++})
++
+ ;
+ ; Patterns used for secondary reloads
+ ;
+@@ -1278,40 +1460,20 @@
+ ; Unfortunately there is no such variant for QI, TI and FP mode moves.
+ ; These patterns are also used for unaligned SI and DI accesses.
+ 
+-(define_expand "reload<INTALL:mode><P:mode>_tomem_z10"
+-  [(parallel [(match_operand:INTALL 0 "memory_operand"   "")
+-	      (match_operand:INTALL 1 "register_operand" "=d")
+-	      (match_operand:P 2 "register_operand" "=&a")])]
+-  "TARGET_Z10"
+-{
+-  s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
+-  DONE;
+-})
+-
+-(define_expand "reload<INTALL:mode><P:mode>_toreg_z10"
+-  [(parallel [(match_operand:INTALL 0 "register_operand" "=d")
+-	      (match_operand:INTALL 1 "memory_operand"   "")
+-	      (match_operand:P 2 "register_operand" "=a")])]
+-  "TARGET_Z10"
+-{
+-  s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
+-  DONE;
+-})
+-
+-(define_expand "reload<FPALL:mode><P:mode>_tomem_z10"
+-  [(parallel [(match_operand:FPALL 0 "memory_operand"   "")
+-	      (match_operand:FPALL 1 "register_operand" "=d")
+-	      (match_operand:P 2 "register_operand" "=&a")])]
++(define_expand "reload<ALL:mode><P:mode>_tomem_z10"
++  [(parallel [(match_operand:ALL 0 "memory_operand"   "")
++	      (match_operand:ALL 1 "register_operand" "=d")
++	      (match_operand:P   2 "register_operand" "=&a")])]
+   "TARGET_Z10"
+ {
+   s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
+   DONE;
+ })
+ 
+-(define_expand "reload<FPALL:mode><P:mode>_toreg_z10"
+-  [(parallel [(match_operand:FPALL 0 "register_operand" "=d")
+-	      (match_operand:FPALL 1 "memory_operand"   "")
+-	      (match_operand:P 2 "register_operand" "=a")])]
++(define_expand "reload<ALL:mode><P:mode>_toreg_z10"
++  [(parallel [(match_operand:ALL 0 "register_operand" "=d")
++	      (match_operand:ALL 1 "memory_operand"   "")
++	      (match_operand:P   2 "register_operand" "=a")])]
+   "TARGET_Z10"
+ {
+   s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
+@@ -1340,9 +1502,16 @@
+   DONE;
+ })
+ 
+-; Handles assessing a non-offsetable memory address
++; Not all the indirect memory access instructions support the full
++; format (long disp + index + base).  So whenever a move from/to such
++; an address is required and the instruction cannot deal with it we do
++; a load address into a scratch register first and use this as the new
++; base register.
++; This in particular is used for:
++; - non-offsettable memory accesses for multiword moves
++; - full vector reg moves with long displacements
+ 
+-(define_expand "reload<mode>_nonoffmem_in"
++(define_expand "reload<mode>_la_in"
+   [(parallel [(match_operand 0   "register_operand" "")
+               (match_operand 1   "" "")
+               (match_operand:P 2 "register_operand" "=&a")])]
+@@ -1355,7 +1524,7 @@
+   DONE;
+ })
+ 
+-(define_expand "reload<mode>_nonoffmem_out"
++(define_expand "reload<mode>_la_out"
+   [(parallel [(match_operand   0 "" "")
+               (match_operand   1 "register_operand" "")
+               (match_operand:P 2 "register_operand" "=&a")])]
+@@ -1408,11 +1577,9 @@
+ 
+ (define_insn "*movdi_64"
+   [(set (match_operand:DI 0 "nonimmediate_operand"
+-                            "=d,d,d,d,d,d,d,d,f,d,d,d,d,d,
+-                             RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t")
++         "=d,    d,    d,    d,    d, d,    d,    d,f,d,d,d,d, d,RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t,v,v,v,d, v,QR")
+         (match_operand:DI 1 "general_operand"
+-                            "K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT,
+-                             d,*f,R,T,*f,*f,d,K,t,d,t,Q"))]
++         " K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, d, *f,  R,  T,*f,*f,d,K,t,d,t,Q,K,v,d,v,QR, v"))]
+   "TARGET_ZARCH"
+   "@
+    lghi\t%0,%h1
+@@ -1440,15 +1607,21 @@
+    #
+    #
+    stam\t%1,%N1,%S0
+-   lam\t%0,%N0,%S1"
++   lam\t%0,%N0,%S1
++   vleig\t%v0,%h1,0
++   vlr\t%v0,%v1
++   vlvgg\t%v0,%1,0
++   vlgvg\t%0,%v1,0
++   vleg\t%v0,%1,0
++   vsteg\t%v1,%0,0"
+   [(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RXY,RIL,RRE,RXY,
+-                        RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS")
++                        RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS,VRI,VRR,VRS,VRS,VRX,VRX")
+    (set_attr "type" "*,*,*,*,*,*,*,*,floaddf,floaddf,la,larl,lr,load,store,
+-                     floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,
+-                     *,*")
++                     floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,*,
++                     *,*,*,*,*,*,*")
+    (set_attr "cpu_facility" "*,*,*,*,*,extimm,extimm,extimm,dfp,dfp,longdisp,
+                              z10,*,*,*,*,*,longdisp,*,longdisp,
+-                             z10,z10,*,*,*,*")
++                             z10,z10,*,*,*,*,vec,vec,vec,vec,vec,vec")
+    (set_attr "z10prop" "z10_fwd_A1,
+                         z10_fwd_E1,
+                         z10_fwd_E1,
+@@ -1474,7 +1647,7 @@
+                         *,
+                         *,
+                         *,
+-                        *")
++                        *,*,*,*,*,*,*")
+ ])
+ 
+ (define_split
+@@ -1666,9 +1839,9 @@
+ 
+ (define_insn "*movsi_zarch"
+   [(set (match_operand:SI 0 "nonimmediate_operand"
+-			    "=d,d,d,d,d,d,d,d,d,R,T,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t")
++	 "=d,    d,    d, d,d,d,d,d,d,R,T,!*f,!*f,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t,v,v,v,d, v,QR")
+         (match_operand:SI 1 "general_operand"
+-			    "K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d,*f,R,T,*f,*f,t,d,t,d,K,Q"))]
++	 " K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d, *f, *f,  R,  R,  T,*f,*f,t,d,t,d,K,Q,K,v,d,v,QR, v"))]
+   "TARGET_ZARCH"
+   "@
+    lhi\t%0,%h1
+@@ -1682,7 +1855,9 @@
+    ly\t%0,%1
+    st\t%1,%0
+    sty\t%1,%0
++   lder\t%0,%1
+    ler\t%0,%1
++   lde\t%0,%1
+    le\t%0,%1
+    ley\t%0,%1
+    ste\t%1,%0
+@@ -1692,9 +1867,15 @@
+    stam\t%1,%1,%S0
+    strl\t%1,%0
+    mvhi\t%0,%1
+-   lam\t%0,%0,%S1"
++   lam\t%0,%0,%S1
++   vleif\t%v0,%h1,0
++   vlr\t%v0,%v1
++   vlvgf\t%v0,%1,0
++   vlgvf\t%0,%v1,0
++   vlef\t%v0,%1,0
++   vstef\t%v1,%0,0"
+   [(set_attr "op_type" "RI,RI,RI,RIL,RXY,RIL,RR,RX,RXY,RX,RXY,
+-                        RR,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS")
++                        RRE,RR,RXE,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS,VRI,VRR,VRS,VRS,VRX,VRX")
+    (set_attr "type" "*,
+                      *,
+                      *,
+@@ -1709,6 +1890,8 @@
+                      floadsf,
+                      floadsf,
+                      floadsf,
++                     floadsf,
++                     floadsf,
+                      fstoresf,
+                      fstoresf,
+                      *,
+@@ -1716,9 +1899,9 @@
+                      *,
+                      larl,
+                      *,
+-                     *")
++                     *,*,*,*,*,*,*")
+    (set_attr "cpu_facility" "*,*,*,extimm,longdisp,z10,*,*,longdisp,*,longdisp,
+-                             *,*,longdisp,*,longdisp,*,*,*,z10,z10,*")
++                             vec,*,vec,*,longdisp,*,longdisp,*,*,*,z10,z10,*,vec,vec,vec,vec,vec,vec")
+    (set_attr "z10prop" "z10_fwd_A1,
+                         z10_fwd_E1,
+                         z10_fwd_E1,
+@@ -1735,42 +1918,38 @@
+                         *,
+                         *,
+                         *,
++                        *,
++                        *,
+                         z10_super_E1,
+                         z10_super,
+                         *,
+                         z10_rec,
+                         z10_super,
+-                        *")])
++                        *,*,*,*,*,*,*")])
+ 
+ (define_insn "*movsi_esa"
+-  [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!R,d,t,Q,t")
+-        (match_operand:SI 1 "general_operand" "K,d,R,d,*f,R,*f,t,d,t,Q"))]
++  [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!*f,!*f,!R,d,t,Q,t")
++        (match_operand:SI 1 "general_operand"       "K,d,R,d, *f, *f,  R,  R,*f,t,d,t,Q"))]
+   "!TARGET_ZARCH"
+   "@
+    lhi\t%0,%h1
+    lr\t%0,%1
+    l\t%0,%1
+    st\t%1,%0
++   lder\t%0,%1
+    ler\t%0,%1
++   lde\t%0,%1
+    le\t%0,%1
+    ste\t%1,%0
+    ear\t%0,%1
+    sar\t%0,%1
+    stam\t%1,%1,%S0
+    lam\t%0,%0,%S1"
+-  [(set_attr "op_type" "RI,RR,RX,RX,RR,RX,RX,RRE,RRE,RS,RS")
+-   (set_attr "type" "*,lr,load,store,floadsf,floadsf,fstoresf,*,*,*,*")
+-   (set_attr "z10prop" "z10_fwd_A1,
+-                        z10_fr_E1,
+-                        z10_fwd_A3,
+-                        z10_rec,
+-                        *,
+-                        *,
+-                        *,
+-                        z10_super_E1,
+-                        z10_super,
+-                        *,
+-                        *")
++  [(set_attr "op_type" "RI,RR,RX,RX,RRE,RR,RXE,RX,RX,RRE,RRE,RS,RS")
++   (set_attr "type" "*,lr,load,store,floadsf,floadsf,floadsf,floadsf,fstoresf,*,*,*,*")
++   (set_attr "z10prop" "z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec,*,*,*,*,*,z10_super_E1,
++                        z10_super,*,*")
++   (set_attr "cpu_facility" "*,*,*,*,vec,*,vec,*,*,*,*,*,*")
+ ])
+ 
+ (define_peephole2
+@@ -1880,8 +2059,8 @@
+ })
+ 
+ (define_insn "*movhi"
+-  [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q")
+-        (match_operand:HI 1 "general_operand"      " d,n,R,T,b,d,d,d,K"))]
++  [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q,v,v,v,d, v,QR")
++        (match_operand:HI 1 "general_operand"      " d,n,R,T,b,d,d,d,K,K,v,d,v,QR, v"))]
+   ""
+   "@
+    lr\t%0,%1
+@@ -1892,10 +2071,16 @@
+    sth\t%1,%0
+    sthy\t%1,%0
+    sthrl\t%1,%0
+-   mvhhi\t%0,%1"
+-  [(set_attr "op_type"      "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL")
+-   (set_attr "type"         "lr,*,*,*,larl,store,store,store,*")
+-   (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10")
++   mvhhi\t%0,%1
++   vleih\t%v0,%h1,0
++   vlr\t%v0,%v1
++   vlvgh\t%v0,%1,0
++   vlgvh\t%0,%v1,0
++   vleh\t%v0,%1,0
++   vsteh\t%v1,%0,0"
++  [(set_attr "op_type"      "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL,VRI,VRR,VRS,VRS,VRX,VRX")
++   (set_attr "type"         "lr,*,*,*,larl,store,store,store,*,*,*,*,*,*,*")
++   (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10,vec,vec,vec,vec,vec,vec")
+    (set_attr "z10prop" "z10_fr_E1,
+                        z10_fwd_A1,
+                        z10_super_E1,
+@@ -1904,7 +2089,7 @@
+                        z10_rec,
+                        z10_rec,
+                        z10_rec,
+-                       z10_super")])
++                       z10_super,*,*,*,*,*,*")])
+ 
+ (define_peephole2
+   [(set (match_operand:HI 0 "register_operand" "")
+@@ -1939,8 +2124,8 @@
+ })
+ 
+ (define_insn "*movqi"
+-  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q")
+-        (match_operand:QI 1 "general_operand"      " d,n,R,T,d,d,n,n,?Q"))]
++  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q,v,v,v,d, v,QR")
++        (match_operand:QI 1 "general_operand"      " d,n,R,T,d,d,n,n,?Q,K,v,d,v,QR, v"))]
+   ""
+   "@
+    lr\t%0,%1
+@@ -1951,9 +2136,16 @@
+    stcy\t%1,%0
+    mvi\t%S0,%b1
+    mviy\t%S0,%b1
+-   #"
+-  [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS")
+-   (set_attr "type" "lr,*,*,*,store,store,store,store,*")
++   #
++   vleib\t%v0,%b1,0
++   vlr\t%v0,%v1
++   vlvgb\t%v0,%1,0
++   vlgvb\t%0,%v1,0
++   vleb\t%v0,%1,0
++   vsteb\t%v1,%0,0"
++  [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS,VRI,VRR,VRS,VRS,VRX,VRX")
++   (set_attr "type" "lr,*,*,*,store,store,store,store,*,*,*,*,*,*,*")
++   (set_attr "cpu_facility" "*,*,*,*,*,*,*,*,*,vec,vec,vec,vec,vec,vec")
+    (set_attr "z10prop" "z10_fr_E1,
+                         z10_fwd_A1,
+                         z10_super_E1,
+@@ -1962,7 +2154,7 @@
+                         z10_rec,
+                         z10_super,
+                         z10_super,
+-                        *")])
++                        *,*,*,*,*,*,*")])
+ 
+ (define_peephole2
+   [(set (match_operand:QI 0 "nonimmediate_operand" "")
+@@ -2094,7 +2286,7 @@
+   [(set (match_operand:TD_TF 0 "register_operand" "")
+         (match_operand:TD_TF 1 "memory_operand"   ""))]
+   "TARGET_ZARCH && reload_completed
+-   && !FP_REG_P (operands[0])
++   && GENERAL_REG_P (operands[0])
+    && !s_operand (operands[1], VOIDmode)"
+   [(set (match_dup 0) (match_dup 1))]
+ {
+@@ -2150,9 +2342,9 @@
+ 
+ (define_insn "*mov<mode>_64dfp"
+   [(set (match_operand:DD_DF 0 "nonimmediate_operand"
+-			       "=f,f,f,d,f,f,R,T,d,d, d,RT")
++			       "=f,f,f,d,f,f,R,T,d,d,d, d,b,RT,v,v,d,v,QR")
+         (match_operand:DD_DF 1 "general_operand"
+-			       " G,f,d,f,R,T,f,f,G,d,RT, d"))]
++			       " G,f,d,f,R,T,f,f,G,d,b,RT,d, d,v,d,v,QR,v"))]
+  "TARGET_DFP"
+  "@
+   lzdr\t%0
+@@ -2165,17 +2357,24 @@
+   stdy\t%1,%0
+   lghi\t%0,0
+   lgr\t%0,%1
++   lgrl\t%0,%1
+   lg\t%0,%1
+-   stg\t%1,%0"
+-  [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
++   stgrl\t%1,%0
++   stg\t%1,%0
++   vlr\t%v0,%v1
++   vlvgg\t%v0,%1,0
++   vlgvg\t%0,%v1,0
++   vleg\t%v0,%1,0
++   vsteg\t%v1,%0,0" ; %v: always print the VR operand as a vector register name
++  [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRS,VRS,VRX,VRX")
+    (set_attr "type" "fsimpdf,floaddf,floaddf,floaddf,floaddf,floaddf,
+-                     fstoredf,fstoredf,*,lr,load,store")
+-   (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
+-   (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
++                     fstoredf,fstoredf,*,lr,load,load,store,store,*,*,*,load,store")
++   (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*,*,*")
++   (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec,vec,vec")])
+ 
+ (define_insn "*mov<mode>_64"
+-  [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d, d,RT")
+-        (match_operand:DD_DF 1 "general_operand"      " G,f,R,T,f,f,G,d,RT, d"))]
++  [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d,d, d,b,RT,v,v,QR")
++        (match_operand:DD_DF 1 "general_operand"      " G,f,R,T,f,f,G,d,b,RT,d, d,v,QR,v"))]
+   "TARGET_ZARCH"
+   "@
+    lzdr\t%0
+@@ -2186,13 +2385,18 @@
+    stdy\t%1,%0
+    lghi\t%0,0
+    lgr\t%0,%1
++   lgrl\t%0,%1
+    lg\t%0,%1
+-   stg\t%1,%0"
+-  [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
++   stgrl\t%1,%0
++   stg\t%1,%0
++   vlr\t%v0,%v1
++   vleg\t%v0,%1,0
++   vsteg\t%v1,%0,0"
++  [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRX,VRX")
+    (set_attr "type"    "fsimpdf,fload<mode>,fload<mode>,fload<mode>,
+-                        fstore<mode>,fstore<mode>,*,lr,load,store")
+-   (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
+-   (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*")])
++                        fstore<mode>,fstore<mode>,*,lr,load,load,store,store,*,load,store")
++   (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*")
++   (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec")])
+ 
+ (define_insn "*mov<mode>_31"
+   [(set (match_operand:DD_DF 0 "nonimmediate_operand"
+@@ -2265,28 +2469,38 @@
+ 
+ (define_insn "mov<mode>"
+   [(set (match_operand:SD_SF 0 "nonimmediate_operand"
+-			       "=f,f,f,f,R,T,d,d,d,d,R,T")
++			       "=f,f,f,f,f,f,R,T,d,d,d,d,d,b,R,T,v,v,v,d,v,QR")
+         (match_operand:SD_SF 1 "general_operand"
+-			       " G,f,R,T,f,f,G,d,R,T,d,d"))]
++			       " G,f,f,R,R,T,f,f,G,d,b,R,T,d,d,d,v,G,d,v,QR,v"))]
+   ""
+   "@
+    lzer\t%0
++   lder\t%0,%1
+    ler\t%0,%1
++   lde\t%0,%1
+    le\t%0,%1
+    ley\t%0,%1
+    ste\t%1,%0
+    stey\t%1,%0
+    lhi\t%0,0
+    lr\t%0,%1
++   lrl\t%0,%1
+    l\t%0,%1
+    ly\t%0,%1
++   strl\t%1,%0
+    st\t%1,%0
+-   sty\t%1,%0"
+-  [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RR,RX,RXY,RX,RXY")
+-   (set_attr "type"    "fsimpsf,fload<mode>,fload<mode>,fload<mode>,
+-                        fstore<mode>,fstore<mode>,*,lr,load,load,store,store")
+-   (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec")
+-   (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
++   sty\t%1,%0
++   vlr\t%v0,%v1
++   vleif\t%v0,0,0
++   vlvgf\t%v0,%1,0
++   vlgvf\t%0,%v1,0
++   vlef\t%v0,%1,0
++   vstef\t%v1,%0,0" ; 4-byte modes: word (not doubleword) element 0 accesses
++  [(set_attr "op_type" "RRE,RRE,RR,RXE,RX,RXY,RX,RXY,RI,RR,RIL,RX,RXY,RIL,RX,RXY,VRR,VRI,VRS,VRS,VRX,VRX")
++   (set_attr "type"    "fsimpsf,fsimpsf,fload<mode>,fload<mode>,fload<mode>,fload<mode>,
++                        fstore<mode>,fstore<mode>,*,lr,load,load,load,store,store,store,*,*,*,*,load,store")
++   (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,z10_rec,*,*,*,*,*,*")
++   (set_attr "cpu_facility" "z196,vec,*,vec,*,*,*,*,*,*,z10,*,*,z10,*,*,vec,vec,vec,vec,vec,vec")])
+ 
+ ;
+ ; movcc instruction pattern
+@@ -2577,6 +2791,22 @@
+ ;
+ 
+ (define_expand "strlen<mode>"
++  [(match_operand:P   0 "register_operand" "")  ; result
++   (match_operand:BLK 1 "memory_operand" "")    ; input string
++   (match_operand:SI  2 "immediate_operand" "") ; search character
++   (match_operand:SI  3 "immediate_operand" "")] ; known alignment
++  ""
++{
++  if (!TARGET_VX || operands[2] != const0_rtx)
++    emit_insn (gen_strlen_srst<mode> (operands[0], operands[1],
++				      operands[2], operands[3]));
++  else
++    s390_expand_vec_strlen (operands[0], operands[1], operands[3]);
++
++  DONE;
++})
++
++(define_expand "strlen_srst<mode>"
+   [(set (reg:SI 0) (match_operand:SI 2 "immediate_operand" ""))
+    (parallel
+     [(set (match_dup 4)
+@@ -2674,8 +2904,16 @@
+      (clobber (reg:CC CC_REGNUM))])]
+   ""
+ {
+-  rtx addr1 = gen_reg_rtx (Pmode);
+-  rtx addr2 = gen_reg_rtx (Pmode);
++  rtx addr1, addr2;
++
++  if (TARGET_VX && optimize_function_for_speed_p (cfun))
++    {
++      s390_expand_vec_movstr (operands[0], operands[1], operands[2]);
++      DONE;
++    }
++
++  addr1 = gen_reg_rtx (Pmode);
++  addr2 = gen_reg_rtx (Pmode);
+ 
+   emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+   emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+@@ -2886,8 +3124,12 @@
+   operands[2] = GEN_INT (S390_TDC_INFINITY);
+ })
+ 
++; This extracts CC into a GPR properly shifted.  The actual IPM
++; instruction will be issued by reload.  The constraint of operand 1
++; forces reload to use a GPR.  So reload will issue a movcc insn for
++; copying CC into a GPR first.
+ (define_insn_and_split "*cc_to_int"
+-  [(set (match_operand:SI 0 "register_operand" "=d")
++  [(set (match_operand:SI 0 "nonimmediate_operand"     "=d")
+         (unspec:SI [(match_operand 1 "register_operand" "0")]
+                    UNSPEC_CC_TO_INT))]
+   "operands != NULL"
+@@ -4223,14 +4465,27 @@
+ 
+ ; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns.
+ 
++(define_insn "*fixuns_truncdfdi2_z13"
++  [(set (match_operand:DI                  0 "register_operand" "=d,v")
++	(unsigned_fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
++   (unspec:DI [(match_operand:DI           2 "immediate_operand" "K,K")] UNSPEC_ROUND)
++   (clobber (reg:CC CC_REGNUM))]
++   "TARGET_VX && TARGET_HARD_FLOAT"
++   "@
++    clgdbr\t%0,%h2,%1,0
++    wclgdb\t%v0,%v1,0,%h2"
++   [(set_attr "op_type" "RRF,VRR")
++    (set_attr "type"    "ftoi")])
++
+ ; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr
+ ;         clfdtr, clfxtr,         clgdtr, clgxtr
+ (define_insn "*fixuns_trunc<FP:mode><GPR:mode>2_z196"
+-  [(set (match_operand:GPR 0 "register_operand" "=r")
+-	(unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f")))
+-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
++  [(set (match_operand:GPR                  0 "register_operand" "=d")
++	(unsigned_fix:GPR (match_operand:FP 1 "register_operand"  "f")))
++   (unspec:GPR [(match_operand:GPR          2 "immediate_operand" "K")] UNSPEC_ROUND)
+    (clobber (reg:CC CC_REGNUM))]
+-   "TARGET_Z196"
++   "TARGET_Z196 && TARGET_HARD_FLOAT
++    && (!TARGET_VX || <GPR:MODE>mode != DImode || <FP:MODE>mode != DFmode)"
+    "cl<GPR:gf><FP:xde><FP:bt>r\t%0,%h2,%1,0"
+    [(set_attr "op_type" "RRF")
+     (set_attr "type"    "ftoi")])
+@@ -4245,18 +4500,37 @@
+   DONE;
+ })
+ 
++(define_insn "*fix_truncdfdi2_bfp_z13"
++  [(set (match_operand:DI         0 "register_operand" "=d,v")
++        (fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
++   (unspec:DI [(match_operand:DI  2 "immediate_operand" "K,K")] UNSPEC_ROUND)
++   (clobber (reg:CC CC_REGNUM))]
++  "TARGET_VX && TARGET_HARD_FLOAT"
++  "@
++   cgdbr\t%0,%h2,%1
++   wcgdb\t%v0,%v1,0,%h2"
++  [(set_attr "op_type" "RRE,VRR")
++   (set_attr "type"    "ftoi")])
++
+ ; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr
+-(define_insn "fix_trunc<BFP:mode><GPR:mode>2_bfp"
+-  [(set (match_operand:GPR 0 "register_operand" "=d")
+-        (fix:GPR (match_operand:BFP 1 "register_operand" "f")))
+-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
++(define_insn "*fix_trunc<BFP:mode><GPR:mode>2_bfp"
++  [(set (match_operand:GPR          0 "register_operand" "=d")
++        (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
++   (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
+    (clobber (reg:CC CC_REGNUM))]
+-  "TARGET_HARD_FLOAT"
++  "TARGET_HARD_FLOAT
++    && (!TARGET_VX || <GPR:MODE>mode != DImode || <BFP:MODE>mode != DFmode)"
+   "c<GPR:gf><BFP:xde>br\t%0,%h2,%1"
+   [(set_attr "op_type" "RRE")
+    (set_attr "type"    "ftoi")])
+ 
+-
++(define_expand "fix_trunc<BFP:mode><GPR:mode>2_bfp"
++  [(parallel
++    [(set (match_operand:GPR          0 "register_operand" "=d")
++	  (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
++     (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
++     (clobber (reg:CC CC_REGNUM))])]
++  "TARGET_HARD_FLOAT")
+ ;
+ ; fix_trunc(td|dd)di2 instruction pattern(s).
+ ;
+@@ -4303,12 +4577,15 @@
+ 
+ ; cxgbr, cdgbr, cegbr, cxgtr, cdgtr
+ (define_insn "floatdi<mode>2"
+-  [(set (match_operand:FP 0 "register_operand" "=f")
+-        (float:FP (match_operand:DI 1 "register_operand" "d")))]
++  [(set (match_operand:FP           0 "register_operand" "=f,<vf>")
++        (float:FP (match_operand:DI 1 "register_operand"  "d,<vd>")))]
+   "TARGET_ZARCH && TARGET_HARD_FLOAT"
+-  "c<xde>g<bt>r\t%0,%1"
+-  [(set_attr "op_type" "RRE")
+-   (set_attr "type"    "itof<mode>" )])
++  "@
++   c<xde>g<bt>r\t%0,%1
++   wcdgb\t%v0,%v1,0,0"
++  [(set_attr "op_type"      "RRE,VRR")
++   (set_attr "type"         "itof<mode>" )
++   (set_attr "cpu_facility" "*,vec")])
+ 
+ ; cxfbr, cdfbr, cefbr
+ (define_insn "floatsi<mode>2"
+@@ -4332,27 +4609,47 @@
+ ; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s).
+ ;
+ 
++(define_insn "*floatunsdidf2_z13"
++  [(set (match_operand:DF                    0 "register_operand" "=f,v")
++        (unsigned_float:DF (match_operand:DI 1 "register_operand"  "d,v")))]
++  "TARGET_VX && TARGET_HARD_FLOAT"
++  "@
++   cdlgbr\t%0,0,%1,0
++   wcdlgb\t%v0,%v1,0,0"
++  [(set_attr "op_type" "RRE,VRR")
++   (set_attr "type"    "itofdf")])
++
+ ; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr
+ ; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr
+-(define_insn "floatuns<GPR:mode><FP:mode>2"
+-  [(set (match_operand:FP 0 "register_operand" "=f")
+-        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))]
+-  "TARGET_Z196 && TARGET_HARD_FLOAT"
++(define_insn "*floatuns<GPR:mode><FP:mode>2"
++  [(set (match_operand:FP                     0 "register_operand" "=f")
++        (unsigned_float:FP (match_operand:GPR 1 "register_operand"  "d")))]
++  "TARGET_Z196 && TARGET_HARD_FLOAT
++   && (!TARGET_VX || <FP:MODE>mode != DFmode || <GPR:MODE>mode != DImode)"
+   "c<FP:xde>l<GPR:gf><FP:bt>r\t%0,0,%1,0"
+   [(set_attr "op_type" "RRE")
+-   (set_attr "type"    "itof<FP:mode>" )])
++   (set_attr "type"    "itof<FP:mode>")])
++
++(define_expand "floatuns<GPR:mode><FP:mode>2"
++  [(set (match_operand:FP                     0 "register_operand" "")
++        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "")))]
++  "TARGET_Z196 && TARGET_HARD_FLOAT")
+ 
+ ;
+ ; truncdfsf2 instruction pattern(s).
+ ;
+ 
+ (define_insn "truncdfsf2"
+-  [(set (match_operand:SF 0 "register_operand" "=f")
+-        (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
++  [(set (match_operand:SF                    0 "register_operand" "=f,v")
++        (float_truncate:SF (match_operand:DF 1 "register_operand"  "f,v")))]
+   "TARGET_HARD_FLOAT"
+-  "ledbr\t%0,%1"
+-  [(set_attr "op_type"  "RRE")
+-   (set_attr "type"   "ftruncdf")])
++  "@
++   ledbr\t%0,%1
++   wledb\t%v0,%v1,0,0" ; IEEE inexact exception not suppressed
++                       ; According to BFP rounding mode
++  [(set_attr "op_type"      "RRE,VRR")
++   (set_attr "type"         "ftruncdf")
++   (set_attr "cpu_facility" "*,vec")])
+ 
+ ;
+ ; trunctf(df|sf)2 instruction pattern(s).
+@@ -4393,17 +4690,35 @@
+ ; extend(sf|df)(df|tf)2 instruction pattern(s).
+ ;
+ 
++(define_insn "*extendsfdf2_z13"
++  [(set (match_operand:DF                  0 "register_operand"     "=f,f,v")
++        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand"  "f,R,v")))]
++  "TARGET_VX && TARGET_HARD_FLOAT"
++  "@
++   ldebr\t%0,%1
++   ldeb\t%0,%1
++   wldeb\t%v0,%v1"
++  [(set_attr "op_type" "RRE,RXE,VRR")
++   (set_attr "type"    "fsimpdf, floaddf,fsimpdf")])
++
+ ; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb
+-(define_insn "extend<DSF:mode><BFP:mode>2"
+-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
++(define_insn "*extend<DSF:mode><BFP:mode>2"
++  [(set (match_operand:BFP                   0 "register_operand"     "=f,f")
+         (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand"  "f,R")))]
+   "TARGET_HARD_FLOAT
+-   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)"
++   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)
++   && (!TARGET_VX || <BFP:MODE>mode != DFmode || <DSF:MODE>mode != SFmode)"
+   "@
+    l<BFP:xde><DSF:xde>br\t%0,%1
+    l<BFP:xde><DSF:xde>b\t%0,%1"
+-  [(set_attr "op_type"  "RRE,RXE")
+-   (set_attr "type"   "fsimp<BFP:mode>, fload<BFP:mode>")])
++  [(set_attr "op_type" "RRE,RXE")
++   (set_attr "type"    "fsimp<BFP:mode>, fload<BFP:mode>")])
++
++(define_expand "extend<DSF:mode><BFP:mode>2"
++  [(set (match_operand:BFP                   0 "register_operand"     "")
++        (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "")))]
++  "TARGET_HARD_FLOAT
++   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)")
+ 
+ ;
+ ; extendddtd2 and extendsddd2 instruction pattern(s).
+@@ -4616,10 +4931,29 @@
+ ; addti3 instruction pattern(s).
+ ;
+ 
+-(define_insn_and_split "addti3"
+-  [(set (match_operand:TI 0 "register_operand" "=&d")
++(define_expand "addti3"
++  [(parallel
++    [(set (match_operand:TI          0 "register_operand"     "")
++	  (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
++		   (match_operand:TI 2 "general_operand"      "") ) )
++     (clobber (reg:CC CC_REGNUM))])]
++  "TARGET_ZARCH"
++{
++  /* For z13 we have vaq which doesn't set CC.  */
++  if (TARGET_VX)
++    {
++      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
++			      gen_rtx_PLUS (TImode,
++                                            copy_to_mode_reg (TImode, operands[1]),
++                                            copy_to_mode_reg (TImode, operands[2]))));
++      DONE;
++    }
++})
++
++(define_insn_and_split "*addti3"
++  [(set (match_operand:TI          0 "register_operand"    "=&d")
+         (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+-                 (match_operand:TI 2 "general_operand" "do") ) )
++                 (match_operand:TI 2 "general_operand"      "do") ) )
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_ZARCH"
+   "#"
+@@ -4639,7 +4973,9 @@
+    operands[5] = operand_subword (operands[2], 0, 0, TImode);
+    operands[6] = operand_subword (operands[0], 1, 0, TImode);
+    operands[7] = operand_subword (operands[1], 1, 0, TImode);
+-   operands[8] = operand_subword (operands[2], 1, 0, TImode);")
++   operands[8] = operand_subword (operands[2], 1, 0, TImode);"
++  [(set_attr "op_type"  "*")
++   (set_attr "cpu_facility" "*")])
+ 
+ ;
+ ; adddi3 instruction pattern(s).
+@@ -4976,17 +5312,20 @@
+ ;
+ 
+ ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
++; FIXME: wfadb does not clobber cc
+ (define_insn "add<mode>3"
+-  [(set (match_operand:FP 0 "register_operand"              "=f,   f")
+-        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+-		 (match_operand:FP 2 "general_operand"      " f,<Rf>")))
++  [(set (match_operand:FP 0 "register_operand"                 "=f,   f,<vf>")
++        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
++		 (match_operand:FP 2 "general_operand"          "f,<Rf>,<vf>")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_HARD_FLOAT"
+   "@
+    a<xde><bt>r\t%0,<op1>%2
+-   a<xde>b\t%0,%2"
+-  [(set_attr "op_type"  "<RRer>,RXE")
+-   (set_attr "type"     "fsimp<mode>")])
++   a<xde>b\t%0,%2
++   wfadb\t%v0,%v1,%v2"
++  [(set_attr "op_type"      "<RRer>,RXE,VRR")
++   (set_attr "type"         "fsimp<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+ (define_insn "*add<mode>3_cc"
+@@ -5026,10 +5365,29 @@
+ ; subti3 instruction pattern(s).
+ ;
+ 
+-(define_insn_and_split "subti3"
+-  [(set (match_operand:TI 0 "register_operand" "=&d")
+-        (minus:TI (match_operand:TI 1 "register_operand" "0")
+-                  (match_operand:TI 2 "general_operand" "do") ) )
++(define_expand "subti3"
++  [(parallel
++    [(set (match_operand:TI           0 "register_operand" "")
++	  (minus:TI (match_operand:TI 1 "register_operand" "")
++		    (match_operand:TI 2 "general_operand"  "") ) )
++     (clobber (reg:CC CC_REGNUM))])]
++  "TARGET_ZARCH"
++{
++  /* For z13 we have vsq which doesn't set CC.  */
++  if (TARGET_VX)
++    {
++      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
++			      gen_rtx_MINUS (TImode,
++                                            operands[1],
++                                            copy_to_mode_reg (TImode, operands[2]))));
++      DONE;
++    }
++})
++
++(define_insn_and_split "*subti3"
++  [(set (match_operand:TI           0 "register_operand" "=&d")
++        (minus:TI (match_operand:TI 1 "register_operand"   "0")
++                  (match_operand:TI 2 "general_operand"   "do") ) )
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_ZARCH"
+   "#"
+@@ -5048,7 +5406,9 @@
+    operands[5] = operand_subword (operands[2], 0, 0, TImode);
+    operands[6] = operand_subword (operands[0], 1, 0, TImode);
+    operands[7] = operand_subword (operands[1], 1, 0, TImode);
+-   operands[8] = operand_subword (operands[2], 1, 0, TImode);")
++   operands[8] = operand_subword (operands[2], 1, 0, TImode);"
++  [(set_attr "op_type"      "*")
++   (set_attr "cpu_facility" "*")])
+ 
+ ;
+ ; subdi3 instruction pattern(s).
+@@ -5327,16 +5687,18 @@
+ 
+ ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
+ (define_insn "sub<mode>3"
+-  [(set (match_operand:FP 0 "register_operand"            "=f,  f")
+-        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,0")
+-                  (match_operand:FP 2 "general_operand"  "f,<Rf>")))
++  [(set (match_operand:FP           0 "register_operand"   "=f,   f,<vf>")
++        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
++                  (match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_HARD_FLOAT"
+   "@
+    s<xde><bt>r\t%0,<op1>%2
+-   s<xde>b\t%0,%2"
+-  [(set_attr "op_type"  "<RRer>,RXE")
+-   (set_attr "type"     "fsimp<mode>")])
++   s<xde>b\t%0,%2
++   wfsdb\t%v0,%v1,%v2"
++  [(set_attr "op_type"      "<RRer>,RXE,VRR")
++   (set_attr "type"         "fsimp<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
+ (define_insn "*sub<mode>3_cc"
+@@ -5742,41 +6104,47 @@
+ 
+ ; mxbr, mdbr, meebr, mxb, mxb, meeb, mdtr, mxtr
+ (define_insn "mul<mode>3"
+-  [(set (match_operand:FP 0 "register_operand"              "=f,f")
+-        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+-                 (match_operand:FP 2 "general_operand"      "f,<Rf>")))]
++  [(set (match_operand:FP          0 "register_operand"        "=f,   f,<vf>")
++        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
++                 (match_operand:FP 2 "general_operand"          "f,<Rf>,<vf>")))]
+   "TARGET_HARD_FLOAT"
+   "@
+    m<xdee><bt>r\t%0,<op1>%2
+-   m<xdee>b\t%0,%2"
+-  [(set_attr "op_type"  "<RRer>,RXE")
+-   (set_attr "type"     "fmul<mode>")])
++   m<xdee>b\t%0,%2
++   wfmdb\t%v0,%v1,%v2"
++  [(set_attr "op_type"      "<RRer>,RXE,VRR")
++   (set_attr "type"         "fmul<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ ; madbr, maebr, maxb, madb, maeb
+ (define_insn "fma<mode>4"
+-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
+-	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
+-		 (match_operand:DSF 2 "nonimmediate_operand" "f,R")
+-		 (match_operand:DSF 3 "register_operand" "0,0")))]
++  [(set (match_operand:DSF          0 "register_operand"     "=f,f,<vf>")
++	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,<vf>")
++		 (match_operand:DSF 2 "nonimmediate_operand"  "f,R,<vf>")
++		 (match_operand:DSF 3 "register_operand"      "0,0,<v0>")))]
+   "TARGET_HARD_FLOAT"
+   "@
+    ma<xde>br\t%0,%1,%2
+-   ma<xde>b\t%0,%1,%2"
+-  [(set_attr "op_type"  "RRE,RXE")
+-   (set_attr "type"     "fmadd<mode>")])
++   ma<xde>b\t%0,%1,%2
++   wfmadb\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type"      "RRE,RXE,VRR")
++   (set_attr "type"         "fmadd<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ ; msxbr, msdbr, msebr, msxb, msdb, mseb
+ (define_insn "fms<mode>4"
+-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
+-	(fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
+-		 (match_operand:DSF 2 "nonimmediate_operand" "f,R")
+-		 (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))]
++  [(set (match_operand:DSF                   0 "register_operand"     "=f,f,<vf>")
++	(fma:DSF (match_operand:DSF          1 "nonimmediate_operand" "%f,f,<vf>")
++		 (match_operand:DSF          2 "nonimmediate_operand"  "f,R,<vf>")
++		 (neg:DSF (match_operand:DSF 3 "register_operand"      "0,0,<v0>"))))]
+   "TARGET_HARD_FLOAT"
+   "@
+    ms<xde>br\t%0,%1,%2
+-   ms<xde>b\t%0,%1,%2"
+-  [(set_attr "op_type"  "RRE,RXE")
+-   (set_attr "type"     "fmadd<mode>")])
++   ms<xde>b\t%0,%1,%2
++   wfmsdb\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type"      "RRE,RXE,VRR")
++   (set_attr "type"         "fmadd<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ ;;
+ ;;- Divide and modulo instructions.
+@@ -6202,15 +6570,17 @@
+ 
+ ; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr
+ (define_insn "div<mode>3"
+-  [(set (match_operand:FP 0 "register_operand"          "=f,f")
+-        (div:FP (match_operand:FP 1 "register_operand" "<f0>,0")
+-                 (match_operand:FP 2 "general_operand"  "f,<Rf>")))]
++  [(set (match_operand:FP         0 "register_operand"   "=f,   f,<vf>")
++        (div:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
++		(match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))]
+   "TARGET_HARD_FLOAT"
+   "@
+    d<xde><bt>r\t%0,<op1>%2
+-   d<xde>b\t%0,%2"
+-  [(set_attr "op_type"  "<RRer>,RXE")
+-   (set_attr "type"     "fdiv<mode>")])
++   d<xde>b\t%0,%2
++   wfddb\t%v0,%v1,%v2"
++  [(set_attr "op_type"      "<RRer>,RXE,VRR")
++   (set_attr "type"         "fdiv<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ 
+ ;;
+@@ -7356,14 +7726,18 @@
+    (set_attr "type"     "fsimp<mode>")])
+ 
+ ; lcxbr, lcdbr, lcebr
++; FIXME: wflcdb does not clobber cc
+ (define_insn "*neg<mode>2"
+-  [(set (match_operand:BFP 0 "register_operand" "=f")
+-        (neg:BFP (match_operand:BFP 1 "register_operand" "f")))
++  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
++        (neg:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_HARD_FLOAT"
+-  "lc<xde>br\t%0,%1"
+-  [(set_attr "op_type"  "RRE")
+-   (set_attr "type"     "fsimp<mode>")])
++  "@
++   lc<xde>br\t%0,%1
++   wflcdb\t%0,%1"
++  [(set_attr "op_type"      "RRE,VRR")
++   (set_attr "cpu_facility" "*,vec")
++   (set_attr "type"         "fsimp<mode>,*")])
+ 
+ 
+ ;;
+@@ -7474,14 +7848,18 @@
+    (set_attr "type"     "fsimp<mode>")])
+ 
+ ; lpxbr, lpdbr, lpebr
++; FIXME: wflpdb does not clobber cc
+ (define_insn "*abs<mode>2"
+-  [(set (match_operand:BFP 0 "register_operand" "=f")
+-        (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
++  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
++        (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_HARD_FLOAT"
+-  "lp<xde>br\t%0,%1"
+-  [(set_attr "op_type"  "RRE")
+-   (set_attr "type"     "fsimp<mode>")])
++  "@
++    lp<xde>br\t%0,%1
++    wflpdb\t%0,%1"
++  [(set_attr "op_type"      "RRE,VRR")
++   (set_attr "cpu_facility" "*,vec")
++   (set_attr "type"         "fsimp<mode>,*")])
+ 
+ 
+ ;;
+@@ -7585,14 +7963,18 @@
+    (set_attr "type"     "fsimp<mode>")])
+ 
+ ; lnxbr, lndbr, lnebr
++; FIXME: wflndb does not clobber cc
+ (define_insn "*negabs<mode>2"
+-  [(set (match_operand:BFP 0 "register_operand" "=f")
+-        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f"))))
++  [(set (match_operand:BFP                   0 "register_operand" "=f,<vf>")
++        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>"))))
+    (clobber (reg:CC CC_REGNUM))]
+   "TARGET_HARD_FLOAT"
+-  "ln<xde>br\t%0,%1"
+-  [(set_attr "op_type"  "RRE")
+-   (set_attr "type"     "fsimp<mode>")])
++  "@
++   ln<xde>br\t%0,%1
++   wflndb\t%0,%1"
++  [(set_attr "op_type"      "RRE,VRR")
++   (set_attr "cpu_facility" "*,vec")
++   (set_attr "type"         "fsimp<mode>,*")])
+ 
+ ;;
+ ;;- Square root instructions.
+@@ -7604,14 +7986,16 @@
+ 
+ ; sqxbr, sqdbr, sqebr, sqdb, sqeb
+ (define_insn "sqrt<mode>2"
+-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
+-	(sqrt:BFP (match_operand:BFP 1 "general_operand" "f,<Rf>")))]
++  [(set (match_operand:BFP           0 "register_operand" "=f,   f,<vf>")
++	(sqrt:BFP (match_operand:BFP 1 "general_operand"   "f,<Rf>,<vf>")))]
+   "TARGET_HARD_FLOAT"
+   "@
+    sq<xde>br\t%0,%1
+-   sq<xde>b\t%0,%1"
+-  [(set_attr "op_type" "RRE,RXE")
+-   (set_attr "type" "fsqrt<mode>")])
++   sq<xde>b\t%0,%1
++   wfsqdb\t%v0,%v1"
++  [(set_attr "op_type"      "RRE,RXE,VRR")
++   (set_attr "type"         "fsqrt<mode>")
++   (set_attr "cpu_facility" "*,*,vec")])
+ 
+ 
+ ;;
+@@ -10006,6 +10390,35 @@
+   DONE;
+ })
+ 
++; Clobber VRs since they don't get restored
++(define_insn "tbegin_1_z13"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")]
++			       UNSPECV_TBEGIN))
++   (set (match_operand:BLK 1 "memory_operand" "=Q")
++	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB))
++   (clobber (reg:TI 16)) (clobber (reg:TI 38))
++   (clobber (reg:TI 17)) (clobber (reg:TI 39))
++   (clobber (reg:TI 18)) (clobber (reg:TI 40))
++   (clobber (reg:TI 19)) (clobber (reg:TI 41))
++   (clobber (reg:TI 20)) (clobber (reg:TI 42))
++   (clobber (reg:TI 21)) (clobber (reg:TI 43))
++   (clobber (reg:TI 22)) (clobber (reg:TI 44))
++   (clobber (reg:TI 23)) (clobber (reg:TI 45))
++   (clobber (reg:TI 24)) (clobber (reg:TI 46))
++   (clobber (reg:TI 25)) (clobber (reg:TI 47))
++   (clobber (reg:TI 26)) (clobber (reg:TI 48))
++   (clobber (reg:TI 27)) (clobber (reg:TI 49))
++   (clobber (reg:TI 28)) (clobber (reg:TI 50))
++   (clobber (reg:TI 29)) (clobber (reg:TI 51))
++   (clobber (reg:TI 30)) (clobber (reg:TI 52))
++   (clobber (reg:TI 31)) (clobber (reg:TI 53))]
++; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is
++; not supposed to be used for immediates (see genpreds.c).
++  "TARGET_VX && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff"
++  "tbegin\t%1,%x0"
++  [(set_attr "op_type" "SIL")])
++
+ (define_insn "tbegin_1"
+   [(set (reg:CCRAW CC_REGNUM)
+ 	(unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")]
+@@ -10141,3 +10554,30 @@
+   "TARGET_HTM && INTVAL (operands[2]) < 16"
+   "ppa\t%0,%1,%2"
+   [(set_attr "op_type" "RRF")])
++
++
++; Set and get floating point control register
++
++(define_insn "sfpc"
++  [(unspec_volatile [(match_operand:SI 0 "register_operand" "d")]
++		    UNSPECV_SFPC)]
++  "TARGET_HARD_FLOAT"
++  "sfpc\t%0")
++
++(define_insn "efpc"
++  [(set (match_operand:SI 0 "register_operand" "=d")
++	(unspec_volatile:SI [(const_int 0)] UNSPECV_EFPC))]
++  "TARGET_HARD_FLOAT"
++  "efpc\t%0")
++
++
++; Load count to block boundary
++
++(define_insn "lcbb"
++  [(set (match_operand:SI             0 "register_operand"  "=d")
++	(unspec:SI [(match_operand    1 "address_operand" "ZQZR")
++		    (match_operand:SI 2 "immediate_operand"  "C")] UNSPEC_LCBB))
++   (clobber (reg:CC CC_REGNUM))]
++  "TARGET_Z13"
++  "lcbb\t%0,%a1,%b2"
++  [(set_attr "op_type" "VRX")])
+--- gcc/config/s390/s390-modes.def	2013-08-14 13:55:12.000000000 +0200
++++ gcc/config/s390/s390-modes.def	2016-05-11 17:12:39.000000000 +0200
+@@ -84,6 +84,23 @@ Requested mode            -> Destination
+ CCS, CCU, CCT, CCSR, CCUR -> CCZ
+ CCA                       -> CCAP, CCAN
+ 
++Vector comparison modes
++
++CCVEQ  	  EQ	  - 	       - 	   NE	      (VCEQ)
++CCVEQANY  EQ	  EQ	       - 	   NE	      (VCEQ)
++
++CCVH	  GT	  - 	       - 	   LE	      (VCH)
++CCVHANY	  GT	  GT	       - 	   LE	      (VCH)
++CCVHU	  GTU	  -  	       -  	   LEU	      (VCHL)
++CCVHUANY  GTU	  GTU	       -  	   LEU	      (VCHL)
++
++CCVFH	  GT	  -   	       -   	   UNLE	      (VFCH)
++CCVFHANY  GT	  GT	       -   	   UNLE	      (VFCH)
++CCVFHE	  GE	  -   	       -   	   UNLT	      (VFCHE)
++CCVFHEANY GE	  GE	       -   	   UNLT	      (VFCHE)
++
++
++
+ 
+ *** Comments ***
+ 
+@@ -152,6 +169,15 @@ The compare and swap instructions sets t
+ operands were equal/unequal. The CCZ1 mode ensures the result can be
+ effectively placed into a register.
+ 
++
++CCV*
++
++The variants with and without ANY are generated by the same
++instructions and therefore hold the same information.  However,
++when generating a condition code mask they require checking different
++bits of CC.  In that case the variants without ANY represent the
++results for *all* elements.
++
+ CCRAW
+ 
+ The cc mode generated by a non-compare instruction.  The condition
+@@ -181,3 +207,38 @@ CC_MODE (CCT1);
+ CC_MODE (CCT2);
+ CC_MODE (CCT3);
+ CC_MODE (CCRAW);
++
++CC_MODE (CCVEQ);
++CC_MODE (CCVEQANY);
++
++CC_MODE (CCVH);
++CC_MODE (CCVHANY);
++CC_MODE (CCVHU);
++CC_MODE (CCVHUANY);
++
++CC_MODE (CCVFH);
++CC_MODE (CCVFHANY);
++CC_MODE (CCVFHE);
++CC_MODE (CCVFHEANY);
++
++
++/* Vector modes.  */
++
++VECTOR_MODES (INT, 2);        /*                 V2QI */
++VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
++VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI */
++VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI */
++
++VECTOR_MODE (FLOAT, SF, 2);   /* V2SF */
++VECTOR_MODE (FLOAT, SF, 4);   /* V4SF */
++VECTOR_MODE (FLOAT, DF, 2);   /* V2DF */
++
++VECTOR_MODE (INT, QI, 1);     /* V1QI */
++VECTOR_MODE (INT, HI, 1);     /* V1HI */
++VECTOR_MODE (INT, SI, 1);     /* V1SI */
++VECTOR_MODE (INT, DI, 1);     /* V1DI */
++VECTOR_MODE (INT, TI, 1);     /* V1TI */
++
++VECTOR_MODE (FLOAT, SF, 1);   /* V1SF */
++VECTOR_MODE (FLOAT, DF, 1);   /* V1DF */
++VECTOR_MODE (FLOAT, TF, 1);   /* V1TF */
+--- gcc/config/s390/s390.opt	2015-06-18 16:33:05.000000000 +0200
++++ gcc/config/s390/s390.opt	2016-05-11 17:33:59.000000000 +0200
+@@ -76,6 +76,9 @@ Enum(processor_type) String(z196) Value(
+ EnumValue
+ Enum(processor_type) String(zEC12) Value(PROCESSOR_2827_ZEC12)
+ 
++EnumValue
++Enum(processor_type) String(z13) Value(PROCESSOR_2964_Z13)
++
+ mbackchain
+ Target Report Mask(BACKCHAIN)
+ Maintain backchain pointer
+@@ -118,6 +121,10 @@ mhtm
+ Target Report Mask(OPT_HTM)
+ Use hardware transactional execution instructions
+ 
++mvx
++Target Report Mask(OPT_VX)
++Use hardware vector facility instructions and enable the vector ABI
++
+ mpacked-stack
+ Target Report Mask(PACKED_STACK)
+ Use packed stack layout
+@@ -146,6 +153,11 @@ mmvcle
+ Target Report Mask(MVCLE)
+ mvcle use
+ 
++mzvector
++Target Report Mask(ZVECTOR)
++Enable the z vector language extension providing the context-sensitive
++vector macro and enable the Altivec-style builtins in vecintrin.h
++
+ mwarn-dynamicstack
+ Target RejectNegative Var(s390_warn_dynamicstack_p)
+ Warn if a function uses alloca or creates an array with dynamic size
+--- gcc/config/s390/s390-opts.h	2013-01-21 16:11:50.000000000 +0100
++++ gcc/config/s390/s390-opts.h	2016-05-11 15:53:24.000000000 +0200
+@@ -35,6 +35,7 @@ enum processor_type
+   PROCESSOR_2097_Z10,
+   PROCESSOR_2817_Z196,
+   PROCESSOR_2827_ZEC12,
++  PROCESSOR_2964_Z13,
+   PROCESSOR_max
+ };
+ 
+--- gcc/config/s390/s390-protos.h	2014-01-14 16:37:04.000000000 +0100
++++ gcc/config/s390/s390-protos.h	2016-05-11 19:28:17.220349132 +0200
+@@ -41,6 +41,9 @@ extern void s390_set_has_landing_pad_p (
+ extern bool s390_hard_regno_mode_ok (unsigned int, enum machine_mode);
+ extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
+ extern int s390_class_max_nregs (enum reg_class, enum machine_mode);
++extern int s390_cannot_change_mode_class (enum machine_mode, enum machine_mode,
++					  enum reg_class);
++extern bool s390_function_arg_vector (enum machine_mode, const_tree);
+ 
+ #ifdef RTX_CODE
+ extern int s390_extra_constraint_str (rtx, int, const char *);
+@@ -49,6 +52,9 @@ extern int s390_const_double_ok_for_cons
+ extern int s390_single_part (rtx, enum machine_mode, enum machine_mode, int);
+ extern unsigned HOST_WIDE_INT s390_extract_part (rtx, enum machine_mode, int);
+ extern bool s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT, int, int *, int *);
++extern bool s390_const_vec_duplicate_p (rtx);
++extern bool s390_contiguous_bitmask_vector_p (rtx, int *, int *);
++extern bool s390_bytemask_vector_p (rtx, unsigned *);
+ extern bool s390_split_ok_p (rtx, rtx, enum machine_mode, int);
+ extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT);
+ extern bool s390_offset_p (rtx, rtx, rtx);
+@@ -81,6 +87,8 @@ extern void s390_load_address (rtx, rtx)
+ extern bool s390_expand_movmem (rtx, rtx, rtx);
+ extern void s390_expand_setmem (rtx, rtx, rtx);
+ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
++extern void s390_expand_vec_strlen (rtx, rtx, rtx);
++extern void s390_expand_vec_movstr (rtx, rtx, rtx);
+ extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+ extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
+ extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx,
+@@ -88,6 +96,10 @@ extern void s390_expand_cs_hqi (enum mac
+ extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
+ 				rtx, rtx, rtx, bool);
+ extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
++extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx);
++extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
++extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
++extern void s390_expand_vec_init (rtx, rtx);
+ extern rtx s390_return_addr_rtx (int, rtx);
+ extern rtx s390_back_chain_rtx (void);
+ extern rtx s390_emit_call (rtx, rtx, rtx, rtx);
+@@ -113,3 +125,10 @@ extern bool s390_extzv_shift_ok (int, in
+ extern void s390_asm_output_function_label (FILE *, const char *, tree);
+ 
+ #endif /* RTX_CODE */
++
++/* s390-c.c routines */
++extern void s390_cpu_cpp_builtins (struct cpp_reader *);
++extern void s390_register_target_pragmas (void);
++
++/* Routines for s390-c.c */
++extern bool s390_const_operand_ok (tree, int, int, tree);
+--- gcc/config/s390/t-s390	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/t-s390	2016-05-11 17:12:39.000000000 +0200
+@@ -0,0 +1,27 @@
++# Copyright (C) 2015 Free Software Foundation, Inc.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.
++
++TM_H += $(srcdir)/config/s390/s390-builtins.def
++TM_H += $(srcdir)/config/s390/s390-builtin-types.def
++
++s390-c.o: $(srcdir)/config/s390/s390-c.c \
++  $(srcdir)/config/s390/s390-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
++  $(TM_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \
++  $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H)
++	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
++		$(srcdir)/config/s390/s390-c.c
+--- gcc/config/s390/vecintrin.h	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/vecintrin.h	2016-05-11 18:10:53.000000000 +0200
+@@ -0,0 +1,277 @@
++/* GNU compiler vector extension intrinsics
++   Copyright (C) 2015 Free Software Foundation, Inc.
++   Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.  */
++
++#ifndef _VECINTRIN_H
++#define _VECINTRIN_H
++
++#ifdef __VEC__
++
++#define __VFTCI_ZERO           (1<<11)
++#define __VFTCI_ZERO_N         (1<<10)
++#define __VFTCI_NORMAL          (1<<9)
++#define __VFTCI_NORMAL_N        (1<<8)
++#define __VFTCI_SUBNORMAL       (1<<7)
++#define __VFTCI_SUBNORMAL_N     (1<<6)
++#define __VFTCI_INF             (1<<5)
++#define __VFTCI_INF_N           (1<<4)
++#define __VFTCI_QNAN            (1<<3)
++#define __VFTCI_QNAN_N          (1<<2)
++#define __VFTCI_SNAN            (1<<1)
++#define __VFTCI_SNAN_N          (1<<0)
++
++/* This also accepts a type for its parameter, so it is not enough
++   to #define vec_step to __builtin_vec_step.  */
++#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0)
++
++static inline int
++__lcbb(const void *ptr, int bndry)
++{
++  int code;
++  switch (bndry)
++    {
++    case 64: code = 0; break;
++    case 128: code = 1; break;
++    case 256: code = 2; break;
++    case 512: code = 3; break;
++    case 1024: code = 4; break;
++    case 2048: code = 5; break;
++    case 4096: code = 6; break;
++    default: return 0;
++    }
++  return __builtin_s390_lcbb (ptr, code);
++}
++
++#define vec_all_nle(X, Y) vec_all_nge ((Y), (X))
++#define vec_all_nlt(X, Y) vec_all_ngt ((Y), (X))
++#define vec_any_nle(X, Y) vec_any_nge ((Y), (X))
++#define vec_any_nlt(X, Y) vec_any_ngt ((Y), (X))
++#define vec_genmask __builtin_s390_vgbm
++#define vec_genmasks_8 __builtin_s390_vgmb
++#define vec_genmasks_16 __builtin_s390_vgmh
++#define vec_genmasks_32 __builtin_s390_vgmf
++#define vec_genmasks_64 __builtin_s390_vgmg
++#define vec_splat_u8 __builtin_s390_vec_splat_u8
++#define vec_splat_s8 __builtin_s390_vec_splat_s8
++#define vec_splat_u16 __builtin_s390_vec_splat_u16
++#define vec_splat_s16 __builtin_s390_vec_splat_s16
++#define vec_splat_u32 __builtin_s390_vec_splat_u32
++#define vec_splat_s32 __builtin_s390_vec_splat_s32
++#define vec_splat_u64 __builtin_s390_vec_splat_u64
++#define vec_splat_s64 __builtin_s390_vec_splat_s64
++#define vec_add_u128 __builtin_s390_vaq
++#define vec_addc_u128 __builtin_s390_vaccq
++#define vec_adde_u128 __builtin_s390_vacq
++#define vec_addec_u128 __builtin_s390_vacccq
++#define vec_checksum __builtin_s390_vcksm
++#define vec_gfmsum_128 __builtin_s390_vgfmg
++#define vec_gfmsum_accum_128 __builtin_s390_vgfmag
++#define vec_sub_u128 __builtin_s390_vsq
++#define vec_subc_u128 __builtin_s390_vscbiq
++#define vec_sube_u128 __builtin_s390_vsbiq
++#define vec_subec_u128 __builtin_s390_vsbcbiq
++#define vec_ceil(X) __builtin_s390_vfidb((X), 4, 6)
++#define vec_roundp(X) __builtin_s390_vfidb((X), 4, 6)
++#define vec_floor(X) __builtin_s390_vfidb((X), 4, 7)
++#define vec_roundm(X) __builtin_s390_vfidb((X), 4, 7)
++#define vec_trunc(X) __builtin_s390_vfidb((X), 4, 5)
++#define vec_roundz(X) __builtin_s390_vfidb((X), 4, 5)
++#define vec_roundc(X) __builtin_s390_vfidb((X), 4, 0)
++#define vec_round(X) __builtin_s390_vfidb((X), 4, 4)
++#define vec_madd __builtin_s390_vfmadb
++#define vec_msub __builtin_s390_vfmsdb
++
++static inline int
++vec_all_nan (__vector double a)
++{
++  int cc;
++  __builtin_s390_vftcidb (a,
++			  __VFTCI_QNAN
++			  | __VFTCI_QNAN_N
++			  | __VFTCI_SNAN
++			  | __VFTCI_SNAN_N, &cc);
++  return cc == 0 ? 1 : 0;
++}
++
++static inline int
++vec_all_numeric (__vector double a)
++{
++  int cc;
++  __builtin_s390_vftcidb (a,
++			  __VFTCI_QNAN
++			  | __VFTCI_QNAN_N
++			  | __VFTCI_SNAN
++			  | __VFTCI_SNAN_N, &cc);
++  return cc == 3 ? 1 : 0;	/* CC 3: no element is a NaN.  */
++}
++
++static inline int
++vec_any_nan (__vector double a)
++{
++  int cc;
++  __builtin_s390_vftcidb (a,
++			  __VFTCI_QNAN
++			  | __VFTCI_QNAN_N
++			  | __VFTCI_SNAN
++			  | __VFTCI_SNAN_N, &cc);
++  return cc != 3 ? 1 : 0;
++}
++
++static inline int
++vec_any_numeric (__vector double a)
++{
++  int cc;
++  __builtin_s390_vftcidb (a,
++			  __VFTCI_QNAN
++			  | __VFTCI_QNAN_N
++			  | __VFTCI_SNAN
++			  | __VFTCI_SNAN_N, &cc);
++  return cc != 0 ? 1 : 0;	/* CC 0: every element is a NaN.  */
++}
++#define vec_gather_element __builtin_s390_vec_gather_element
++#define vec_xld2 __builtin_s390_vec_xld2
++#define vec_xlw4 __builtin_s390_vec_xlw4
++#define vec_splats __builtin_s390_vec_splats
++#define vec_insert __builtin_s390_vec_insert
++#define vec_promote __builtin_s390_vec_promote
++#define vec_extract __builtin_s390_vec_extract
++#define vec_insert_and_zero __builtin_s390_vec_insert_and_zero
++#define vec_load_bndry __builtin_s390_vec_load_bndry
++#define vec_load_pair __builtin_s390_vec_load_pair
++#define vec_load_len __builtin_s390_vec_load_len
++#define vec_mergeh __builtin_s390_vec_mergeh
++#define vec_mergel __builtin_s390_vec_mergel
++#define vec_pack __builtin_s390_vec_pack
++#define vec_packs __builtin_s390_vec_packs
++#define vec_packs_cc __builtin_s390_vec_packs_cc
++#define vec_packsu __builtin_s390_vec_packsu
++#define vec_packsu_cc __builtin_s390_vec_packsu_cc
++#define vec_perm __builtin_s390_vec_perm
++#define vec_permi __builtin_s390_vec_permi
++#define vec_splat __builtin_s390_vec_splat
++#define vec_scatter_element __builtin_s390_vec_scatter_element
++#define vec_sel __builtin_s390_vec_sel
++#define vec_extend_s64 __builtin_s390_vec_extend_s64
++#define vec_xstd2 __builtin_s390_vec_xstd2
++#define vec_xstw4 __builtin_s390_vec_xstw4
++#define vec_store_len __builtin_s390_vec_store_len
++#define vec_unpackh __builtin_s390_vec_unpackh
++#define vec_unpackl __builtin_s390_vec_unpackl
++#define vec_addc __builtin_s390_vec_addc
++#define vec_and __builtin_s390_vec_and
++#define vec_andc __builtin_s390_vec_andc
++#define vec_avg __builtin_s390_vec_avg
++#define vec_all_eq __builtin_s390_vec_all_eq
++#define vec_all_ne __builtin_s390_vec_all_ne
++#define vec_all_ge __builtin_s390_vec_all_ge
++#define vec_all_gt __builtin_s390_vec_all_gt
++#define vec_all_le __builtin_s390_vec_all_le
++#define vec_all_lt __builtin_s390_vec_all_lt
++#define vec_any_eq __builtin_s390_vec_any_eq
++#define vec_any_ne __builtin_s390_vec_any_ne
++#define vec_any_ge __builtin_s390_vec_any_ge
++#define vec_any_gt __builtin_s390_vec_any_gt
++#define vec_any_le __builtin_s390_vec_any_le
++#define vec_any_lt __builtin_s390_vec_any_lt
++#define vec_cmpeq __builtin_s390_vec_cmpeq
++#define vec_cmpge __builtin_s390_vec_cmpge
++#define vec_cmpgt __builtin_s390_vec_cmpgt
++#define vec_cmple __builtin_s390_vec_cmple
++#define vec_cmplt __builtin_s390_vec_cmplt
++#define vec_cntlz __builtin_s390_vec_cntlz
++#define vec_cnttz __builtin_s390_vec_cnttz
++#define vec_xor __builtin_s390_vec_xor
++#define vec_gfmsum __builtin_s390_vec_gfmsum
++#define vec_gfmsum_accum __builtin_s390_vec_gfmsum_accum
++#define vec_abs __builtin_s390_vec_abs
++#define vec_max __builtin_s390_vec_max
++#define vec_min __builtin_s390_vec_min
++#define vec_mladd __builtin_s390_vec_mladd
++#define vec_mhadd __builtin_s390_vec_mhadd
++#define vec_meadd __builtin_s390_vec_meadd
++#define vec_moadd __builtin_s390_vec_moadd
++#define vec_mulh __builtin_s390_vec_mulh
++#define vec_mule __builtin_s390_vec_mule
++#define vec_mulo __builtin_s390_vec_mulo
++#define vec_nor __builtin_s390_vec_nor
++#define vec_or __builtin_s390_vec_or
++#define vec_popcnt __builtin_s390_vec_popcnt
++#define vec_rl __builtin_s390_vec_rl
++#define vec_rli __builtin_s390_vec_rli
++#define vec_rl_mask __builtin_s390_vec_rl_mask
++#define vec_sll __builtin_s390_vec_sll
++#define vec_slb __builtin_s390_vec_slb
++#define vec_sld __builtin_s390_vec_sld
++#define vec_sldw __builtin_s390_vec_sldw
++#define vec_sral __builtin_s390_vec_sral
++#define vec_srab __builtin_s390_vec_srab
++#define vec_srl __builtin_s390_vec_srl
++#define vec_srb __builtin_s390_vec_srb
++#define vec_subc __builtin_s390_vec_subc
++#define vec_sum2 __builtin_s390_vec_sum2
++#define vec_sum_u128 __builtin_s390_vec_sum_u128
++#define vec_sum4 __builtin_s390_vec_sum4
++#define vec_test_mask __builtin_s390_vec_test_mask
++#define vec_find_any_eq_idx __builtin_s390_vec_find_any_eq_idx
++#define vec_find_any_ne_idx __builtin_s390_vec_find_any_ne_idx
++#define vec_find_any_eq_or_0_idx __builtin_s390_vec_find_any_eq_or_0_idx
++#define vec_find_any_ne_or_0_idx __builtin_s390_vec_find_any_ne_or_0_idx
++#define vec_find_any_eq __builtin_s390_vec_find_any_eq
++#define vec_find_any_ne __builtin_s390_vec_find_any_ne
++#define vec_find_any_eq_idx_cc __builtin_s390_vec_find_any_eq_idx_cc
++#define vec_find_any_ne_idx_cc __builtin_s390_vec_find_any_ne_idx_cc
++#define vec_find_any_eq_or_0_idx_cc __builtin_s390_vec_find_any_eq_or_0_idx_cc
++#define vec_find_any_ne_or_0_idx_cc __builtin_s390_vec_find_any_ne_or_0_idx_cc
++#define vec_find_any_eq_cc __builtin_s390_vec_find_any_eq_cc
++#define vec_find_any_ne_cc __builtin_s390_vec_find_any_ne_cc
++#define vec_cmpeq_idx __builtin_s390_vec_cmpeq_idx
++#define vec_cmpeq_or_0_idx __builtin_s390_vec_cmpeq_or_0_idx
++#define vec_cmpeq_idx_cc __builtin_s390_vec_cmpeq_idx_cc
++#define vec_cmpeq_or_0_idx_cc __builtin_s390_vec_cmpeq_or_0_idx_cc
++#define vec_cmpne_idx __builtin_s390_vec_cmpne_idx
++#define vec_cmpne_or_0_idx __builtin_s390_vec_cmpne_or_0_idx
++#define vec_cmpne_idx_cc __builtin_s390_vec_cmpne_idx_cc
++#define vec_cmpne_or_0_idx_cc __builtin_s390_vec_cmpne_or_0_idx_cc
++#define vec_cp_until_zero __builtin_s390_vec_cp_until_zero
++#define vec_cp_until_zero_cc __builtin_s390_vec_cp_until_zero_cc
++#define vec_cmprg_idx __builtin_s390_vec_cmprg_idx
++#define vec_cmpnrg_idx __builtin_s390_vec_cmpnrg_idx
++#define vec_cmprg_or_0_idx __builtin_s390_vec_cmprg_or_0_idx
++#define vec_cmpnrg_or_0_idx __builtin_s390_vec_cmpnrg_or_0_idx
++#define vec_cmprg __builtin_s390_vec_cmprg
++#define vec_cmpnrg __builtin_s390_vec_cmpnrg
++#define vec_cmprg_idx_cc __builtin_s390_vec_cmprg_idx_cc
++#define vec_cmpnrg_idx_cc __builtin_s390_vec_cmpnrg_idx_cc
++#define vec_cmprg_or_0_idx_cc __builtin_s390_vec_cmprg_or_0_idx_cc
++#define vec_cmpnrg_or_0_idx_cc __builtin_s390_vec_cmpnrg_or_0_idx_cc
++#define vec_cmprg_cc __builtin_s390_vec_cmprg_cc
++#define vec_cmpnrg_cc __builtin_s390_vec_cmpnrg_cc
++#define vec_all_nge __builtin_s390_vec_all_nge
++#define vec_all_ngt __builtin_s390_vec_all_ngt
++#define vec_any_nge __builtin_s390_vec_any_nge
++#define vec_any_ngt __builtin_s390_vec_any_ngt
++#define vec_ctd __builtin_s390_vec_ctd
++#define vec_ctd_s64 __builtin_s390_vec_ctd_s64
++#define vec_ctd_u64 __builtin_s390_vec_ctd_u64
++#define vec_ctsl __builtin_s390_vec_ctsl
++#define vec_ctul __builtin_s390_vec_ctul
++#define vec_ld2f __builtin_s390_vec_ld2f
++#define vec_st2f __builtin_s390_vec_st2f
++#endif /* __VEC__ */
++#endif /* _VECINTRIN_H */
+--- gcc/config/s390/vector.md	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/vector.md	2016-05-11 18:11:04.000000000 +0200
+@@ -0,0 +1,1229 @@
++;;- Instruction patterns for the System z vector facility
++;;  Copyright (C) 2015 Free Software Foundation, Inc.
++;;  Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
++
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify it under
++;; the terms of the GNU General Public License as published by the Free
++;; Software Foundation; either version 3, or (at your option) any later
++;; version.
++
++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
++;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++;; for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++; All vector modes supported in a vector register
++(define_mode_iterator V
++  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
++   V2SF V4SF V1DF V2DF])
++(define_mode_iterator VT
++  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
++   V2SF V4SF V1DF V2DF V1TF V1TI TI])
++
++; All vector modes directly supported by the hardware having full vector reg size
++; V_HW2 is a duplicate of V_HW so that a pattern can use two iterators
++; which expand independently of each other, e.g. vcond.
++(define_mode_iterator V_HW  [V16QI V8HI V4SI V2DI V2DF])
++(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF])
++; Including TI for instructions that support it (va, vn, ...)
++(define_mode_iterator VT_HW [V16QI V8HI V4SI V2DI V2DF V1TI TI])
++
++; All full size integer vector modes supported in a vector register + TImode
++(define_mode_iterator VIT_HW    [V16QI V8HI V4SI V2DI V1TI TI])
++(define_mode_iterator VI_HW     [V16QI V8HI V4SI V2DI])
++(define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
++(define_mode_iterator VI_HW_HS  [V8HI V4SI])
++(define_mode_iterator VI_HW_QH  [V16QI V8HI])
++
++; All integer vector modes supported in a vector register + TImode
++(define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1TI TI])
++(define_mode_iterator VI  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI])
++(define_mode_iterator VI_QHS [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI])
++
++(define_mode_iterator V_8   [V1QI])
++(define_mode_iterator V_16  [V2QI  V1HI])
++(define_mode_iterator V_32  [V4QI  V2HI V1SI V1SF])
++(define_mode_iterator V_64  [V8QI  V4HI V2SI V2SF V1DI V1DF])
++(define_mode_iterator V_128 [V16QI V8HI V4SI V4SF V2DI V2DF V1TI V1TF])
++
++; A blank for vector modes and a * for TImode.  This is used to hide
++; the TImode expander name in case it is defined already.  See addti3
++; for an example.
++(define_mode_attr ti* [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "")
++		       (V1HI "") (V2HI "") (V4HI "") (V8HI "")
++		       (V1SI "") (V2SI "") (V4SI "")
++		       (V1DI "") (V2DI "")
++		       (V1TI "*") (TI "*")])
++
++; The element type of the vector.
++(define_mode_attr non_vec[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI")
++			  (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI")
++			  (V1SI "SI") (V2SI "SI") (V4SI "SI")
++			  (V1DI "DI") (V2DI "DI")
++			  (V1TI "TI")
++			  (V1SF "SF") (V2SF "SF") (V4SF "SF")
++			  (V1DF "DF") (V2DF "DF")
++			  (V1TF "TF")])
++
++; The instruction suffix
++(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
++			(V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h")
++			(V1SI "f") (V2SI "f") (V4SI "f")
++			(V1DI "g") (V2DI "g")
++			(V1TI "q") (TI "q")
++			(V1SF "f") (V2SF "f") (V4SF "f")
++			(V1DF "g") (V2DF "g")
++			(V1TF "q")])
++
++; This is for vmalhw.  It gets a 'w' attached to avoid confusion with
++; multiply and add logical high (vmalh).
++(define_mode_attr w [(V1QI "")  (V2QI "")  (V4QI "")  (V8QI "") (V16QI "")
++		     (V1HI "w") (V2HI "w") (V4HI "w") (V8HI "w")
++		     (V1SI "")  (V2SI "")  (V4SI "")
++		     (V1DI "")  (V2DI "")])
++
++; Resulting mode of a vector comparison.  For floating point modes an
++; integer vector mode with the same element size is picked.
++(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI")
++			    (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI")
++			    (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
++			    (V1DI "V1DI") (V2DI "V2DI")
++			    (V1TI "V1TI")
++			    (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
++			    (V1DF "V1DI") (V2DF "V2DI")
++			    (V1TF "V1TI")])
++
++; Vector with doubled element size.
++(define_mode_attr vec_double [(V1QI "V1HI") (V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI "V8HI")
++			      (V1HI "V1SI") (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI")
++			      (V1SI "V1DI") (V2SI "V1DI") (V4SI "V2DI")
++			      (V1DI "V1TI") (V2DI "V1TI")
++			      (V1SF "V1DF") (V2SF "V1DF") (V4SF "V2DF")])
++
++; Vector with half the element size.
++(define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI "V16QI")
++			    (V1SI "V2HI") (V2SI "V4HI") (V4SI "V8HI")
++			    (V1DI "V2SI") (V2DI "V4SI")
++			    (V1TI "V2DI")
++			    (V1DF "V2SF") (V2DF "V4SF")
++			    (V1TF "V1DF")])
++
++; The comparisons not setting CC iterate over the rtx code.
++(define_code_iterator VFCMP_HW_OP [eq gt ge])
++(define_code_attr asm_fcmp_op [(eq "e") (gt "h") (ge "he")])
++
++
++
++; Comparison operators on int and fp compares which are directly
++; supported by the HW.
++(define_code_iterator VICMP_HW_OP [eq gt gtu])
++; For int insn_cmp_op can be used in the insn name as well as in the asm output.
++(define_code_attr insn_cmp_op [(eq "eq") (gt "h") (gtu "hl") (ge "he")])
++
++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4)
++(define_constants
++  [(VSTRING_FLAG_IN         8)   ; invert result
++   (VSTRING_FLAG_RT         4)   ; result type
++   (VSTRING_FLAG_ZS         2)   ; zero search
++   (VSTRING_FLAG_CS         1)]) ; condition code set
++
++(include "vx-builtins.md")
++
++; Full HW vector size moves
++(define_insn "mov<mode>"
++  [(set (match_operand:V_128 0 "nonimmediate_operand" "=v, v,QR,  v,  v,  v,  v,  v,v,d")
++	(match_operand:V_128 1 "general_operand"      " v,QR, v,j00,jm1,jyy,jxx,jKK,d,v"))]
++  "TARGET_VX"
++  "@
++   vlr\t%v0,%v1
++   vl\t%v0,%1
++   vst\t%v1,%0
++   vzero\t%v0
++   vone\t%v0
++   vgbm\t%v0,%t1
++   vgm<bhfgq>\t%v0,%s1,%e1
++   vrepi<bhfgq>\t%v0,%h1
++   vlvgp\t%v0,%1,%N1
++   #"
++  [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")])
++
++(define_split	; splits the "#" alternative (d <- v) of the V_128 mov<mode> above
++  [(set (match_operand:V_128 0 "register_operand" "")
++	(match_operand:V_128 1 "register_operand" ""))]
++  "TARGET_VX && GENERAL_REG_P (operands[0]) && VECTOR_REG_P (operands[1])"
++  [(set (match_dup 2)	; subword 0 of op0 <- doubleword element 0 of op1
++	(unspec:DI [(subreg:V2DI (match_dup 1) 0)
++		    (const_int 0)] UNSPEC_VEC_EXTRACT))
++   (set (match_dup 3)	; subword 1 of op0 <- doubleword element 1 of op1
++	(unspec:DI [(subreg:V2DI (match_dup 1) 0)
++		    (const_int 1)] UNSPEC_VEC_EXTRACT))]
++{
++  operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
++  operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
++})
++
++; Moves for smaller vector modes.
++
++; In these patterns only the vlr, vone, and vzero instructions write
++; VR bytes outside the mode.  This should be ok since we disallow
++; formerly bigger modes being accessed with smaller modes via
++; subreg. Note: The vone, vzero instructions could easily be replaced
++; with vlei which would only access the bytes belonging to the mode.
++; However, this would probably be slower.
++
++(define_insn "mov<mode>"
++  [(set (match_operand:V_8 0 "nonimmediate_operand" "=v,v,d, v,QR,  v,  v,  v,  v,d,  Q,  S,  Q,  S,  d,  d,d,d,d,R,T")
++        (match_operand:V_8 1 "general_operand"      " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,j00,jm1,jm1,j00,jm1,R,T,b,d,d"))]
++  ""
++  "@
++   vlr\t%v0,%v1
++   vlvgb\t%v0,%1,0
++   vlgvb\t%0,%v1,0
++   vleb\t%v0,%1,0
++   vsteb\t%v1,%0,0
++   vzero\t%v0
++   vone\t%v0
++   vgbm\t%v0,%t1
++   vgm\t%v0,%s1,%e1
++   lr\t%0,%1
++   mvi\t%0,0
++   mviy\t%0,0
++   mvi\t%0,-1
++   mviy\t%0,-1
++   lhi\t%0,0
++   lhi\t%0,-1
++   lh\t%0,%1
++   lhy\t%0,%1
++   lhrl\t%0,%1
++   stc\t%1,%0
++   stcy\t%1,%0"
++  [(set_attr "op_type"      "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SI,SIY,SI,SIY,RI,RI,RX,RXY,RIL,RX,RXY")])
++
++(define_insn "mov<mode>"
++  [(set (match_operand:V_16 0 "nonimmediate_operand" "=v,v,d, v,QR,  v,  v,  v,  v,d,  Q,  Q,  d,  d,d,d,d,R,T,b")
++        (match_operand:V_16 1 "general_operand"      " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,jm1,j00,jm1,R,T,b,d,d,d"))]
++  ""
++  "@
++   vlr\t%v0,%v1
++   vlvgh\t%v0,%1,0
++   vlgvh\t%0,%v1,0
++   vleh\t%v0,%1,0
++   vsteh\t%v1,%0,0
++   vzero\t%v0
++   vone\t%v0
++   vgbm\t%v0,%t1
++   vgm\t%v0,%s1,%e1
++   lr\t%0,%1
++   mvhhi\t%0,0
++   mvhhi\t%0,-1
++   lhi\t%0,0
++   lhi\t%0,-1
++   lh\t%0,%1
++   lhy\t%0,%1
++   lhrl\t%0,%1
++   sth\t%1,%0
++   sthy\t%1,%0
++   sthrl\t%1,%0"
++  [(set_attr "op_type"      "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SIL,SIL,RI,RI,RX,RXY,RIL,RX,RXY,RIL")])
++
++(define_insn "mov<mode>"	; V_32 moves; operands matched by a v constraint are printed with %v
++  [(set (match_operand:V_32 0 "nonimmediate_operand" "=f,f,f,R,T,v,v,d, v,QR,  f,  v,  v,  v,  v,  Q,  Q,  d,  d,d,d,d,d,R,T,b")
++	(match_operand:V_32 1 "general_operand"      " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,b,d,R,T,d,d,d"))]
++  "TARGET_VX"
++  "@
++   lder\t%v0,%v1
++   lde\t%0,%1
++   ley\t%0,%1
++   ste\t%1,%0
++   stey\t%1,%0
++   vlr\t%v0,%v1
++   vlvgf\t%v0,%1,0
++   vlgvf\t%0,%v1,0
++   vlef\t%v0,%1,0
++   vstef\t%v1,%0,0
++   lzer\t%v0
++   vzero\t%v0
++   vone\t%v0
++   vgbm\t%v0,%t1
++   vgm\t%v0,%s1,%e1
++   mvhi\t%0,0
++   mvhi\t%0,-1
++   lhi\t%0,0
++   lhi\t%0,-1
++   lrl\t%0,%1
++   lr\t%0,%1
++   l\t%0,%1
++   ly\t%0,%1
++   st\t%1,%0
++   sty\t%1,%0
++   strl\t%1,%0"
++  [(set_attr "op_type" "RRE,RXE,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,SIL,SIL,RI,RI,
++                        RIL,RR,RX,RXY,RX,RXY,RIL")])
++
++(define_insn "mov<mode>"
++  [(set (match_operand:V_64 0 "nonimmediate_operand"
++         "=f,f,f,R,T,v,v,d, v,QR,  f,  v,  v,  v,  v,  Q,  Q,  d,  d,f,d,d,d, d,RT,b")
++        (match_operand:V_64 1 "general_operand"
++         " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,d,f,b,d,RT, d,d"))]
++  "TARGET_ZARCH"
++  "@
++   ldr\t%0,%1
++   ld\t%0,%1
++   ldy\t%0,%1
++   std\t%1,%0
++   stdy\t%1,%0
++   vlr\t%v0,%v1
++   vlvgg\t%v0,%1,0
++   vlgvg\t%0,%v1,0
++   vleg\t%v0,%1,0
++   vsteg\t%v1,%0,0
++   lzdr\t%0
++   vzero\t%v0
++   vone\t%v0
++   vgbm\t%v0,%t1
++   vgm\t%v0,%s1,%e1
++   mvghi\t%0,0
++   mvghi\t%0,-1
++   lghi\t%0,0
++   lghi\t%0,-1
++   ldgr\t%0,%1
++   lgdr\t%0,%1
++   lgrl\t%0,%1
++   lgr\t%0,%1
++   lg\t%0,%1
++   stg\t%1,%0
++   stgrl\t%1,%0"
++  [(set_attr "op_type" "RRE,RX,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,
++                        SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
++
++
++; vec_load_lanes?
++
++; vec_store_lanes?
++
++; FIXME: Also support vector mode operands for operand 1.
++; FIXME: A target memory operand seems to be useful otherwise we end
++; up with vl vlvgg vst.  Shouldn't the middle-end be able to handle
++; that itself?
++(define_insn "*vec_set<mode>"
++  [(set (match_operand:V                    0 "register_operand"             "=v, v,v")
++	(unspec:V [(match_operand:<non_vec> 1 "general_operand"               "d,QR,K")
++		   (match_operand:SI        2 "shift_count_or_setmem_operand" "Y, I,I")
++		   (match_operand:V         3 "register_operand"              "0, 0,0")]
++		  UNSPEC_VEC_SET))]
++  "TARGET_VX"
++  "@
++   vlvg<bhfgq>\t%v0,%1,%Y2
++   vle<bhfgq>\t%v0,%1,%2
++   vlei<bhfgq>\t%v0,%1,%2"
++  [(set_attr "op_type" "VRS,VRX,VRI")])
++
++; vec_set is supposed to *modify* an existing vector so operand 0 is
++; duplicated as input operand.
++(define_expand "vec_set<mode>"
++  [(set (match_operand:V                    0 "register_operand"              "")
++	(unspec:V [(match_operand:<non_vec> 1 "general_operand"               "")
++		   (match_operand:SI        2 "shift_count_or_setmem_operand" "")
++		   (match_dup 0)]
++		   UNSPEC_VEC_SET))]
++  "TARGET_VX")
++
++; FIXME: Also support vector mode operands for operand 0.
++; FIXME: This should be (vec_select ..) or similar, but vec_select only allows constant selectors :(
++; This is used via RTL standard name as well as for expanding the builtin
++(define_insn "vec_extract<mode>"
++  [(set (match_operand:<non_vec> 0 "nonimmediate_operand"                        "=d,QR")
++	(unspec:<non_vec> [(match_operand:V  1 "register_operand"                " v, v")
++			   (match_operand:SI 2 "shift_count_or_setmem_operand"   " Y, I")]
++			  UNSPEC_VEC_EXTRACT))]
++  "TARGET_VX"
++  "@
++   vlgv<bhfgq>\t%0,%v1,%Y2
++   vste<bhfgq>\t%v1,%0,%2"
++  [(set_attr "op_type" "VRS,VRX")])
++
++(define_expand "vec_init<V_HW:mode>"
++  [(match_operand:V_HW 0 "register_operand" "")
++   (match_operand:V_HW 1 "nonmemory_operand" "")]
++  "TARGET_VX"
++{
++  s390_expand_vec_init (operands[0], operands[1]);
++  DONE;
++})
++
++; Replicate from vector element
++(define_insn "*vec_splat<mode>"
++  [(set (match_operand:V_HW   0 "register_operand" "=v")
++	(vec_duplicate:V_HW
++	 (vec_select:<non_vec>
++	  (match_operand:V_HW 1 "register_operand"  "v")
++	  (parallel
++	   [(match_operand:QI 2 "const_mask_operand" "C")]))))]
++  "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<V_HW:MODE>mode)"
++  "vrep<bhfgq>\t%v0,%v1,%2"
++  [(set_attr "op_type" "VRI")])
++
++(define_insn "*vec_splats<mode>"
++  [(set (match_operand:V_HW                          0 "register_operand" "=v,v,v,v")
++	(vec_duplicate:V_HW (match_operand:<non_vec> 1 "general_operand"  "QR,K,v,d")))]
++  "TARGET_VX"
++  "@
++   vlrep<bhfgq>\t%v0,%1
++   vrepi<bhfgq>\t%v0,%h1
++   vrep<bhfgq>\t%v0,%v1,0
++   #"
++  [(set_attr "op_type" "VRX,VRI,VRI,*")])
++
++; vec_splats is supposed to replicate op1 into all elements of op0
++; This splitter first sets the rightmost element of op0 to op1 and
++; then does a vec_splat to replicate that element into all other
++; elements.
++(define_split
++  [(set (match_operand:V_HW                          0 "register_operand" "")
++	(vec_duplicate:V_HW (match_operand:<non_vec> 1 "register_operand" "")))]
++  "TARGET_VX && GENERAL_REG_P (operands[1])"
++  [(set (match_dup 0)
++	(unspec:V_HW [(match_dup 1) (match_dup 2) (match_dup 0)] UNSPEC_VEC_SET))
++   (set (match_dup 0)
++	(vec_duplicate:V_HW
++	 (vec_select:<non_vec>
++	  (match_dup 0) (parallel [(match_dup 2)]))))]
++{
++  operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
++})
++
++(define_expand "vcond<V_HW:mode><V_HW2:mode>"
++  [(set (match_operand:V_HW 0 "register_operand" "")
++	(if_then_else:V_HW
++	 (match_operator 3 "comparison_operator"
++			 [(match_operand:V_HW2 4 "register_operand" "")
++			  (match_operand:V_HW2 5 "register_operand" "")])
++	 (match_operand:V_HW 1 "nonmemory_operand" "")
++	 (match_operand:V_HW 2 "nonmemory_operand" "")))]
++  "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
++{
++  s390_expand_vcond (operands[0], operands[1], operands[2],
++		     GET_CODE (operands[3]), operands[4], operands[5]);
++  DONE;
++})
++
++(define_expand "vcondu<V_HW:mode><V_HW2:mode>"
++  [(set (match_operand:V_HW 0 "register_operand" "")
++	(if_then_else:V_HW
++	 (match_operator 3 "comparison_operator"
++			 [(match_operand:V_HW2 4 "register_operand" "")
++			  (match_operand:V_HW2 5 "register_operand" "")])
++	 (match_operand:V_HW 1 "nonmemory_operand" "")
++	 (match_operand:V_HW 2 "nonmemory_operand" "")))]
++  "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
++{
++  s390_expand_vcond (operands[0], operands[1], operands[2],
++		     GET_CODE (operands[3]), operands[4], operands[5]);
++  DONE;
++})
++
++; We only have HW support for byte vectors.  The middle-end is
++; supposed to lower the mode if required.
++(define_insn "vec_permv16qi"
++  [(set (match_operand:V16QI 0 "register_operand"               "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
++		       (match_operand:V16QI 2 "register_operand" "v")
++		       (match_operand:V16QI 3 "register_operand" "v")]
++		      UNSPEC_VEC_PERM))]
++  "TARGET_VX"
++  "vperm\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_perm_const for V2DI using vpdi?
++
++;;
++;; Vector integer arithmetic instructions
++;;
++
++; vab, vah, vaf, vag, vaq
++
++; We use nonimmediate_operand instead of register_operand since it is
++; better to have the reloads into VRs instead of splitting the
++; operation into two DImode ADDs.
++(define_insn "<ti*>add<mode>3"
++  [(set (match_operand:VIT           0 "nonimmediate_operand" "=v")
++	(plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "%v")
++		  (match_operand:VIT 2 "general_operand"       "v")))]
++  "TARGET_VX"
++  "va<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vsb, vsh, vsf, vsg, vsq
++(define_insn "<ti*>sub<mode>3"
++  [(set (match_operand:VIT            0 "nonimmediate_operand" "=v")
++	(minus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
++		   (match_operand:VIT 2 "general_operand"  "v")))]
++  "TARGET_VX"
++  "vs<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmlb, vmlhw, vmlf
++(define_insn "mul<mode>3"
++  [(set (match_operand:VI_QHS              0 "register_operand" "=v")
++	(mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "%v")
++		     (match_operand:VI_QHS 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vml<bhfgq><w>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vlcb, vlch, vlcf, vlcg
++(define_insn "neg<mode>2"
++  [(set (match_operand:VI         0 "register_operand" "=v")
++	(neg:VI (match_operand:VI 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vlc<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vlpb, vlph, vlpf, vlpg
++(define_insn "abs<mode>2"
++  [(set (match_operand:VI         0 "register_operand" "=v")
++	(abs:VI (match_operand:VI 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vlp<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector sum across
++
++; Sum across DImode parts of the 1st operand and add the rightmost
++; element of 2nd operand
++; vsumgh, vsumgf
++(define_insn "*vec_sum2<mode>"
++  [(set (match_operand:V2DI 0 "register_operand" "=v")
++	(unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "v")
++		      (match_operand:VI_HW_HS 2 "register_operand" "v")]
++		     UNSPEC_VEC_VSUMG))]
++  "TARGET_VX"
++  "vsumg<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vsumb, vsumh
++(define_insn "*vec_sum4<mode>"
++  [(set (match_operand:V4SI 0 "register_operand" "=v")
++	(unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "v")
++		      (match_operand:VI_HW_QH 2 "register_operand" "v")]
++		     UNSPEC_VEC_VSUM))]
++  "TARGET_VX"
++  "vsum<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++;;
++;; Vector bit instructions (int + fp)
++;;
++
++; Vector and
++
++(define_insn "and<mode>3"
++  [(set (match_operand:VT         0 "register_operand" "=v")
++	(and:VT (match_operand:VT 1 "register_operand" "%v")
++		(match_operand:VT 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vn\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector or
++
++(define_insn "ior<mode>3"
++  [(set (match_operand:VT         0 "register_operand" "=v")
++	(ior:VT (match_operand:VT 1 "register_operand" "%v")
++		(match_operand:VT 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vo\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector xor
++
++(define_insn "xor<mode>3"
++  [(set (match_operand:VT         0 "register_operand" "=v")
++	(xor:VT (match_operand:VT 1 "register_operand" "%v")
++		(match_operand:VT 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vx\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Bitwise inversion of a vector - used for vec_cmpne
++(define_insn "*not<mode>"
++  [(set (match_operand:VT         0 "register_operand" "=v")
++	(not:VT (match_operand:VT 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vnot\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; Vector population count
++
++(define_insn "popcountv16qi2"
++  [(set (match_operand:V16QI                0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand"  "v")]
++		      UNSPEC_POPCNT))]
++  "TARGET_VX"
++  "vpopct\t%v0,%v1,0"
++  [(set_attr "op_type" "VRR")])
++
++; vpopct only counts bits in byte elements.  Bigger element sizes need
++; to be emulated.  Word and doubleword elements can use the sum across
++; instructions.  For halfword sized elements we do a shift of a copy
++; of the result, add it to the result and extend it to halfword
++; element size (unpack).
++
++(define_expand "popcountv8hi2"
++  [(set (match_dup 2)
++	(unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)]
++		      UNSPEC_POPCNT))
++   ; Make a copy of the result
++   (set (match_dup 3) (match_dup 2))
++   ; Generate the shift count operand in a VR (value 8 into byte element 7)
++   (set (match_dup 4) (match_dup 5))
++   (set (match_dup 4) (unspec:V16QI [(const_int 8)
++				     (const_int 7)
++				     (match_dup 4)] UNSPEC_VEC_SET))
++   ; Vector shift right logical by one byte
++   (set (match_dup 3)
++	(unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
++   ; Add the shifted and the original result
++   (set (match_dup 2)
++	(plus:V16QI (match_dup 2) (match_dup 3)))
++   ; Generate mask for the odd numbered byte elements
++   (set (match_dup 3)
++	(const_vector:V16QI [(const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)
++			     (const_int 0) (const_int 255)]))
++   ; Zero out the even indexed bytes
++   (set (match_operand:V8HI 0 "register_operand" "=v")
++	(and:V8HI (subreg:V8HI (match_dup 2) 0)
++		  (subreg:V8HI (match_dup 3) 0)))
++]
++  "TARGET_VX"
++{
++  operands[2] = gen_reg_rtx (V16QImode);
++  operands[3] = gen_reg_rtx (V16QImode);
++  operands[4] = gen_reg_rtx (V16QImode);
++  operands[5] = CONST0_RTX (V16QImode);
++})
++
++(define_expand "popcountv4si2"
++  [(set (match_dup 2)
++	(unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)]
++		      UNSPEC_POPCNT))
++   (set (match_operand:V4SI 0 "register_operand" "=v")
++	(unspec:V4SI [(match_dup 2) (match_dup 3)]
++		     UNSPEC_VEC_VSUM))]
++  "TARGET_VX"
++{
++  operands[2] = gen_reg_rtx (V16QImode);
++  operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode));
++})
++
++(define_expand "popcountv2di2"
++  [(set (match_dup 2)
++	(unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)]
++		      UNSPEC_POPCNT))
++   (set (match_dup 3)
++	(unspec:V4SI [(match_dup 2) (match_dup 4)]
++		     UNSPEC_VEC_VSUM))
++   (set (match_operand:V2DI 0 "register_operand" "=v")
++	(unspec:V2DI [(match_dup 3) (match_dup 5)]
++		     UNSPEC_VEC_VSUMG))]
++  "TARGET_VX"
++{
++  operands[2] = gen_reg_rtx (V16QImode);
++  operands[3] = gen_reg_rtx (V4SImode);
++  operands[4] = force_reg (V16QImode, CONST0_RTX (V16QImode));
++  operands[5] = force_reg (V4SImode, CONST0_RTX (V4SImode));
++})
++
++; Count leading zeros
++(define_insn "clz<mode>2"
++  [(set (match_operand:V        0 "register_operand" "=v")
++	(clz:V (match_operand:V 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vclz<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; Count trailing zeros
++(define_insn "ctz<mode>2"
++  [(set (match_operand:V        0 "register_operand" "=v")
++	(ctz:V (match_operand:V 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vctz<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector rotate instructions
++
++; Each vector element rotated by a scalar
++; verllb, verllh, verllf, verllg
++(define_insn "rotl<mode>3"
++  [(set (match_operand:VI            0 "register_operand"             "=v")
++	(rotate:VI (match_operand:VI 1 "register_operand"              "v")
++		   (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
++  "TARGET_VX"
++  "verll<bhfgq>\t%v0,%v1,%Y2"
++  [(set_attr "op_type" "VRS")])
++
++; Each vector element rotated by the corresponding vector element
++; verllvb, verllvh, verllvf, verllvg
++(define_insn "vrotl<mode>3"
++  [(set (match_operand:VI            0 "register_operand" "=v")
++	(rotate:VI (match_operand:VI 1 "register_operand"  "v")
++		   (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "verllv<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Shift each element by scalar value
++
++; veslb, veslh, veslf, veslg
++(define_insn "ashl<mode>3"
++  [(set (match_operand:VI            0 "register_operand"             "=v")
++	(ashift:VI (match_operand:VI 1 "register_operand"              "v")
++		   (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
++  "TARGET_VX"
++  "vesl<bhfgq>\t%v0,%v1,%Y2"
++  [(set_attr "op_type" "VRS")])
++
++; vesrab, vesrah, vesraf, vesrag
++(define_insn "ashr<mode>3"
++  [(set (match_operand:VI              0 "register_operand"             "=v")
++	(ashiftrt:VI (match_operand:VI 1 "register_operand"              "v")
++		     (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
++  "TARGET_VX"
++  "vesra<bhfgq>\t%v0,%v1,%Y2"
++  [(set_attr "op_type" "VRS")])
++
++; vesrlb, vesrlh, vesrlf, vesrlg
++(define_insn "lshr<mode>3"
++  [(set (match_operand:VI              0 "register_operand"             "=v")
++	(lshiftrt:VI (match_operand:VI 1 "register_operand"              "v")
++		     (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
++  "TARGET_VX"
++  "vesrl<bhfgq>\t%v0,%v1,%Y2"
++  [(set_attr "op_type" "VRS")])
++
++
++; Shift each element by corresponding vector element
++
++; veslvb, veslvh, veslvf, veslvg
++(define_insn "vashl<mode>3"
++  [(set (match_operand:VI            0 "register_operand" "=v")
++	(ashift:VI (match_operand:VI 1 "register_operand"  "v")
++		   (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "veslv<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vesravb, vesravh, vesravf, vesravg
++(define_insn "vashr<mode>3"
++  [(set (match_operand:VI              0 "register_operand" "=v")
++	(ashiftrt:VI (match_operand:VI 1 "register_operand"  "v")
++		     (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vesrav<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vesrlvb, vesrlvh, vesrlvf, vesrlvg
++(define_insn "vlshr<mode>3"
++  [(set (match_operand:VI              0 "register_operand" "=v")
++	(lshiftrt:VI (match_operand:VI 1 "register_operand"  "v")
++		     (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vesrlv<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; Vector shift right logical by byte
++
++; Pattern used by e.g. popcount
++(define_insn "*vec_srb<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"                    "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"       "v")
++		      (match_operand:<tointvec> 2 "register_operand" "v")]
++		     UNSPEC_VEC_SRLB))]
++  "TARGET_VX"
++  "vsrlb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; vmnb, vmnh, vmnf, vmng
++(define_insn "smin<mode>3"
++  [(set (match_operand:VI          0 "register_operand" "=v")
++	(smin:VI (match_operand:VI 1 "register_operand" "%v")
++		 (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vmn<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmxb, vmxh, vmxf, vmxg
++(define_insn "smax<mode>3"
++  [(set (match_operand:VI          0 "register_operand" "=v")
++	(smax:VI (match_operand:VI 1 "register_operand" "%v")
++		 (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vmx<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmnlb, vmnlh, vmnlf, vmnlg
++(define_insn "umin<mode>3"
++  [(set (match_operand:VI          0 "register_operand" "=v")
++	(umin:VI (match_operand:VI 1 "register_operand" "%v")
++		 (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vmnl<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmxlb, vmxlh, vmxlf, vmxlg
++(define_insn "umax<mode>3"
++  [(set (match_operand:VI          0 "register_operand" "=v")
++	(umax:VI (match_operand:VI 1 "register_operand" "%v")
++		 (match_operand:VI 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vmxl<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmeb, vmeh, vmef
++(define_insn "vec_widen_smult_even_<mode>"
++  [(set (match_operand:<vec_double>                 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_QHS 2 "register_operand"  "v")]
++			     UNSPEC_VEC_SMULT_EVEN))]
++  "TARGET_VX"
++  "vme<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmleb, vmleh, vmlef
++(define_insn "vec_widen_umult_even_<mode>"
++  [(set (match_operand:<vec_double>                 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_QHS 2 "register_operand"  "v")]
++			     UNSPEC_VEC_UMULT_EVEN))]
++  "TARGET_VX"
++  "vmle<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmob, vmoh, vmof
++(define_insn "vec_widen_smult_odd_<mode>"
++  [(set (match_operand:<vec_double>                 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_QHS 2 "register_operand"  "v")]
++			     UNSPEC_VEC_SMULT_ODD))]
++  "TARGET_VX"
++  "vmo<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmlob, vmloh, vmlof
++(define_insn "vec_widen_umult_odd_<mode>"
++  [(set (match_operand:<vec_double>                 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_QHS 2 "register_operand"  "v")]
++			     UNSPEC_VEC_UMULT_ODD))]
++  "TARGET_VX"
++  "vmlo<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vec_widen_umult_hi
++; vec_widen_umult_lo
++; vec_widen_smult_hi
++; vec_widen_smult_lo
++
++; vec_widen_ushiftl_hi
++; vec_widen_ushiftl_lo
++; vec_widen_sshiftl_hi
++; vec_widen_sshiftl_lo
++
++;;
++;; Vector floating point arithmetic instructions
++;;
++
++(define_insn "addv2df3"
++  [(set (match_operand:V2DF            0 "register_operand" "=v")
++	(plus:V2DF (match_operand:V2DF 1 "register_operand" "%v")
++		   (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfadb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "subv2df3" ; V2DF subtraction, emitted as vfsdb
++  [(set (match_operand:V2DF             0 "register_operand" "=v")
++	(minus:V2DF (match_operand:V2DF 1 "register_operand"  "v") ; MINUS is not commutative: no % modifier
++		    (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfsdb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "mulv2df3"
++  [(set (match_operand:V2DF            0 "register_operand" "=v")
++	(mult:V2DF (match_operand:V2DF 1 "register_operand" "%v")
++		   (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfmdb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "divv2df3"
++  [(set (match_operand:V2DF           0 "register_operand" "=v")
++	(div:V2DF (match_operand:V2DF 1 "register_operand"  "v")
++		  (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfddb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "sqrtv2df2"
++  [(set (match_operand:V2DF            0 "register_operand" "=v")
++	(sqrt:V2DF (match_operand:V2DF 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfsqdb\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "fmav2df4"
++  [(set (match_operand:V2DF           0 "register_operand" "=v")
++	(fma:V2DF (match_operand:V2DF 1 "register_operand" "%v")
++		  (match_operand:V2DF 2 "register_operand"  "v")
++		  (match_operand:V2DF 3 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vfmadb\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "fmsv2df4"
++  [(set (match_operand:V2DF                     0 "register_operand" "=v")
++	(fma:V2DF (match_operand:V2DF           1 "register_operand" "%v")
++		  (match_operand:V2DF           2 "register_operand"  "v")
++		  (neg:V2DF (match_operand:V2DF 3 "register_operand"  "v"))))]
++  "TARGET_VX"
++  "vfmsdb\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "negv2df2"
++  [(set (match_operand:V2DF           0 "register_operand" "=v")
++	(neg:V2DF (match_operand:V2DF 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vflcdb\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "absv2df2"
++  [(set (match_operand:V2DF           0 "register_operand" "=v")
++	(abs:V2DF (match_operand:V2DF 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vflpdb\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*negabsv2df2"
++  [(set (match_operand:V2DF                     0 "register_operand" "=v")
++	(neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand"  "v"))))]
++  "TARGET_VX"
++  "vflndb\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; Emulate with compare + select
++(define_insn_and_split "smaxv2df3"
++  [(set (match_operand:V2DF            0 "register_operand" "=v")
++	(smax:V2DF (match_operand:V2DF 1 "register_operand" "%v")
++		   (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "#"
++  ""
++  [(set (match_dup 3)
++	(gt:V2DI (match_dup 1) (match_dup 2)))
++   (set (match_dup 0)
++	(if_then_else:V2DF
++	 (eq (match_dup 3) (match_dup 4))
++	 (match_dup 2)
++	 (match_dup 1)))]
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++  operands[4] = CONST0_RTX (V2DImode);
++})
++
++; Emulate with compare + select
++(define_insn_and_split "sminv2df3"
++  [(set (match_operand:V2DF            0 "register_operand" "=v")
++	(smin:V2DF (match_operand:V2DF 1 "register_operand" "%v")
++		   (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "#"
++  ""
++  [(set (match_dup 3)
++	(gt:V2DI (match_dup 1) (match_dup 2)))
++   (set (match_dup 0)
++	(if_then_else:V2DF
++	 (eq (match_dup 3) (match_dup 4))
++	 (match_dup 1)
++	 (match_dup 2)))]
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++  operands[4] = CONST0_RTX (V2DImode);
++})
++
++
++;;
++;; Integer compares
++;;
++
++(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode>_nocc"
++  [(set (match_operand:VI                 2 "register_operand" "=v")
++	(VICMP_HW_OP:VI (match_operand:VI 0 "register_operand"  "v")
++			(match_operand:VI 1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vc<VICMP_HW_OP:insn_cmp_op><VI:bhfgq>\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++;;
++;; Floating point compares
++;;
++
++; EQ, GT, GE
++(define_insn "*vec_cmp<VFCMP_HW_OP:code>v2df_nocc"
++  [(set (match_operand:V2DI                   0 "register_operand" "=v")
++	(VFCMP_HW_OP:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++			  (match_operand:V2DF 2 "register_operand"  "v")))]
++   "TARGET_VX"
++   "vfc<VFCMP_HW_OP:asm_fcmp_op>db\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; Expanders for not directly supported comparisons
++
++; UNEQ a u== b -> !(a > b | b > a)
++(define_expand "vec_cmpuneqv2df"
++  [(set (match_operand:V2DI          0 "register_operand" "=v")
++	(gt:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++		 (match_operand:V2DF 2 "register_operand"  "v")))
++   (set (match_dup 3)
++	(gt:V2DI (match_dup 2) (match_dup 1)))
++   (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))
++   (set (match_dup 0) (not:V2DI (match_dup 0)))]
++  "TARGET_VX"
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++})
++
++; LTGT a <> b -> a > b | b > a
++(define_expand "vec_cmpltgtv2df"
++  [(set (match_operand:V2DI          0 "register_operand" "=v")
++	(gt:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++		 (match_operand:V2DF 2 "register_operand"  "v")))
++   (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
++   (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))]
++  "TARGET_VX"
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++})
++
++; ORDERED (a, b): a >= b | b > a
++(define_expand "vec_orderedv2df"
++  [(set (match_operand:V2DI          0 "register_operand" "=v")
++	(ge:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++		 (match_operand:V2DF 2 "register_operand"  "v")))
++   (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
++   (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))]
++  "TARGET_VX"
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++})
++
++; UNORDERED (a, b): !ORDERED (a, b)
++(define_expand "vec_unorderedv2df"
++  [(set (match_operand:V2DI          0 "register_operand" "=v")
++	(ge:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++		 (match_operand:V2DF 2 "register_operand"  "v")))
++   (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
++   (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))
++   (set (match_dup 0) (not:V2DI (match_dup 0)))]
++  "TARGET_VX"
++{
++  operands[3] = gen_reg_rtx (V2DImode);
++})
++
++(define_insn "*vec_load_pairv2di"
++  [(set (match_operand:V2DI                0 "register_operand" "=v")
++	(vec_concat:V2DI (match_operand:DI 1 "register_operand"  "d")
++			 (match_operand:DI 2 "register_operand"  "d")))]
++  "TARGET_VX"
++  "vlvgp\t%v0,%1,%2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vllv16qi"
++  [(set (match_operand:V16QI              0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:SI  1 "register_operand"  "d")
++		       (match_operand:BLK 2 "memory_operand"    "Q")]
++		      UNSPEC_VEC_LOAD_LEN))]
++  "TARGET_VX"
++  "vll\t%v0,%1,%2"
++  [(set_attr "op_type" "VRS")])
++
++; vfenebs, vfenehs, vfenefs
++; vfenezbs, vfenezhs, vfenezfs
++(define_insn "vec_vfenes<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:QI 3 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VFENE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)]
++		      UNSPEC_VEC_VFENECC))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[3]);
++
++  gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
++  flags &= ~VSTRING_FLAG_CS;
++
++  if (flags == VSTRING_FLAG_ZS)
++    return "vfenez<bhfgq>s\t%v0,%v1,%v2";
++  return "vfene<bhfgq>s\t%v0,%v1,%v2";
++}
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector select
++
++; The following splitters simplify vec_sel for constant 0 or -1
++; selection sources.  This is required to generate efficient code for
++; vcond.
++
++; a = b == c;
++(define_split
++  [(set (match_operand:V 0 "register_operand" "")
++	(if_then_else:V
++	 (eq (match_operand:<tointvec> 3 "register_operand" "")
++	     (match_operand:V 4 "const0_operand" ""))
++	 (match_operand:V 1 "const0_operand" "")
++	 (match_operand:V 2 "all_ones_operand" "")))]
++  "TARGET_VX"
++  [(set (match_dup 0) (match_dup 3))]
++{
++  PUT_MODE (operands[3], <V:MODE>mode);
++})
++
++; a = ~(b == c)
++(define_split
++  [(set (match_operand:V 0 "register_operand" "")
++	(if_then_else:V
++	 (eq (match_operand:<tointvec> 3 "register_operand" "")
++	     (match_operand:V 4 "const0_operand" ""))
++	 (match_operand:V 1 "all_ones_operand" "")
++	 (match_operand:V 2 "const0_operand" "")))]
++  "TARGET_VX"
++  [(set (match_dup 0) (not:V (match_dup 3)))]
++{
++  PUT_MODE (operands[3], <V:MODE>mode);
++})
++
++; a = b != c
++(define_split
++  [(set (match_operand:V 0 "register_operand" "")
++	(if_then_else:V
++	 (ne (match_operand:<tointvec> 3 "register_operand" "")
++	     (match_operand:V 4 "const0_operand" ""))
++	 (match_operand:V 1 "all_ones_operand" "")
++	 (match_operand:V 2 "const0_operand" "")))]
++  "TARGET_VX"
++  [(set (match_dup 0) (match_dup 3))]
++{
++  PUT_MODE (operands[3], <V:MODE>mode);
++})
++
++; a = ~(b != c)
++(define_split
++  [(set (match_operand:V 0 "register_operand" "")
++	(if_then_else:V
++	 (ne (match_operand:<tointvec> 3 "register_operand" "")
++	     (match_operand:V 4 "const0_operand" ""))
++	 (match_operand:V 1 "const0_operand" "")
++	 (match_operand:V 2 "all_ones_operand" "")))]
++  "TARGET_VX"
++  [(set (match_dup 0) (not:V (match_dup 3)))]
++{
++  PUT_MODE (operands[3], <V:MODE>mode);
++})
++
++; op0 = op3 == 0 ? op1 : op2
++(define_insn "*vec_sel0<mode>"
++  [(set (match_operand:V 0 "register_operand" "=v")
++	(if_then_else:V
++	 (eq (match_operand:<tointvec> 3 "register_operand" "v")
++	     (match_operand:<tointvec> 4 "const0_operand" ""))
++	 (match_operand:V 1 "register_operand" "v")
++	 (match_operand:V 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vsel\t%v0,%2,%1,%3"
++  [(set_attr "op_type" "VRR")])
++
++; op0 = ~op3 == 0 ? op1 : op2
++(define_insn "*vec_sel0<mode>"
++  [(set (match_operand:V 0 "register_operand" "=v")
++	(if_then_else:V
++	 (eq (not:<tointvec> (match_operand:<tointvec> 3 "register_operand" "v"))
++	     (match_operand:<tointvec> 4 "const0_operand" ""))
++	 (match_operand:V 1 "register_operand" "v")
++	 (match_operand:V 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vsel\t%v0,%1,%2,%3"
++  [(set_attr "op_type" "VRR")])
++
++; op0 = op3 == -1 ? op1 : op2
++(define_insn "*vec_sel1<mode>"
++  [(set (match_operand:V 0 "register_operand" "=v")
++	(if_then_else:V
++	 (eq (match_operand:<tointvec> 3 "register_operand" "v")
++	     (match_operand:<tointvec> 4 "all_ones_operand" ""))
++	 (match_operand:V 1 "register_operand" "v")
++	 (match_operand:V 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vsel\t%v0,%1,%2,%3"
++  [(set_attr "op_type" "VRR")])
++
++; op0 = ~op3 == -1 ? op1 : op2
++(define_insn "*vec_sel1<mode>"
++  [(set (match_operand:V 0 "register_operand" "=v")
++	(if_then_else:V
++	 (eq (not:<tointvec> (match_operand:<tointvec> 3 "register_operand" "v"))
++	     (match_operand:<tointvec> 4 "all_ones_operand" ""))
++	 (match_operand:V 1 "register_operand" "v")
++	 (match_operand:V 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vsel\t%v0,%2,%1,%3"
++  [(set_attr "op_type" "VRR")])
++
++
++
++; reduc_smin
++; reduc_smax
++; reduc_umin
++; reduc_umax
++
++; vec_shl vrep + vsl
++; vec_shr
++
++; vec_pack_trunc
++; vec_pack_ssat
++; vec_pack_usat
++; vec_pack_sfix_trunc
++; vec_pack_ufix_trunc
++; vec_unpacks_hi
++; vec_unpacks_low
++; vec_unpacku_hi
++; vec_unpacku_low
++; vec_unpacks_float_hi
++; vec_unpacks_float_lo
++; vec_unpacku_float_hi
++; vec_unpacku_float_lo
+--- gcc/config/s390/vx-builtins.md	1970-01-01 01:00:00.000000000 +0100
++++ gcc/config/s390/vx-builtins.md	2016-05-11 19:46:05.504890170 +0200
+@@ -0,0 +1,2081 @@
++;;- Instruction patterns for the System z vector facility builtins.
++;;  Copyright (C) 2015 Free Software Foundation, Inc.
++;;  Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
++
++;; This file is part of GCC.
++
++;; GCC is free software; you can redistribute it and/or modify it under
++;; the terms of the GNU General Public License as published by the Free
++;; Software Foundation; either version 3, or (at your option) any later
++;; version.
++
++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
++;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++;; for more details.
++
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++; The patterns in this file are enabled with -mzvector
++
++(define_mode_iterator V_HW_64 [V2DI V2DF])
++(define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF])
++(define_mode_iterator VI_HW_SD [V4SI V2DI])
++(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF])
++(define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI])
++
++; The element type of the vector with floating point modes translated
++; to int modes of the same size.
++(define_mode_attr non_vec_int[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI")
++			      (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI")
++			      (V1SI "SI") (V2SI "SI") (V4SI "SI")
++			      (V1DI "DI") (V2DI "DI")
++			      (V1SF "SI") (V2SF "SI") (V4SF "SI")
++			      (V1DF "DI") (V2DF "DI")])
++
++; Condition code modes generated by int comparisons
++(define_mode_iterator VICMP [CCVEQ CCVH CCVHU])
++
++; Comparisons supported by the vec_cmp* builtins
++(define_code_iterator intcmp [eq gt gtu ge geu lt ltu le leu])
++(define_code_iterator fpcmp  [eq gt ge lt le])
++
++; Comparisons supported by the vec_all/any* builtins
++(define_code_iterator intcmpcc [eq ne gt ge lt le gtu geu ltu leu])
++(define_code_iterator fpcmpcc  [eq ne gt ge unle unlt lt le])
++
++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4)
++(define_constants
++  [(VSTRING_FLAG_IN         8)   ; invert result
++   (VSTRING_FLAG_RT         4)   ; result type
++   (VSTRING_FLAG_ZS         2)   ; zero search
++   (VSTRING_FLAG_CS         1)]) ; condition code set
++
++; Rounding modes as used by e.g. VFI
++(define_constants
++  [(VEC_RND_CURRENT                0)
++   (VEC_RND_NEAREST_AWAY_FROM_ZERO 1)
++   (VEC_RND_SHORT_PREC             3)
++   (VEC_RND_NEAREST_TO_EVEN        4)
++   (VEC_RND_TO_ZERO                5)
++   (VEC_RND_TO_INF                 6)
++   (VEC_RND_TO_MINF                7)])
++
++
++; Vector gather element
++
++(define_insn "vec_gather_element<mode>"
++  [(set (match_operand:V_HW_32_64                     0 "register_operand"  "=v")
++	(unspec:V_HW_32_64 [(match_operand:V_HW_32_64 1 "register_operand"   "0")
++			    (match_operand:<tointvec> 2 "register_operand"   "v")
++			    (match_operand:BLK        3 "memory_operand"    "QR")
++			    (match_operand:QI         4 "const_mask_operand" "C")]
++			   UNSPEC_VEC_GATHER))]
++  "TARGET_VX && UINTVAL (operands[4]) < GET_MODE_NUNITS (<V_HW_32_64:MODE>mode)"
++  "vge<bhfgq>\t%0,%O3(%v2,%R3),%b4"
++  [(set_attr "op_type" "VRV")])
++
++(define_expand "vec_genmask<mode>" ; set operand 0 to a constant vector with the same bit mask in every element
++  [(match_operand:VI_HW 0 "register_operand" "=v")
++   (match_operand:QI    1 "const_int_operand" "C")
++   (match_operand:QI    2 "const_int_operand" "C")]
++  "TARGET_VX"
++{
++  int nunits = GET_MODE_NUNITS (<VI_HW:MODE>mode);
++  int bitlen = GET_MODE_UNIT_BITSIZE (<VI_HW:MODE>mode);
++  /* Mirror operands 1 and 2 within the element (bit little endian style).  */
++  int end = bitlen - 1 - INTVAL (operands[1]);
++  int start = bitlen - 1 - INTVAL (operands[2]);
++  rtx const_vec[16];
++  int i;
++  unsigned HOST_WIDE_INT mask;
++  bool swapped_p = false;
++
++  if (start > end)  /* Build the mask for the gap between them; inverted below.  */
++    {
++      i = start - 1; start = end + 1; end = i;
++      swapped_p = true;
++    }
++  if (end == 63)  /* Avoid the shift by end + 1 == 64 in the else arm.  */
++    mask = (unsigned HOST_WIDE_INT) -1;
++  else
++    mask = ((unsigned HOST_WIDE_INT) 1 << (end + 1)) - 1;  /* Bits 0..end.  */
++
++  mask &= ~(((unsigned HOST_WIDE_INT) 1 << start) - 1);  /* Clear bits below start.  */
++
++  if (swapped_p)
++    mask = ~mask;
++
++  for (i = 0; i < nunits; i++)  /* Same mask, truncated to the element mode, in every element.  */
++    const_vec[i] = GEN_INT (trunc_int_for_mode (mask,
++			      GET_MODE_INNER (<VI_HW:MODE>mode)));
++
++  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
++			  gen_rtx_CONST_VECTOR (<VI_HW:MODE>mode,
++						gen_rtvec_v (nunits, const_vec))));
++  DONE;
++})
++
++(define_expand "vec_genbytemaskv16qi" ; set operand 0 to a V16QI constant of 0 / -1 bytes chosen by the low 16 bits of operand 1
++  [(match_operand:V16QI 0 "register_operand"  "")
++   (match_operand:HI    1 "const_int_operand" "")]
++  "TARGET_VX"
++{
++  int i;
++  unsigned mask = 0x8000;  /* Bit 15 of operand 1 controls element 0.  */
++  rtx const_vec[16];
++  unsigned HOST_WIDE_INT byte_mask = INTVAL (operands[1]);
++
++  for (i = 0; i < 16; i++)
++    {
++      if (mask & byte_mask)
++	const_vec[i] = constm1_rtx;  /* Bit set: element is -1.  */
++      else
++	const_vec[i] = const0_rtx;  /* Bit clear: element is 0.  */
++      mask = mask >> 1;  /* Next lower bit for the next element.  */
++    }
++  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
++			  gen_rtx_CONST_VECTOR (V16QImode,
++						gen_rtvec_v (16, const_vec))));
++  DONE;
++})
++
++(define_expand "vec_splats<mode>"
++  [(set (match_operand:V_HW                          0 "register_operand" "")
++	(vec_duplicate:V_HW (match_operand:<non_vec> 1 "general_operand"  "")))]
++  "TARGET_VX")
++
++(define_expand "vec_insert<mode>"
++  [(set (match_operand:V_HW                    0 "register_operand" "")
++	(unspec:V_HW [(match_operand:<non_vec> 2 "register_operand" "")
++		      (match_operand:SI        3 "shift_count_or_setmem_operand" "")
++		      (match_operand:V_HW      1 "register_operand" "")]
++		     UNSPEC_VEC_SET))]
++  "TARGET_VX"
++  "")
++
++; This is vec_set + modulo arithmetic on the element selector (op 2)
++(define_expand "vec_promote<mode>"
++  [(set (match_operand:V_HW                    0 "register_operand" "")
++	(unspec:V_HW [(match_operand:<non_vec> 1 "register_operand" "")
++		      (match_operand:SI        2 "shift_count_or_setmem_operand" "")
++		      (match_dup 0)]
++		     UNSPEC_VEC_SET))]
++  "TARGET_VX"
++  "")
++
++; vec_extract is also an RTL standard name -> vector.md
++
++(define_insn "vec_insert_and_zero<mode>"
++  [(set (match_operand:V_HW                    0 "register_operand" "=v")
++	(unspec:V_HW [(match_operand:<non_vec> 1 "memory_operand"   "QR")]
++		     UNSPEC_VEC_INSERT_AND_ZERO))]
++  "TARGET_VX"
++  "vllez<bhfgq>\t%v0,%1"
++  [(set_attr "op_type" "VRX")])
++
++(define_insn "vlbb"
++  [(set (match_operand:V16QI              0 "register_operand"   "=v")
++	(unspec:V16QI [(match_operand:BLK 1 "memory_operand"     "QR")
++		       (match_operand:QI  2 "const_mask_operand"  "C")]
++		      UNSPEC_VEC_LOAD_BNDRY))]
++  "TARGET_VX && UINTVAL (operands[2]) < 7"
++  "vlbb\t%v0,%1,%2"
++  [(set_attr "op_type" "VRX")])
++
++; FIXME: The following two patterns could use vec_merge.  But what is
++; the canonical form: (vec_select (vec_merge op0 op1)) or (vec_merge
++; (vec_select op0) (vec_select op1))?
++(define_insn "vec_mergeh<mode>"
++  [(set (match_operand:V_HW               0 "register_operand" "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"  "v")
++		      (match_operand:V_HW 2 "register_operand"  "v")]
++		     UNSPEC_VEC_MERGEH))]
++  "TARGET_VX"
++  "vmrh<bhfgq>\t%v0,%1,%2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_mergel<mode>"
++  [(set (match_operand:V_HW               0 "register_operand" "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"  "v")
++		      (match_operand:V_HW 2 "register_operand"  "v")]
++		     UNSPEC_VEC_MERGEL))]
++  "TARGET_VX"
++  "vmrl<bhfgq>\t%v0,%1,%2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector pack
++
++(define_insn "vec_pack<mode>"
++  [(set (match_operand:<vec_half>                    0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_operand:VI_HW_HSD 1 "register_operand"  "v")
++			    (match_operand:VI_HW_HSD 2 "register_operand"  "v")]
++			   UNSPEC_VEC_PACK))]
++  "TARGET_VX"
++  "vpk<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector pack saturate
++
++(define_insn "vec_packs<mode>"
++  [(set (match_operand:<vec_half>                    0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_operand:VI_HW_HSD 1 "register_operand"  "v")
++			    (match_operand:VI_HW_HSD 2 "register_operand"  "v")]
++			   UNSPEC_VEC_PACK_SATURATE))]
++  "TARGET_VX"
++  "vpks<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; This is vec_packs_cc + loading cc into a caller specified memory location.
++(define_expand "vec_packs_cc<mode>"
++  [(parallel
++    [(set (reg:CCRAW CC_REGNUM)
++	  (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "")
++			 (match_operand:VI_HW_HSD 2 "register_operand" "")]
++			UNSPEC_VEC_PACK_SATURATE_GENCC))
++     (set (match_operand:<vec_half> 0 "register_operand" "")
++	  (unspec:<vec_half> [(match_dup 1) (match_dup 2)]
++			     UNSPEC_VEC_PACK_SATURATE_CC))])
++   (set (match_dup 4)
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))
++   (set (match_operand:SI 3 "memory_operand" "")
++	(match_dup 4))]
++  "TARGET_VX"
++{
++  operands[4] = gen_reg_rtx (SImode);
++})
++
++(define_insn "*vec_packs_cc<mode>"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v")
++		       (match_operand:VI_HW_HSD 2 "register_operand" "v")]
++		      UNSPEC_VEC_PACK_SATURATE_GENCC))
++   (set (match_operand:<vec_half> 0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_dup 1) (match_dup 2)]
++			   UNSPEC_VEC_PACK_SATURATE_CC))]
++  "TARGET_VX"
++  "vpks<bhfgq>s\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector pack logical saturate
++
++(define_insn "vec_packsu<mode>"
++  [(set (match_operand:<vec_half>                    0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_operand:VI_HW_HSD 1 "register_operand"  "v")
++			    (match_operand:VI_HW_HSD 2 "register_operand"  "v")]
++			   UNSPEC_VEC_PACK_UNSIGNED_SATURATE))]
++  "TARGET_VX"
++  "vpkls<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; Emulate saturate unsigned pack on signed operands.
++; Zero out negative elements and continue with the unsigned saturating pack.
++(define_expand "vec_packsu_u<mode>"
++  [(set (match_operand:<vec_half>                    0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_operand:VI_HW_HSD 1 "register_operand"  "v")
++			    (match_operand:VI_HW_HSD 2 "register_operand"  "v")]
++			   UNSPEC_VEC_PACK_UNSIGNED_SATURATE))]
++  "TARGET_VX"
++{
++   rtx null_vec = CONST0_RTX(<MODE>mode);
++   enum machine_mode half_mode;
++   switch (<MODE>mode)
++   {
++     case V8HImode: half_mode = V16QImode; break;
++     case V4SImode: half_mode = V8HImode; break;
++     case V2DImode: half_mode = V4SImode; break;
++     default: gcc_unreachable ();
++   }
++   s390_expand_vcond (operands[1], operands[1], null_vec,
++		      GE, operands[1], null_vec);
++   s390_expand_vcond (operands[2], operands[2], null_vec,
++		      GE, operands[2], null_vec);
++   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
++			   gen_rtx_UNSPEC (half_mode,
++					   gen_rtvec (2, operands[1], operands[2]),
++					   UNSPEC_VEC_PACK_UNSIGNED_SATURATE)));
++   DONE;
++})
++
++; This is vec_packsu_cc + loading cc into a caller specified memory location.
++; FIXME: The reg to target mem copy should be issued by reload?!
++(define_expand "vec_packsu_cc<mode>"
++  [(parallel
++    [(set (reg:CCRAW CC_REGNUM)
++	  (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "")
++			 (match_operand:VI_HW_HSD 2 "register_operand" "")]
++			UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC))
++     (set (match_operand:<vec_half> 0 "register_operand" "")
++	  (unspec:<vec_half> [(match_dup 1) (match_dup 2)]
++			     UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))])
++   (set (match_dup 4)
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))
++   (set (match_operand:SI 3 "memory_operand" "")
++	(match_dup 4))]
++  "TARGET_VX"
++{
++  operands[4] = gen_reg_rtx (SImode);
++})
++
++(define_insn "*vec_packsu_cc<mode>"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v")
++		       (match_operand:VI_HW_HSD 2 "register_operand" "v")]
++		      UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC))
++   (set (match_operand:<vec_half> 0 "register_operand" "=v")
++	(unspec:<vec_half> [(match_dup 1) (match_dup 2)]
++			   UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))]
++  "TARGET_VX"
++  "vpkls<bhfgq>s\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector permute
++
++; vec_perm is also RTL standard name, but we can only use it for V16QI
++
++(define_insn "vec_zperm<mode>"
++  [(set (match_operand:V_HW_HSD                   0 "register_operand" "=v")
++	(unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "register_operand"  "v")
++			  (match_operand:V_HW_HSD 2 "register_operand"  "v")
++			  (match_operand:V16QI    3 "register_operand"  "v")]
++			 UNSPEC_VEC_PERM))]
++  "TARGET_VX"
++  "vperm\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vec_permi<mode>" ; rewrites operand 3 so that its low two bits land on bits 0 and 2
++  [(set (match_operand:V_HW_64                  0 "register_operand"   "")
++	(unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "")
++			 (match_operand:V_HW_64 2 "register_operand"   "")
++			 (match_operand:QI      3 "const_mask_operand" "")]
++			UNSPEC_VEC_PERMI))]
++  "TARGET_VX"
++{
++  HOST_WIDE_INT val = INTVAL (operands[3]);
++  operands[3] = GEN_INT ((val & 1) | (val & 2) << 1);  /* Bit 0 stays, bit 1 moves to bit 2.  */
++})
++
++(define_insn "*vec_permi<mode>"
++  [(set (match_operand:V_HW_64                  0 "register_operand"  "=v")
++	(unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "v")
++			 (match_operand:V_HW_64 2 "register_operand"   "v")
++			 (match_operand:QI      3 "const_mask_operand" "C")]
++			UNSPEC_VEC_PERMI))]
++  "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0"
++  "vpdi\t%v0,%v1,%v2,%b3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector replicate
++
++
++; Replicate from vector element
++(define_expand "vec_splat<mode>"
++  [(set (match_operand:V_HW                      0 "register_operand"  "")
++	(vec_duplicate:V_HW (vec_select:<non_vec>
++			     (match_operand:V_HW 1 "register_operand"  "")
++			     (parallel
++			      [(match_operand:QI 2 "const_mask_operand" "")]))))]
++  "TARGET_VX")
++
++; Vector scatter element
++
++; vscef, vsceg
++
++; A 64 bit target address generated from 32 bit elements
++(define_insn "vec_scatter_elementv4si_DI"
++  [(set (mem:SI
++	 (plus:DI (zero_extend:DI
++		   (unspec:SI [(match_operand:V4SI 1 "register_operand"   "v")
++			       (match_operand:QI   3 "const_mask_operand" "C")]
++			      UNSPEC_VEC_EXTRACT))
++		  (match_operand:SI                2 "address_operand"   "ZQ")))
++	(unspec:SI [(match_operand:V4SI            0 "register_operand"   "v")
++		    (match_dup 3)] UNSPEC_VEC_EXTRACT))]
++  "TARGET_VX && TARGET_64BIT && UINTVAL (operands[3]) < 4"
++  "vscef\t%v0,%O2(%v1,%R2),%3"
++  [(set_attr "op_type" "VRV")])
++
++; A 31 bit target address is generated from 64 bit elements
++(define_insn "vec_scatter_element<V_HW_64:mode>_SI"
++  [(set (mem:<non_vec>
++	 (plus:SI (subreg:SI
++		   (unspec:<non_vec_int> [(match_operand:V_HW_64 1 "register_operand"   "v")
++					  (match_operand:QI      3 "const_mask_operand" "C")]
++					 UNSPEC_VEC_EXTRACT) 4)
++		  (match_operand:SI                              2 "address_operand"   "ZQ")))
++	(unspec:<non_vec> [(match_operand:V_HW_64                0 "register_operand"   "v")
++			   (match_dup 3)] UNSPEC_VEC_EXTRACT))]
++  "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) < GET_MODE_NUNITS (<V_HW_64:MODE>mode)"
++  "vsce<V_HW_64:bhfgq>\t%v0,%O2(%v1,%R2),%3"
++  [(set_attr "op_type" "VRV")])
++
++; Element size and target address size are the same
++(define_insn "vec_scatter_element<mode>_<non_vec_int>"
++  [(set (mem:<non_vec>
++	 (plus:<non_vec_int> (unspec:<non_vec_int>
++			      [(match_operand:<tointvec> 1 "register_operand"   "v")
++			       (match_operand:QI         3 "const_mask_operand" "C")]
++			      UNSPEC_VEC_EXTRACT)
++			     (match_operand:DI           2 "address_operand"   "ZQ")))
++	(unspec:<non_vec> [(match_operand:V_HW_32_64     0 "register_operand"   "v")
++			   (match_dup 3)] UNSPEC_VEC_EXTRACT))]
++  "TARGET_VX && UINTVAL (operands[3]) < GET_MODE_NUNITS (<V_HW_32_64:MODE>mode)"
++  "vsce<bhfgq>\t%v0,%O2(%v1,%R2),%3"
++  [(set_attr "op_type" "VRV")])
++
++; Depending on the address size we have to expand a different pattern.
++; This however cannot be represented in s390-builtins.def so we do the
++; multiplexing here in the expander.
++(define_expand "vec_scatter_element<V_HW_32_64:mode>"
++  [(match_operand:V_HW_32_64 0 "register_operand" "")
++   (match_operand:<tointvec> 1 "register_operand" "")
++   (match_operand 2 "address_operand" "")
++   (match_operand:QI 3 "const_mask_operand" "")]
++  "TARGET_VX"
++{
++  if (TARGET_64BIT)
++    {
++      PUT_MODE (operands[2], DImode);
++      emit_insn (
++	gen_vec_scatter_element<V_HW_32_64:mode>_DI (operands[0], operands[1],
++						     operands[2], operands[3]));
++    }
++  else
++    {
++      PUT_MODE (operands[2], SImode);
++      emit_insn (
++	gen_vec_scatter_element<V_HW_32_64:mode>_SI (operands[0], operands[1],
++						     operands[2], operands[3]));
++    }
++  DONE;
++})
++
++
++; Vector select
++
++; Operand 3 selects bits from either OP1 (0) or OP2 (1)
++
++; Comparison operator should not matter as long as we always use the same ?!
++
++; Operands 1 and 2 are swapped in order to match the altivec builtin.
++; If operand 3 is a const_int bitmask this would be vec_merge
++(define_expand "vec_sel<mode>"
++  [(set (match_operand:V_HW 0 "register_operand" "")
++	(if_then_else:V_HW
++	 (eq (match_operand:<tointvec> 3 "register_operand"  "")
++	     (match_dup 4))
++	 (match_operand:V_HW 2 "register_operand"  "")
++	 (match_operand:V_HW 1 "register_operand"  "")))]
++  "TARGET_VX"
++{
++  operands[4] = CONST0_RTX (<tointvec>mode);
++})
++
++
++; Vector sign extend to doubleword
++
++; Sign extend of right most vector element to respective double-word
++(define_insn "vec_extend<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "v")]
++			  UNSPEC_VEC_EXTEND))]
++  "TARGET_VX"
++  "vseg<bhfgq>\t%v0,%1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector store with length
++
++; Store bytes from OP0 to OP2 with the highest indexed byte to be
++; stored from OP0 given by OP1
++(define_insn "vstl<mode>"
++  [(set (match_operand:BLK             2 "memory_operand"   "=Q")
++	(unspec:BLK [(match_operand:V  0 "register_operand"  "v")
++		     (match_operand:SI 1 "register_operand"  "d")]
++		    UNSPEC_VEC_STORE_LEN))]
++  "TARGET_VX"
++  "vstl\t%v0,%1,%2"
++  [(set_attr "op_type" "VRS")])
++
++
++; Vector unpack high
++
++; vuphb, vuphh, vuphf
++(define_insn "vec_unpackh<mode>"
++  [(set (match_operand:<vec_double>                    0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand"  "v")]
++			     UNSPEC_VEC_UNPACKH))]
++  "TARGET_VX"
++  "vuph<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vuplhb, vuplhh, vuplhf
++(define_insn "vec_unpackh_l<mode>"
++  [(set (match_operand:<vec_double>                    0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand"  "v")]
++			     UNSPEC_VEC_UNPACKH_L))]
++  "TARGET_VX"
++  "vuplh<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector unpack low
++
++; vuplb, vuplhw, vuplf
++(define_insn "vec_unpackl<mode>"
++  [(set (match_operand:<vec_double>                    0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand"  "v")]
++			     UNSPEC_VEC_UNPACKL))]
++  "TARGET_VX"
++  "vupl<bhfgq><w>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vupllb, vupllh, vupllf
++(define_insn "vec_unpackl_l<mode>"
++  [(set (match_operand:<vec_double>                    0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand"  "v")]
++			     UNSPEC_VEC_UNPACKL_L))]
++  "TARGET_VX"
++  "vupll<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector add
++
++; vaq
++
++; zvector builtins uses V16QI operands.  So replace the modes in order
++; to map this to a TImode add.  We have to keep the V16QI mode
++; operands in the expander in order to allow some operand type
++; checking when expanding the builtin.
++(define_expand "vec_add_u128"
++  [(match_operand:V16QI 0 "register_operand" "")
++   (match_operand:V16QI 1 "register_operand" "")
++   (match_operand:V16QI 2 "register_operand" "")]
++  "TARGET_VX"
++{
++  rtx op0 = gen_rtx_SUBREG (TImode, operands[0], 0);
++  rtx op1 = gen_rtx_SUBREG (TImode, operands[1], 0);
++  rtx op2 = gen_rtx_SUBREG (TImode, operands[2], 0);
++
++  emit_insn (gen_rtx_SET (VOIDmode, op0,
++			  gen_rtx_PLUS (TImode, op1, op2)));
++  DONE;
++})
++
++; Vector add compute carry
++
++(define_insn "vec_addc<mode>"
++  [(set (match_operand:VI_HW                0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v")
++		       (match_operand:VI_HW 2 "register_operand"  "v")]
++		      UNSPEC_VEC_ADDC))]
++  "TARGET_VX"
++  "vacc<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_addc_u128"
++  [(set (match_operand:V16QI                0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v")
++		       (match_operand:V16QI 2 "register_operand"  "v")]
++		      UNSPEC_VEC_ADDC_U128))]
++  "TARGET_VX"
++  "vaccq\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector add with carry
++
++(define_insn "vec_adde_u128"
++  [(set (match_operand:V16QI                0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v")
++		       (match_operand:V16QI 2 "register_operand"  "v")
++		       (match_operand:V16QI 3 "register_operand"  "v")]
++		      UNSPEC_VEC_ADDE_U128))]
++  "TARGET_VX"
++  "vacq\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector add with carry compute carry
++
++(define_insn "vec_addec_u128"
++  [(set (match_operand:V16QI                0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v")
++		       (match_operand:V16QI 2 "register_operand"  "v")
++		       (match_operand:V16QI 3 "register_operand"  "v")]
++		      UNSPEC_VEC_ADDEC_U128))]
++  "TARGET_VX"
++  "vacccq\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector and
++
++; The following two patterns allow mixed mode and's as required for the intrinsics.
++(define_insn "and_av2df3"
++  [(set (match_operand:V2DF                        0 "register_operand" "=v")
++	(and:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand"  "v") 0)
++		  (match_operand:V2DF              2 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vn\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "and_cv2df3"
++  [(set (match_operand:V2DF                        0 "register_operand" "=v")
++	(and:V2DF (match_operand:V2DF              1 "register_operand"  "v")
++		  (subreg:V2DF (match_operand:V2DI 2 "register_operand"  "v") 0)))]
++  "TARGET_VX"
++  "vn\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector and with complement
++
++; vnc
++(define_insn "vec_andc<mode>3"
++  [(set (match_operand:VT_HW                       0 "register_operand" "=v")
++	(and:VT_HW (not:VT_HW (match_operand:VT_HW 2 "register_operand"  "v"))
++		  (match_operand:VT_HW             1 "register_operand"  "v")))]
++  "TARGET_VX"
++  "vnc\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; The following two patterns allow mixed mode and's as required for the intrinsics.
++(define_insn "vec_andc_av2df3"
++  [(set (match_operand:V2DF                        0 "register_operand" "=v")
++	(and:V2DF (not:V2DF (match_operand:V2DF    2 "register_operand"  "v"))
++		  (subreg:V2DF (match_operand:V2DI 1 "register_operand"  "v") 0)))]
++
++  "TARGET_VX"
++  "vnc\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_andc_cv2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(and:V2DF (not:V2DF (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0))
++		  (match_operand:V2DF 1 "register_operand" "v")))]
++  "TARGET_VX"
++  "vnc\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector average
++
++(define_insn "vec_avg<mode>"
++  [(set (match_operand:VI_HW                0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v")
++		       (match_operand:VI_HW 2 "register_operand"  "v")]
++		      UNSPEC_VEC_AVG))]
++  "TARGET_VX"
++  "vavg<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; Vector average logical
++
++(define_insn "vec_avgu<mode>"
++  [(set (match_operand:VI_HW                0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v")
++		       (match_operand:VI_HW 2 "register_operand"  "v")]
++		      UNSPEC_VEC_AVGU))]
++  "TARGET_VX"
++  "vavgl<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector checksum
++
++(define_insn "vec_checksum"
++  [(set (match_operand:V4SI               0 "register_operand" "=v")
++	(unspec:V4SI [(match_operand:V4SI 1 "register_operand"  "v")
++		      (match_operand:V4SI 2 "register_operand"  "v")]
++		     UNSPEC_VEC_CHECKSUM))]
++  "TARGET_VX"
++  "vcksm\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++;;
++;; Vector compare
++;;
++
++; vec_all/any int compares
++
++(define_expand "vec_all_<intcmpcc:code><VI_HW:mode>"
++  [(match_operand:SI                0 "register_operand" "")
++   (intcmpcc (match_operand:VI_HW 1 "register_operand" "")
++	     (match_operand:VI_HW 2 "register_operand" ""))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare_cc (operands[0],
++			      <intcmpcc:CODE>,
++			      operands[1],
++			      operands[2],
++			      true);
++  DONE;
++})
++
++(define_expand "vec_any_<intcmpcc:code><VI_HW:mode>"
++  [(match_operand:SI                0 "register_operand" "")
++   (intcmpcc (match_operand:VI_HW 1 "register_operand" "")
++	     (match_operand:VI_HW 2 "register_operand" ""))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare_cc (operands[0],
++			      <intcmpcc:CODE>,
++			      operands[1],
++			      operands[2],
++			      false);
++  DONE;
++})
++
++; vec_all/any fp compares
++
++(define_expand "vec_all_<fpcmpcc:code>v2df"
++  [(match_operand:SI            0 "register_operand" "")
++   (fpcmpcc (match_operand:V2DF 1 "register_operand" "")
++	    (match_operand:V2DF 2 "register_operand" ""))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare_cc (operands[0],
++			      <fpcmpcc:CODE>,
++			      operands[1],
++			      operands[2],
++			      true);
++  DONE;
++})
++
++(define_expand "vec_any_<fpcmpcc:code>v2df"
++  [(match_operand:SI            0 "register_operand" "")
++   (fpcmpcc (match_operand:V2DF 1 "register_operand" "")
++	    (match_operand:V2DF 2 "register_operand" ""))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare_cc (operands[0],
++			      <fpcmpcc:CODE>,
++			      operands[1],
++			      operands[2],
++			      false);
++  DONE;
++})
++
++
++; Compare without generating CC
++
++(define_expand "vec_cmp<intcmp:code><VI_HW:mode>"
++  [(set (match_operand:VI_HW               0 "register_operand" "=v")
++	(intcmp:VI_HW (match_operand:VI_HW 1 "register_operand"  "v")
++		      (match_operand:VI_HW 2 "register_operand"  "v")))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare (operands[0], <intcmp:CODE>, operands[1], operands[2]);
++  DONE;
++})
++
++(define_expand "vec_cmp<fpcmp:code>v2df"
++  [(set (match_operand:V2DI             0 "register_operand" "=v")
++	(fpcmp:V2DI (match_operand:V2DF 1 "register_operand"  "v")
++		    (match_operand:V2DF 2 "register_operand"  "v")))]
++  "TARGET_VX"
++{
++  s390_expand_vec_compare (operands[0], <fpcmp:CODE>, operands[1], operands[2]);
++  DONE;
++})
++
++
++; Vector count leading zeros
++
++; vec_cntlz -> clz
++; vec_cnttz -> ctz
++
++; Vector xor
++
++; vec_xor -> xor
++
++; The following two patterns allow mixed mode xor's as required for the intrinsics.
++(define_insn "xor_av2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(xor:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0)
++		  (match_operand:V2DF 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vx\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "xor_cv2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(xor:V2DF (match_operand:V2DF 1 "register_operand" "v")
++		  (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))]
++  "TARGET_VX"
++  "vx\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector Galois field multiply sum
++
++(define_insn "vec_gfmsum<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")]
++			  UNSPEC_VEC_GFMSUM))]
++  "TARGET_VX"
++  "vgfm<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_gfmsum_128"
++  [(set (match_operand:V16QI 0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")
++		       (match_operand:V2DI 2 "register_operand" "v")]
++		      UNSPEC_VEC_GFMSUM_128))]
++  "TARGET_VX"
++  "vgfmg\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_gfmsum_accum<mode>"
++  [(set (match_operand:<vec_double> 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			      (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			      (match_operand:<vec_double> 3 "register_operand" "v")]
++			     UNSPEC_VEC_GFMSUM_ACCUM))]
++  "TARGET_VX"
++  "vgfma<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_gfmsum_accum_128"
++  [(set (match_operand:V16QI 0 "register_operand" "=v")
++	(unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v")
++		       (match_operand:V2DI 2 "register_operand" "v")
++		       (match_operand:V16QI 3 "register_operand" "v")]
++		      UNSPEC_VEC_GFMSUM_ACCUM_128))]
++  "TARGET_VX"
++  "vgfmag\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; FIXME: vec_neg ?
++
++; Vector load positive: vec_abs -> abs
++; Vector maximum vec_max -> smax, logical vec_max -> umax
++; Vector minimum vec_min -> smin, logical vec_min -> umin
++
++
++; Vector multiply and add high
++
++; vec_mladd -> vec_vmal
++; vmalb, vmalhw, vmalf
++(define_insn "vec_vmal<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand"  "v")]
++			  UNSPEC_VEC_VMAL))]
++  "TARGET_VX"
++  "vmal<bhfgq><w>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_mhadd -> vec_vmah/vec_vmalh
++
++; vmahb; vmahh, vmahf, vmahg
++(define_insn "vec_vmah<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand"  "v")]
++			  UNSPEC_VEC_VMAH))]
++  "TARGET_VX"
++  "vmah<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmalhb; vmalhh, vmalhf, vmalhg
++(define_insn "vec_vmalh<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand"  "v")]
++			  UNSPEC_VEC_VMALH))]
++  "TARGET_VX"
++  "vmalh<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_meadd -> vec_vmae/vec_vmale
++
++; vmaeb; vmaeh, vmaef, vmaeg
++(define_insn "vec_vmae<mode>"
++  [(set (match_operand:<vec_double> 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand"   "%v")
++			      (match_operand:VI_HW_QHS 2 "register_operand"    "v")
++			      (match_operand:<vec_double> 3 "register_operand" "v")]
++			     UNSPEC_VEC_VMAE))]
++  "TARGET_VX"
++  "vmae<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmaleb; vmaleh, vmalef, vmaleg
++(define_insn "vec_vmale<mode>"
++  [(set (match_operand:<vec_double> 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			      (match_operand:<vec_double> 3 "register_operand" "v")]
++			     UNSPEC_VEC_VMALE))]
++  "TARGET_VX"
++  "vmale<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vec_moadd -> vec_vmao/vec_vmalo
++
++; vmaob; vmaoh, vmaof, vmaog
++(define_insn "vec_vmao<mode>"
++  [(set (match_operand:<vec_double> 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			      (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			      (match_operand:<vec_double> 3 "register_operand" "v")]
++			     UNSPEC_VEC_VMAO))]
++  "TARGET_VX"
++  "vmao<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++; vmalob; vmaloh, vmalof, vmalog
++(define_insn "vec_vmalo<mode>"
++  [(set (match_operand:<vec_double> 0 "register_operand" "=v")
++	(unspec:<vec_double> [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			      (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			      (match_operand:<vec_double> 3 "register_operand" "v")]
++			     UNSPEC_VEC_VMALO))]
++  "TARGET_VX"
++  "vmalo<bhfgq>\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector multiply high
++
++; vec_mulh -> vec_smulh/vec_umulh
++
++; vmhb, vmhh, vmhf
++(define_insn "vec_smulh<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")]
++			  UNSPEC_VEC_SMULT_HI))]
++  "TARGET_VX"
++  "vmh<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vmlhb, vmlhh, vmlhf
++(define_insn "vec_umulh<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")]
++			  UNSPEC_VEC_UMULT_HI))]
++  "TARGET_VX"
++  "vmlh<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector multiply low
++
++; vec_mule -> vec_widen_umult_even/vec_widen_smult_even
++; vec_mulo -> vec_widen_umult_odd/vec_widen_smult_odd
++
++
++; Vector nor
++
++(define_insn "vec_nor<mode>3"
++  [(set (match_operand:VT_HW 0 "register_operand" "=v")
++	(not:VT_HW (ior:VT_HW (match_operand:VT_HW 1 "register_operand" "%v")
++			      (match_operand:VT_HW 2 "register_operand" "v"))))]
++  "TARGET_VX"
++  "vno\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; The following two patterns allow mixed mode nor's as required for the intrinsics.
++(define_insn "vec_nor_av2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(not:V2DF (ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0)
++			    (match_operand:V2DF 2 "register_operand" "v"))))]
++  "TARGET_VX"
++  "vno\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_nor_cv2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(not:V2DF (ior:V2DF (match_operand:V2DF 1 "register_operand" "v")
++			    (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0))))]
++  "TARGET_VX"
++  "vno\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector or
++
++; The following two patterns allow mixed mode or's as required for the intrinsics.
++(define_insn "ior_av2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0)
++		  (match_operand:V2DF 2 "register_operand" "v")))]
++  "TARGET_VX"
++  "vo\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "ior_cv2df3"
++  [(set (match_operand:V2DF 0 "register_operand" "=v")
++	(ior:V2DF (match_operand:V2DF 1 "register_operand" "v")
++		  (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))]
++  "TARGET_VX"
++  "vo\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector population count vec_popcnt -> popcount
++; Vector element rotate left logical vec_rl -> vrotl, vec_rli -> rot
++
++; Vector element rotate and insert under mask
++
++; verimb, verimh, verimf, verimg
++(define_insn "verim<mode>"
++  [(set (match_operand:VI_HW                0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand"  "0")
++		       (match_operand:VI_HW 2 "register_operand"  "v")
++		       (match_operand:VI_HW 3 "register_operand"  "v")
++		       (match_operand:QI    4 "const_int_operand" "C")]
++		      UNSPEC_VEC_RL_MASK))]
++  "TARGET_VX"
++  "verim<bhfgq>\t%v0,%v2,%v3,%b4"
++  [(set_attr "op_type" "VRI")])
++
++
++; Vector shift left
++
++(define_insn "vec_sll<VI_HW:mode><VI_HW_QHS:mode>"
++  [(set (match_operand:VI_HW                    0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW     1 "register_operand"  "v")
++		       (match_operand:VI_HW_QHS 2 "register_operand"  "v")]
++		      UNSPEC_VEC_SLL))]
++  "TARGET_VX"
++  "vsl\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector shift left by byte
++
++(define_insn "vec_slb<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"                    "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"       "v")
++		      (match_operand:<tointvec> 2 "register_operand" "v")]
++		     UNSPEC_VEC_SLB))]
++  "TARGET_VX"
++  "vslb\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector shift left double by byte
++
++(define_insn "vec_sld<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"              "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v")
++		      (match_operand:V_HW 2 "register_operand" "v")
++		      (match_operand:QI 3 "const_int_operand"  "C")]
++		     UNSPEC_VEC_SLDB))]
++  "TARGET_VX"
++  "vsldb\t%v0,%v1,%v2,%b3"
++  [(set_attr "op_type" "VRI")])
++
++(define_expand "vec_sldw<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"               "")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand" "")
++		      (match_operand:V_HW 2 "register_operand" "")
++		      (match_operand:QI 3 "const_int_operand"  "")]
++		     UNSPEC_VEC_SLDB))]
++  "TARGET_VX"
++{
++  operands[3] = GEN_INT (INTVAL (operands[3]) << 2);
++})
++
++; Vector shift right arithmetic
++
++(define_insn "vec_sral<VI_HW:mode><VI_HW_QHS:mode>"
++  [(set (match_operand:VI_HW                    0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW     1 "register_operand"  "v")
++		       (match_operand:VI_HW_QHS 2 "register_operand"  "v")]
++		      UNSPEC_VEC_SRAL))]
++  "TARGET_VX"
++  "vsra\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector shift right arithmetic by byte
++
++(define_insn "vec_srab<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"                    "=v")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"       "v")
++		      (match_operand:<tointvec> 2 "register_operand" "v")]
++		     UNSPEC_VEC_SRAB))]
++  "TARGET_VX"
++  "vsrab\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector shift right logical
++
++(define_insn "vec_srl<VI_HW:mode><VI_HW_QHS:mode>"
++  [(set (match_operand:VI_HW                    0 "register_operand" "=v")
++	(unspec:VI_HW [(match_operand:VI_HW     1 "register_operand"  "v")
++		       (match_operand:VI_HW_QHS 2 "register_operand"  "v")]
++		      UNSPEC_VEC_SRL))]
++  "TARGET_VX"
++  "vsrl\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector shift right logical by byte
++
++; Pattern definition in vector.md
++(define_expand "vec_srb<mode>"
++  [(set (match_operand:V_HW 0 "register_operand"                     "")
++	(unspec:V_HW [(match_operand:V_HW 1 "register_operand"       "")
++		      (match_operand:<tointvec> 2 "register_operand" "")]
++		     UNSPEC_VEC_SRLB))]
++  "TARGET_VX")
++
++
++; Vector subtract
++
++(define_insn "vec_sub_u128"
++  [(set (match_operand:V16QI 0 "register_operand"               "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
++		       (match_operand:V16QI 2 "register_operand" "v")]
++		     UNSPEC_VEC_SUB_U128))]
++  "TARGET_VX"
++  "vsq\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector subtract compute borrow indication
++
++(define_insn "vec_subc<mode>"
++  [(set (match_operand:VI_HW 0 "register_operand"               "=v")
++	(unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v")
++		       (match_operand:VI_HW 2 "register_operand" "v")]
++		      UNSPEC_VEC_SUBC))]
++  "TARGET_VX"
++  "vscbi<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "vec_subc_u128"
++  [(set (match_operand:V16QI 0 "register_operand"               "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
++		       (match_operand:V16QI 2 "register_operand" "v")]
++		     UNSPEC_VEC_SUBC_U128))]
++  "TARGET_VX"
++  "vscbiq\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector subtract with borrow indication
++
++(define_insn "vec_sube_u128"
++  [(set (match_operand:V16QI 0 "register_operand"               "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
++		       (match_operand:V16QI 2 "register_operand" "v")
++		       (match_operand:V16QI 3 "register_operand" "v")]
++		      UNSPEC_VEC_SUBE_U128))]
++  "TARGET_VX"
++  "vsbiq\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector subtract with borrow compute and borrow indication
++
++(define_insn "vec_subec_u128"
++  [(set (match_operand:V16QI 0 "register_operand"               "=v")
++	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
++		       (match_operand:V16QI 2 "register_operand" "v")
++		       (match_operand:V16QI 3 "register_operand" "v")]
++		      UNSPEC_VEC_SUBEC_U128))]
++  "TARGET_VX"
++  "vsbcbiq\t%v0,%v1,%v2,%v3"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector sum across
++
++; Sum across DImode parts of the 1st operand and add the rightmost
++; element of 2nd operand
++; vsumgh, vsumgf
++(define_expand "vec_sum2<mode>"
++  [(set (match_operand:V2DI 0 "register_operand" "")
++	(unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "")
++		      (match_operand:VI_HW_HS 2 "register_operand" "")]
++		     UNSPEC_VEC_VSUMG))]
++  "TARGET_VX")
++
++; vsumqf, vsumqg
++(define_insn "vec_sum_u128<mode>"
++  [(set (match_operand:V2DI 0 "register_operand" "=v")
++	(unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand" "v")
++		      (match_operand:VI_HW_SD 2 "register_operand" "v")]
++		     UNSPEC_VEC_VSUMQ))]
++  "TARGET_VX"
++  "vsumq<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++; vsumb, vsumh
++(define_expand "vec_sum4<mode>"
++  [(set (match_operand:V4SI 0 "register_operand" "")
++	(unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "")
++		      (match_operand:VI_HW_QH 2 "register_operand" "")]
++		     UNSPEC_VEC_VSUM))]
++  "TARGET_VX")
++
++
++; Vector test under mask
++
++(define_expand "vec_test_mask_int<mode>"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_operand:V_HW 1 "register_operand" "")
++		       (match_operand:<tointvec> 2 "register_operand" "")]
++		      UNSPEC_VEC_TEST_MASK))
++   (set (match_operand:SI 0 "register_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_insn "*vec_test_mask<mode>"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_operand:V_HW 0 "register_operand" "v")
++		       (match_operand:<tointvec> 1 "register_operand" "v")]
++		      UNSPEC_VEC_TEST_MASK))]
++  "TARGET_VX"
++  "vtm\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++
++; Vector find any element equal
++
++; vfaeb, vfaeh, vfaef
++; vfaezb, vfaezh, vfaezf
++(define_insn "vfae<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:QI        3 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VFAE))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[3]);
++
++  if (flags & VSTRING_FLAG_ZS)
++    {
++      flags &= ~VSTRING_FLAG_ZS;
++      operands[3] = GEN_INT (flags);
++      return "vfaez<bhfgq>\t%v0,%v1,%v2,%b3";
++    }
++  return "vfae<bhfgq>\t%v0,%v1,%v2,%b3";
++}
++[(set_attr "op_type" "VRR")])
++
++; vfaebs, vfaehs, vfaefs
++; vfaezbs, vfaezhs, vfaezfs
++(define_insn "*vfaes<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"   "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"   "v")
++			   (match_operand:QI        3 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VFAE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)]
++		      UNSPEC_VEC_VFAECC))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[3]);
++
++  if (flags & VSTRING_FLAG_ZS)
++    {
++      flags &= ~VSTRING_FLAG_ZS;
++      operands[3] = GEN_INT (flags);
++      return "vfaez<bhfgq>s\t%v0,%v1,%v2,%b3";
++    }
++  return "vfae<bhfgq>s\t%v0,%v1,%v2,%b3";
++}
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vfaez<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "")
++			   (match_operand:QI        3 "const_mask_operand" "")]
++			  UNSPEC_VEC_VFAE))]
++  "TARGET_VX"
++{
++  operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_ZS);
++})
++
++(define_expand "vfaes<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS 0 "register_operand" "")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "")
++			   (match_operand:QI        3 "const_mask_operand" "")]
++			  UNSPEC_VEC_VFAE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)]
++		      UNSPEC_VEC_VFAECC))])
++   (set (match_operand:SI 4 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS);
++})
++
++(define_expand "vfaezs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS 0 "register_operand" "")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "")
++			   (match_operand:SI        3 "const_mask_operand" "")]
++			  UNSPEC_VEC_VFAE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)]
++		      UNSPEC_VEC_VFAECC))])
++   (set (match_operand:SI 4 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS);
++})
++
++
++; Vector find element equal
++
++; vfeebs, vfeehs, vfeefs
++; vfeezbs, vfeezhs, vfeezfs
++(define_insn "*vfees<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "v")
++			   (match_operand:QI 3 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VFEE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)]
++		      UNSPEC_VEC_VFEECC))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[3]);
++
++  gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
++  flags &= ~VSTRING_FLAG_CS;
++
++  if (flags == VSTRING_FLAG_ZS)
++    return "vfeez<bhfgq>s\t%v0,%v1,%v2";
++  return "vfee<bhfgq>s\t%v0,%v1,%v2,%b3";
++}
++  [(set_attr "op_type" "VRR")])
++
++; vfeeb, vfeeh, vfeef
++(define_insn "vfee<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (const_int 0)]
++			  UNSPEC_VEC_VFEE))]
++  "TARGET_VX"
++  "vfee<bhfgq>\t%v0,%v1,%v2,0"
++  [(set_attr "op_type" "VRR")])
++
++; vfeezb, vfeezh, vfeezf (zero search, CC is not set)
++(define_insn "vfeez<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (const_int VSTRING_FLAG_ZS)]
++			  UNSPEC_VEC_VFEE))]
++  "TARGET_VX"
++  "vfeez<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vfees<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS                    0 "register_operand" "")
++	  (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			     (match_operand:VI_HW_QHS 2 "register_operand" "")
++			     (const_int VSTRING_FLAG_CS)]
++			    UNSPEC_VEC_VFEE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (const_int VSTRING_FLAG_CS)]
++		      UNSPEC_VEC_VFEECC))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vfeezs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS                    0 "register_operand" "")
++	  (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			     (match_operand:VI_HW_QHS 2 "register_operand" "")
++			     (match_dup 4)]
++			    UNSPEC_VEC_VFEE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 4)]
++		      UNSPEC_VEC_VFEECC))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS);
++})
++
++; Vector find element not equal
++
++; vfeneb, vfeneh, vfenef
++(define_insn "vfene<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (const_int 0)]
++			  UNSPEC_VEC_VFENE))]
++  "TARGET_VX"
++  "vfene<bhfgq>\t%v0,%v1,%v2,0"
++  [(set_attr "op_type" "VRR")])
++
++; vec_vfenes can be found in vector.md since it is used for strlen
++
++; vfenezb, vfenezh, vfenezf
++(define_insn "vfenez<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"  "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"  "v")
++			   (const_int VSTRING_FLAG_ZS)]
++			  UNSPEC_VEC_VFENE))]
++  "TARGET_VX"
++  "vfenez<bhfgq>\t%v0,%v1,%v2"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vfenes<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS                    0 "register_operand" "")
++	  (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			     (match_operand:VI_HW_QHS 2 "register_operand" "")
++			     (const_int VSTRING_FLAG_CS)]
++			    UNSPEC_VEC_VFENE))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (const_int VSTRING_FLAG_CS)]
++		      UNSPEC_VEC_VFENECC))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vfenezs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS                    0 "register_operand" "")
++	  (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			     (match_operand:VI_HW_QHS 2 "register_operand" "")
++			     (match_dup 4)]
++			    UNSPEC_VEC_VFENE))
++     (set (reg:CCRAW CC_REGNUM)
++	  (unspec:CCRAW [(match_dup 1)
++			 (match_dup 2)
++			 (match_dup 4)]
++			UNSPEC_VEC_VFENECC))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS);
++})
++
++; Vector isolate string
++
++; vistrb, vistrh, vistrf
++(define_insn "vistr<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++			  UNSPEC_VEC_VISTR))]
++  "TARGET_VX"
++  "vistr<bhfgq>\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; vistrbs, vistrhs, vistrfs
++(define_insn "*vistrs<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")]
++			  UNSPEC_VEC_VISTR))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)] UNSPEC_VEC_VISTRCC))]
++  "TARGET_VX"
++  "vistr<bhfgq>s\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vistrs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS                    0 "register_operand" "")
++	  (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")]
++			    UNSPEC_VEC_VISTR))
++     (set (reg:CCRAW CC_REGNUM)
++	  (unspec:CCRAW [(match_dup 1)]
++			UNSPEC_VEC_VISTRCC))])
++   (set (match_operand:SI 2 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++
++; Vector compare range
++
++; vstrcb, vstrch, vstrcf
++; vstrczb, vstrczh, vstrczf
++(define_insn "vstrc<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand"  "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"   "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"   "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand"   "v")
++			   (match_operand:QI        4 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VSTRC))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[4]);
++
++  if (flags & VSTRING_FLAG_ZS)
++    {
++      flags &= ~VSTRING_FLAG_ZS;
++      operands[4] = GEN_INT (flags);
++      return "vstrcz<bhfgq>\t%v0,%v1,%v2,%v3,%b4";
++    }
++  return "vstrc<bhfgq>\t%v0,%v1,%v2,%v3,%b4";
++}
++[(set_attr "op_type" "VRR")])
++
++; vstrcbs, vstrchs, vstrcfs
++; vstrczbs, vstrczhs, vstrczfs
++(define_insn "*vstrcs<mode>"
++  [(set (match_operand:VI_HW_QHS                    0 "register_operand"  "=v")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"   "v")
++			   (match_operand:VI_HW_QHS 2 "register_operand"   "v")
++			   (match_operand:VI_HW_QHS 3 "register_operand"   "v")
++			   (match_operand:QI        4 "const_mask_operand" "C")]
++			  UNSPEC_VEC_VSTRC))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)
++		       (match_dup 4)]
++		      UNSPEC_VEC_VSTRCCC))]
++  "TARGET_VX"
++{
++  unsigned HOST_WIDE_INT flags = INTVAL (operands[4]);
++
++  if (flags & VSTRING_FLAG_ZS)
++    {
++      flags &= ~VSTRING_FLAG_ZS;
++      operands[4] = GEN_INT (flags);
++      return "vstrcz<bhfgq>s\t%v0,%v1,%v2,%v3,%b4";
++    }
++  return "vstrc<bhfgq>s\t%v0,%v1,%v2,%v3,%b4";
++}
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vstrcz<mode>"
++  [(set (match_operand:VI_HW_QHS 0 "register_operand" "")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand"   "")
++			   (match_operand:VI_HW_QHS 2 "register_operand"   "")
++			   (match_operand:VI_HW_QHS 3 "register_operand"   "")
++			   (match_operand:QI        4 "const_mask_operand" "")]
++			  UNSPEC_VEC_VSTRC))]
++  "TARGET_VX"
++{
++  operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_ZS);
++})
++
++(define_expand "vstrcs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS 0 "register_operand" "")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "")
++			   (match_operand:VI_HW_QHS 3 "register_operand" "")
++			   (match_operand:QI        4 "const_mask_operand" "")]
++			  UNSPEC_VEC_VSTRC))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)
++		       (match_dup 4)]
++		      UNSPEC_VEC_VSTRCCC))])
++   (set (match_operand:SI 5 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS);
++})
++
++(define_expand "vstrczs<mode>"
++  [(parallel
++    [(set (match_operand:VI_HW_QHS 0 "register_operand" "")
++	(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")
++			   (match_operand:VI_HW_QHS 2 "register_operand" "")
++			   (match_operand:VI_HW_QHS 3 "register_operand" "")
++			   (match_operand:QI        4 "const_mask_operand" "")]
++			  UNSPEC_VEC_VSTRC))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1)
++		       (match_dup 2)
++		       (match_dup 3)
++		       (match_dup 4)]
++		      UNSPEC_VEC_VSTRCCC))])
++   (set (match_operand:SI 5 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX"
++{
++  operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS);
++})
++
++
++; Signed V2DI -> V2DF conversion - inexact exception disabled
++(define_insn "vec_di_to_df_s64"
++  [(set (match_operand:V2DF 0 "register_operand"                "=v")
++	(unspec:V2DF [(match_operand:V2DI 1 "register_operand"   "v")
++		      (match_operand:QI   2 "const_mask_operand" "C")]
++		     UNSPEC_VEC_VCDGB))]
++  "TARGET_VX && UINTVAL (operands[2]) != 2 && UINTVAL (operands[2]) <= 7"
++  "vcdgb\t%v0,%v1,4,%b2"
++  [(set_attr "op_type" "VRR")])
++
++; The result needs to be multiplied with 2**-op2
++(define_expand "vec_ctd_s64"
++  [(set (match_operand:V2DF               0 "register_operand" "")
++	(unspec:V2DF [(match_operand:V2DI 1 "register_operand" "")
++		      (const_int 0)] ; According to current BFP rounding mode
++		     UNSPEC_VEC_VCDGB))
++   (use (match_operand:QI 2 "const_int_operand" ""))
++   (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))]
++  "TARGET_VX"
++{
++  REAL_VALUE_TYPE f;
++  rtx c;
++
++  real_2expN (&f, -INTVAL (operands[2]), DFmode);
++  c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode);
++
++  operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c));
++  operands[3] = force_reg (V2DFmode, operands[3]);
++})
++
++; Unsigned V2DI -> V2DF conversion - inexact exception disabled
++(define_insn "vec_di_to_df_u64"
++  [(set (match_operand:V2DF 0 "register_operand"               "=v")
++	(unspec:V2DF [(match_operand:V2DI 1 "register_operand"  "v")
++		      (match_operand:QI   2 "const_int_operand" "C")]
++		     UNSPEC_VEC_VCDLGB))]
++  "TARGET_VX"
++  "vcdlgb\t%v0,%v1,4,%b2"
++  [(set_attr "op_type" "VRR")])
++
++; The result needs to be multiplied with 2**-op2
++(define_expand "vec_ctd_u64"
++  [(set (match_operand:V2DF               0 "register_operand" "")
++	(unspec:V2DF [(match_operand:V2DI 1 "register_operand" "")
++		      (const_int 0)] ; According to current BFP rounding mode
++		     UNSPEC_VEC_VCDLGB))
++   (use (match_operand:QI 2 "const_int_operand" ""))
++   (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))]
++  "TARGET_VX"
++{
++  REAL_VALUE_TYPE f;
++  rtx c;
++
++  real_2expN (&f, -INTVAL (operands[2]), DFmode);
++  c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode);
++
++  operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c));
++  operands[3] = force_reg (V2DFmode, operands[3]);
++})
++
++
++; Signed V2DF -> V2DI conversion - inexact exception disabled
++(define_insn "vec_df_to_di_s64"
++  [(set (match_operand:V2DI 0 "register_operand"               "=v")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand"  "v")
++		      (match_operand:QI   2 "const_int_operand" "C")]
++		     UNSPEC_VEC_VCGDB))]
++  "TARGET_VX"
++  "vcgdb\t%v0,%v1,4,%b2"
++  [(set_attr "op_type" "VRR")])
++
++; The input needs to be multiplied with 2**op2
++(define_expand "vec_ctsl"
++  [(use (match_operand:QI 2 "const_int_operand" ""))
++   (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "")
++				 (match_dup 3)))
++   (set (match_operand:V2DI 0 "register_operand" "")
++	(unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode
++		     UNSPEC_VEC_VCGDB))]
++  "TARGET_VX"
++{
++  REAL_VALUE_TYPE f;
++  rtx c;
++
++  real_2expN (&f, INTVAL (operands[2]), DFmode);
++  c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode);
++
++  operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c));
++  operands[3] = force_reg (V2DFmode, operands[3]);
++  operands[4] = gen_reg_rtx (V2DFmode);
++})
++
++; Unsigned V2DF -> V2DI conversion - inexact exception disabled
++(define_insn "vec_df_to_di_u64"
++  [(set (match_operand:V2DI 0 "register_operand"               "=v")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand"  "v")
++		      (match_operand:QI   2 "const_mask_operand" "C")]
++		     UNSPEC_VEC_VCLGDB))]
++  "TARGET_VX && UINTVAL (operands[2]) <= 7"
++  "vclgdb\t%v0,%v1,4,%b2"
++  [(set_attr "op_type" "VRR")])
++
++; The input needs to be multiplied with 2**op2
++(define_expand "vec_ctul"
++  [(use (match_operand:QI 2 "const_int_operand" ""))
++   (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "")
++				 (match_dup 3)))
++   (set (match_operand:V2DI 0 "register_operand" "")
++	(unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode
++		     UNSPEC_VEC_VCLGDB))]
++  "TARGET_VX"
++{
++  REAL_VALUE_TYPE f;
++  rtx c;
++
++  real_2expN (&f, INTVAL (operands[2]), DFmode);
++  c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode);
++
++  operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c));
++  operands[3] = force_reg (V2DFmode, operands[3]);
++  operands[4] = gen_reg_rtx (V2DFmode);
++})
++
++; Vector load fp integer - IEEE inexact exception is suppressed
++(define_insn "vfidb"
++  [(set (match_operand:V2DI               0 "register_operand"  "=v")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand"   "v")
++		      (match_operand:QI   2 "const_mask_operand" "C")
++		      (match_operand:QI   3 "const_mask_operand" "C")]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX && !(UINTVAL (operands[2]) & 3) && UINTVAL (operands[3]) <= 7"
++  "vfidb\t%v0,%v1,%b2,%b3"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vec_ceil"
++  [(set (match_operand:V2DI               0 "register_operand" "")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand" "")
++		      (const_int VEC_RND_TO_INF)]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX")
++
++(define_expand "vec_floor"
++  [(set (match_operand:V2DI               0 "register_operand" "")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand" "")
++		      (const_int VEC_RND_TO_MINF)]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX")
++
++(define_expand "vec_trunc"
++  [(set (match_operand:V2DI               0 "register_operand" "")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand" "")
++		      (const_int VEC_RND_TO_ZERO)]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX")
++
++(define_expand "vec_roundc"
++  [(set (match_operand:V2DI               0 "register_operand" "")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand" "")
++		      (const_int VEC_RND_CURRENT)]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX")
++
++(define_expand "vec_round"
++  [(set (match_operand:V2DI               0 "register_operand" "")
++	(unspec:V2DI [(match_operand:V2DF 1 "register_operand" "")
++		      (const_int VEC_RND_NEAREST_TO_EVEN)]
++		     UNSPEC_VEC_VFIDB))]
++  "TARGET_VX")
++
++
++; Vector load lengthened - V4SF -> V2DF
++
++(define_insn "*vldeb"
++  [(set (match_operand:V2DF 0 "register_operand"               "=v")
++	(unspec:V2DF [(match_operand:V4SF 1 "register_operand"  "v")]
++		     UNSPEC_VEC_VLDEB))]
++  "TARGET_VX"
++  "vldeb\t%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vec_ld2f"
++  [; Initialize a vector to all zeroes.  FIXME: This should not be
++   ; necessary since all elements of the vector will be set anyway.
++   ; This is just to make it explicit to the data flow framework.
++   (set (match_dup 2) (match_dup 3))
++   (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1 "memory_operand" "")
++				    (const_int 0)
++				    (match_dup 2)]
++				    UNSPEC_VEC_SET))
++   (set (match_dup 2) (unspec:V4SF [(match_dup 4)
++				    (const_int 2)
++				    (match_dup 2)]
++				    UNSPEC_VEC_SET))
++   (set (match_operand:V2DF 0 "register_operand" "")
++	(unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VLDEB))]
++  "TARGET_VX"
++{
++  operands[2] = gen_reg_rtx (V4SFmode);
++  operands[3] = CONST0_RTX (V4SFmode);
++  operands[4] = adjust_address (operands[1], SFmode, 4);
++})
++
++
++; Vector load rounded - V2DF -> V4SF
++
++(define_insn "*vledb"
++  [(set (match_operand:V4SF 0 "register_operand"               "=v")
++	(unspec:V4SF [(match_operand:V2DF 1 "register_operand"  "v")]
++		     UNSPEC_VEC_VLEDB))]
++  "TARGET_VX"
++  "vledb\t%v0,%v1,0,0"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vec_st2f"
++  [(set (match_dup 2)
++	(unspec:V4SF [(match_operand:V2DF 0 "register_operand" "")]
++		     UNSPEC_VEC_VLEDB))
++   (set (match_operand:SF 1 "memory_operand" "")
++	(unspec:SF [(match_dup 2) (const_int 0)] UNSPEC_VEC_EXTRACT))
++   (set (match_dup 3)
++	(unspec:SF [(match_dup 2) (const_int 2)] UNSPEC_VEC_EXTRACT))]
++  "TARGET_VX"
++{
++  operands[2] = gen_reg_rtx (V4SFmode);
++  operands[3] = adjust_address (operands[1], SFmode, 4);
++})
++
++
++; Vector load negated fp
++
++(define_expand "vec_nabs"
++  [(set (match_operand:V2DF 0 "register_operand" "")
++	(neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" ""))))]
++  "TARGET_VX")
++
++; Vector square root fp vec_sqrt -> sqrt rtx standard name
++
++; Vector FP test data class immediate
++
++(define_insn "*vftcidb"
++  [(set (match_operand:V2DF 0 "register_operand"  "=v")
++	(unspec:V2DF [(match_operand:V2DF 1 "register_operand"  "v")
++		      (match_operand:HI   2 "const_int_operand" "J")]
++		     UNSPEC_VEC_VFTCIDB))
++   (set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))]
++  "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")"
++  "vftcidb\t%v0,%v1,%x2"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*vftcidb_cconly"
++  [(set (reg:CCRAW CC_REGNUM)
++	(unspec:CCRAW [(match_operand:V2DF 1 "register_operand"  "v")
++		       (match_operand:HI   2 "const_int_operand" "J")]
++		      UNSPEC_VEC_VFTCIDBCC))
++   (clobber (match_scratch:V2DI 0 "=v"))]
++  "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")"
++  "vftcidb\t%v0,%v1,%x2"
++  [(set_attr "op_type" "VRR")])
++
++(define_expand "vftcidb"
++  [(parallel
++    [(set (match_operand:V2DF               0 "register_operand"  "")
++	  (unspec:V2DF [(match_operand:V2DF 1 "register_operand"  "")
++			(match_operand:HI   2 "const_int_operand" "")]
++		       UNSPEC_VEC_VFTCIDB))
++     (set (reg:CCRAW CC_REGNUM)
++	  (unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")")
++
++;;
++;; Integer compares
++;;
++
++; All comparisons which produce a CC need fully populated (VI_HW)
++; vector arguments.  Otherwise the any/all CCs would be just bogus.
++
++(define_insn "*vec_cmp<VICMP:insn_cmp><VI_HW:mode>_cconly"
++  [(set (reg:VICMP CC_REGNUM)
++	(compare:VICMP (match_operand:VI_HW 0 "register_operand" "v")
++		       (match_operand:VI_HW 1 "register_operand" "v")))
++   (clobber (match_scratch:VI_HW 2 "=v"))]
++  "TARGET_VX"
++  "vc<VICMP:insn_cmp><VI_HW:bhfgq>s\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; FIXME: The following 2x3 definitions should be merged into 2 with
++; VICMP like above but I could not find a way to set the comparison
++; operator (eq) depending on the mode CCVEQ (mode_iterator). Or the
++; other way around - setting the mode depending on the code
++; (code_iterator).
++(define_expand "vec_cmpeq<VI_HW:mode>_cc"
++  [(parallel
++    [(set (reg:CCVEQ CC_REGNUM)
++	(compare:CCVEQ (match_operand:VI_HW 1 "register_operand" "v")
++		       (match_operand:VI_HW 2 "register_operand" "v")))
++     (set (match_operand:VI_HW 0 "register_operand" "=v")
++	  (eq:VI_HW (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vec_cmph<VI_HW:mode>_cc"
++  [(parallel
++    [(set (reg:CCVH CC_REGNUM)
++	  (compare:CCVH (match_operand:VI_HW 1 "register_operand" "v")
++			(match_operand:VI_HW 2 "register_operand" "v")))
++     (set (match_operand:VI_HW 0 "register_operand" "=v")
++	  (gt:VI_HW (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vec_cmphl<VI_HW:mode>_cc"
++  [(parallel
++    [(set (reg:CCVHU CC_REGNUM)
++	  (compare:CCVHU (match_operand:VI_HW 1 "register_operand" "v")
++			 (match_operand:VI_HW 2 "register_operand" "v")))
++     (set (match_operand:VI_HW 0 "register_operand" "=v")
++	  (gtu:VI_HW (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVHU CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++
++(define_insn "*vec_cmpeq<VI_HW:mode>_cc"
++  [(set (reg:CCVEQ CC_REGNUM)
++	(compare:CCVEQ (match_operand:VI_HW 0 "register_operand"  "v")
++		       (match_operand:VI_HW 1 "register_operand"  "v")))
++   (set (match_operand:VI_HW                2 "register_operand" "=v")
++	(eq:VI_HW (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vceq<VI_HW:bhfgq>s\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*vec_cmph<VI_HW:mode>_cc"
++  [(set (reg:CCVH CC_REGNUM)
++	(compare:CCVH (match_operand:VI_HW 0 "register_operand"  "v")
++		      (match_operand:VI_HW 1 "register_operand"  "v")))
++   (set (match_operand:VI_HW               2 "register_operand" "=v")
++	(gt:VI_HW (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vch<VI_HW:bhfgq>s\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*vec_cmphl<VI_HW:mode>_cc"
++  [(set (reg:CCVHU CC_REGNUM)
++	(compare:CCVHU (match_operand:VI_HW 0 "register_operand"  "v")
++		       (match_operand:VI_HW 1 "register_operand"  "v")))
++   (set (match_operand:VI_HW                2 "register_operand" "=v")
++	(gtu:VI_HW (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vchl<VI_HW:bhfgq>s\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++;;
++;; Floating point compares
++;;
++
++(define_insn "*vec_cmp<insn_cmp>v2df_cconly"
++  [(set (reg:VFCMP CC_REGNUM)
++	(compare:VFCMP (match_operand:V2DF 0 "register_operand" "v")
++		       (match_operand:V2DF 1 "register_operand" "v")))
++   (clobber (match_scratch:V2DI 2 "=v"))]
++  "TARGET_VX"
++  "vfc<asm_fcmp>dbs\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++; FIXME: Merge the following 2x3 patterns with VFCMP
++(define_expand "vec_cmpeqv2df_cc"
++  [(parallel
++    [(set (reg:CCVEQ CC_REGNUM)
++	  (compare:CCVEQ (match_operand:V2DF 1 "register_operand"  "v")
++			 (match_operand:V2DF 2 "register_operand"  "v")))
++     (set (match_operand:V2DI 0 "register_operand" "=v")
++	  (eq:V2DI (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vec_cmphv2df_cc"
++  [(parallel
++    [(set (reg:CCVH CC_REGNUM)
++	  (compare:CCVH (match_operand:V2DF 1 "register_operand"  "v")
++			(match_operand:V2DF 2 "register_operand"  "v")))
++     (set (match_operand:V2DI 0 "register_operand" "=v")
++	  (gt:V2DI (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++(define_expand "vec_cmphev2df_cc"
++  [(parallel
++    [(set (reg:CCVFHE CC_REGNUM)
++	  (compare:CCVFHE (match_operand:V2DF 1 "register_operand"  "v")
++			  (match_operand:V2DF 2 "register_operand"  "v")))
++     (set (match_operand:V2DI 0 "register_operand" "=v")
++	  (ge:V2DI (match_dup 1) (match_dup 2)))])
++   (set (match_operand:SI 3 "memory_operand" "")
++	(unspec:SI [(reg:CCVFHE CC_REGNUM)] UNSPEC_CC_TO_INT))]
++  "TARGET_VX")
++
++
++(define_insn "*vec_cmpeqv2df_cc"
++  [(set (reg:CCVEQ CC_REGNUM)
++	(compare:CCVEQ (match_operand:V2DF 0 "register_operand"  "v")
++		       (match_operand:V2DF 1 "register_operand"  "v")))
++   (set (match_operand:V2DI                2 "register_operand" "=v")
++	(eq:V2DI (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vfcedbs\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*vec_cmphv2df_cc"
++  [(set (reg:CCVH CC_REGNUM)
++	(compare:CCVH (match_operand:V2DF 0 "register_operand"  "v")
++		      (match_operand:V2DF 1 "register_operand"  "v")))
++   (set (match_operand:V2DI               2 "register_operand" "=v")
++	(gt:V2DI (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vfchdbs\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
++
++(define_insn "*vec_cmphev2df_cc"
++  [(set (reg:CCVFHE CC_REGNUM)
++	(compare:CCVFHE (match_operand:V2DF 0 "register_operand"  "v")
++			(match_operand:V2DF 1 "register_operand"  "v")))
++   (set (match_operand:V2DI                 2 "register_operand" "=v")
++	(ge:V2DI (match_dup 0) (match_dup 1)))]
++  "TARGET_VX"
++  "vfchedbs\t%v2,%v0,%v1"
++  [(set_attr "op_type" "VRR")])
+--- gcc/config.gcc	2016-05-11 14:46:08.298981685 +0200
++++ gcc/config.gcc	2016-05-11 17:17:32.000000000 +0200
+@@ -452,7 +452,7 @@ s390*-*-*)
+ 	cpu_type=s390
+ 	need_64bit_hwint=yes
+ 	extra_options="${extra_options} fused-madd.opt"
+-	extra_headers="s390intrin.h htmintrin.h htmxlintrin.h"
++	extra_headers="s390intrin.h htmintrin.h htmxlintrin.h vecintrin.h"
+ 	;;
+ # Note the 'l'; we need to be able to match e.g. "shle" or "shl".
+ sh[123456789lbe]*-*-* | sh-*-*)
+@@ -2249,27 +2249,35 @@ rx-*-elf*)
+ s390-*-linux*)
+ 	default_gnu_indirect_function=yes
+ 	tm_file="s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h"
++	c_target_objs="${c_target_objs} s390-c.o"
++	cxx_target_objs="${cxx_target_objs} s390-c.o"
+ 	if test x$enable_targets = xall; then
+ 		tmake_file="${tmake_file} s390/t-linux64"
+ 	fi
++	tmake_file="${tmake_file} s390/t-s390"
+ 	;;
+ s390x-*-linux*)
+ 	default_gnu_indirect_function=yes
+ 	tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h"
+ 	tm_p_file=s390/s390-protos.h
++	c_target_objs="${c_target_objs} s390-c.o"
++	cxx_target_objs="${cxx_target_objs} s390-c.o"
+ 	md_file=s390/s390.md
+ 	extra_modes=s390/s390-modes.def
+ 	out_file=s390/s390.c
+-	tmake_file="${tmake_file} s390/t-linux64"
++	tmake_file="${tmake_file} s390/t-linux64 s390/t-s390"
+ 	;;
+ s390x-ibm-tpf*)
+-        tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h"
+-        tm_p_file=s390/s390-protos.h
+-        md_file=s390/s390.md
+-        extra_modes=s390/s390-modes.def
+-        out_file=s390/s390.c
+-        thread_file='tpf'
++	tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h"
++	tm_p_file=s390/s390-protos.h
++	c_target_objs="${c_target_objs} s390-c.o"
++	cxx_target_objs="${cxx_target_objs} s390-c.o"
++	md_file=s390/s390.md
++	extra_modes=s390/s390-modes.def
++	out_file=s390/s390.c
++	thread_file='tpf'
+ 	extra_options="${extra_options} s390/tpf.opt"
++	tmake_file="${tmake_file} s390/t-s390"
+ 	;;
+ score-*-elf)
+ 	gas=yes
+@@ -3603,7 +3611,7 @@ case "${target}" in
+ 		for which in arch tune; do
+ 			eval "val=\$with_$which"
+ 			case ${val} in
+-			"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12)
++			"" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12 | z13)
+ 				# OK
+ 				;;
+ 			*)
+--- gcc/configure	2016-05-11 14:46:08.719976035 +0200
++++ gcc/configure	2016-05-11 19:41:14.975813805 +0200
+@@ -26000,6 +26000,42 @@ $as_echo "#define HAVE_LD_PERSONALITY_RE
+ 
+     fi
+     ;;
++  s390*-*-*)
++    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .gnu_attribute support" >&5
++$as_echo_n "checking assembler for .gnu_attribute support... " >&6; }
++if test "${gcc_cv_as_s390_gnu_attribute+set}" = set; then :
++  $as_echo_n "(cached) " >&6
++else
++  gcc_cv_as_s390_gnu_attribute=no
++    if test $in_tree_gas = yes; then
++    if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 18 \) \* 1000 + 0`
++  then gcc_cv_as_s390_gnu_attribute=yes
++fi
++  elif test x$gcc_cv_as != x; then
++    $as_echo '.gnu_attribute 8,1' > conftest.s
++    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
++  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
++  (eval $ac_try) 2>&5
++  ac_status=$?
++  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
++  test $ac_status = 0; }; }
++    then
++	gcc_cv_as_s390_gnu_attribute=yes
++    else
++      echo "configure: failed program was" >&5
++      cat conftest.s >&5
++    fi
++    rm -f conftest.o conftest.s
++  fi
++fi
++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_s390_gnu_attribute" >&5
++$as_echo "$gcc_cv_as_s390_gnu_attribute" >&6; }
++if test $gcc_cv_as_s390_gnu_attribute = yes; then
++
++$as_echo "#define HAVE_AS_GNU_ATTRIBUTE 1" >>confdefs.h
++
++fi
++    ;;
+ esac
+ 
+ # Mips and HP-UX need the GNU assembler.
+--- gcc/configure.ac	2015-06-18 16:32:50.000000000 +0200
++++ gcc/configure.ac	2016-05-11 19:34:04.507631160 +0200
+@@ -4207,6 +4207,13 @@ EOF
+ pointers into PC-relative form.])
+     fi
+     ;;
++  s390*-*-*)
++    gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
++      gcc_cv_as_s390_gnu_attribute, [2,18,0],,
++      [.gnu_attribute 8,1],,
++      [AC_DEFINE(HAVE_AS_GNU_ATTRIBUTE, 1,
++	  [Define if your assembler supports .gnu_attribute.])])
++    ;;
+ esac
+ 
+ # Mips and HP-UX need the GNU assembler.
+--- gcc/doc/invoke.texi	2016-05-11 14:46:08.615977431 +0200
++++ gcc/doc/invoke.texi	2016-05-11 19:27:23.065121001 +0200
+@@ -885,6 +885,7 @@ See RS/6000 and PowerPC Options.
+ -mbackchain  -mno-backchain -mpacked-stack  -mno-packed-stack @gol
+ -msmall-exec  -mno-small-exec  -mmvcle -mno-mvcle @gol
+ -m64  -m31  -mdebug  -mno-debug  -mesa  -mzarch @gol
++-mhtm -mvx -mzvector @gol
+ -mtpf-trace -mno-tpf-trace  -mfused-madd  -mno-fused-madd @gol
+ -mwarn-framesize  -mwarn-dynamicstack  -mstack-size -mstack-guard @gol
+ -mhotpatch=@var{halfwords},@var{halfwords}}
+@@ -18596,6 +18597,46 @@ When generating code compliant to the GN
+ the default is @option{-mesa}.  When generating code compliant
+ to the GNU/Linux for zSeries ABI, the default is @option{-mzarch}.
+ 
++@item -mhtm
++@itemx -mno-htm
++@opindex mhtm
++@opindex mno-htm
++The @option{-mhtm} option enables a set of builtins making use of
++instructions available with the transactional execution facility
++introduced with the IBM zEnterprise EC12 machine generation
++(@pxref{S/390 System z Built-in Functions}).
++@option{-mhtm} is enabled by default when using @option{-march=zEC12}.
++
++@item -mvx
++@itemx -mno-vx
++@opindex mvx
++@opindex mno-vx
++When @option{-mvx} is specified, generate code using the instructions
++available with the vector extension facility introduced with the IBM
++z13 machine generation.
++This option changes the ABI for some vector type values with regard to
++alignment and calling conventions.  In case vector type values are
++being used in an ABI-relevant context a GAS @samp{.gnu_attribute}
++command will be added to mark the resulting binary with the ABI used.
++@option{-mvx} is enabled by default when using @option{-march=z13}.
++
++@item -mzvector
++@itemx -mno-zvector
++@opindex mzvector
++@opindex mno-zvector
++The @option{-mzvector} option enables vector language extensions and
++builtins using instructions available with the vector extension
++facility introduced with the IBM z13 machine generation.
++This option adds support for @samp{vector} to be used as a keyword to
++define vector type variables and arguments.  @samp{vector} is only
++available when GNU extensions are enabled.  It will not be expanded
++when requesting strict standard compliance e.g. with @option{-std=c99}.
++In addition to the GCC low-level builtins @option{-mzvector} enables
++a set of builtins added for compatibility with Altivec-style
++implementations like Power and Cell.  In order to make use of these
++builtins the header file @file{vecintrin.h} needs to be included.
++@option{-mzvector} is disabled by default.
++
+ @item -mmvcle
+ @itemx -mno-mvcle
+ @opindex mmvcle
+@@ -18617,7 +18658,8 @@ The default is to not print debug inform
+ Generate code that runs on @var{cpu-type}, which is the name of a system
+ representing a certain processor type.  Possible values for
+ @var{cpu-type} are @samp{g5}, @samp{g6}, @samp{z900}, @samp{z990},
+-@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, and @samp{zEC12}.
++@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, @samp{zEC12},
++and @samp{z13}.
+ When generating code using the instructions available on z/Architecture,
+ the default is @option{-march=z900}.  Otherwise, the default is
+ @option{-march=g5}.
+--- gcc/doc/tm.texi	2016-05-11 14:46:08.216982786 +0200
++++ gcc/doc/tm.texi	2016-05-11 15:41:36.000000000 +0200
+@@ -8983,6 +8983,13 @@ register in Dwarf.  Otherwise, this hook
+ If not defined, the default is to return @code{NULL_RTX}.
+ @end deftypefn
+ 
++@deftypefn {Target Hook} {enum machine_mode} TARGET_DWARF_FRAME_REG_MODE (int @var{regno})
++Given a register, this hook should return the mode which the
++corresponding Dwarf frame register should have.  This is normally
++used to return a smaller mode than the raw mode to prevent call
++clobbered parts of a register altering the frame register size
++@end deftypefn
++
+ @deftypefn {Target Hook} void TARGET_INIT_DWARF_REG_SIZES_EXTRA (tree @var{address})
+ If some registers are represented in Dwarf-2 unwind information in
+ multiple pieces, define this hook to fill in information about the
+--- gcc/doc/tm.texi.in	2016-05-11 14:46:08.213982826 +0200
++++ gcc/doc/tm.texi.in	2016-05-11 15:41:36.000000000 +0200
+@@ -8854,6 +8854,8 @@ register in Dwarf.  Otherwise, this hook
+ If not defined, the default is to return @code{NULL_RTX}.
+ @end deftypefn
+ 
++@hook TARGET_DWARF_FRAME_REG_MODE
++
+ @hook TARGET_INIT_DWARF_REG_SIZES_EXTRA
+ If some registers are represented in Dwarf-2 unwind information in
+ multiple pieces, define this hook to fill in information about the
+--- gcc/dwarf2cfi.c	2013-01-21 16:10:46.000000000 +0100
++++ gcc/dwarf2cfi.c	2016-05-11 15:41:36.000000000 +0200
+@@ -244,11 +244,9 @@ expand_builtin_init_dwarf_reg_sizes (tre
+       if (rnum < DWARF_FRAME_REGISTERS)
+ 	{
+ 	  HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (mode);
+-	  enum machine_mode save_mode = reg_raw_mode[i];
+ 	  HOST_WIDE_INT size;
++	  enum machine_mode save_mode = targetm.dwarf_frame_reg_mode (i);
+ 
+-	  if (HARD_REGNO_CALL_PART_CLOBBERED (i, save_mode))
+-	    save_mode = choose_hard_reg_mode (i, 1, true);
+ 	  if (dnum == DWARF_FRAME_RETURN_COLUMN)
+ 	    {
+ 	      if (save_mode == VOIDmode)
+--- gcc/genattrtab.c	2013-01-21 16:08:23.000000000 +0100
++++ gcc/genattrtab.c	2016-05-11 17:32:29.000000000 +0200
+@@ -229,7 +229,7 @@ static int *insn_n_alternatives;
+ /* Stores, for each insn code, a bitmap that has bits on for each possible
+    alternative.  */
+ 
+-static int *insn_alternatives;
++static uint64_t *insn_alternatives;
+ 
+ /* Used to simplify expressions.  */
+ 
+@@ -257,7 +257,7 @@ static char *attr_printf           (unsi
+   ATTRIBUTE_PRINTF_2;
+ static rtx make_numeric_value      (int);
+ static struct attr_desc *find_attr (const char **, int);
+-static rtx mk_attr_alt             (int);
++static rtx mk_attr_alt             (uint64_t);
+ static char *next_comma_elt	   (const char **);
+ static rtx insert_right_side	   (enum rtx_code, rtx, rtx, int, int);
+ static rtx copy_boolean		   (rtx);
+@@ -771,7 +771,7 @@ check_attr_test (rtx exp, int is_const,
+ 	  if (attr == NULL)
+ 	    {
+ 	      if (! strcmp (XSTR (exp, 0), "alternative"))
+-		return mk_attr_alt (1 << atoi (XSTR (exp, 1)));
++		return mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1)));
+ 	      else
+ 		fatal ("unknown attribute `%s' in EQ_ATTR", XSTR (exp, 0));
+ 	    }
+@@ -817,7 +817,7 @@ check_attr_test (rtx exp, int is_const,
+ 
+ 	      name_ptr = XSTR (exp, 1);
+ 	      while ((p = next_comma_elt (&name_ptr)) != NULL)
+-		set |= 1 << atoi (p);
++		set |= ((uint64_t) 1) << atoi (p);
+ 
+ 	      return mk_attr_alt (set);
+ 	    }
+@@ -1292,7 +1292,7 @@ static struct attr_value *
+ get_attr_value (rtx value, struct attr_desc *attr, int insn_code)
+ {
+   struct attr_value *av;
+-  int num_alt = 0;
++  uint64_t num_alt = 0;
+ 
+   value = make_canonical (attr, value);
+   if (compares_alternatives_p (value))
+@@ -1934,7 +1934,7 @@ insert_right_side (enum rtx_code code, r
+    This routine is passed an expression and either AND or IOR.  It returns a
+    bitmask indicating which alternatives are mentioned within EXP.  */
+ 
+-static int
++static uint64_t
+ compute_alternative_mask (rtx exp, enum rtx_code code)
+ {
+   const char *string;
+@@ -1965,15 +1965,15 @@ compute_alternative_mask (rtx exp, enum
+     return 0;
+ 
+   if (string[1] == 0)
+-    return 1 << (string[0] - '0');
+-  return 1 << atoi (string);
++    return ((uint64_t) 1) << (string[0] - '0');
++  return ((uint64_t) 1) << atoi (string);
+ }
+ 
+ /* Given I, a single-bit mask, return RTX to compare the `alternative'
+    attribute with the value represented by that bit.  */
+ 
+ static rtx
+-make_alternative_compare (int mask)
++make_alternative_compare (uint64_t mask)
+ {
+   return mk_attr_alt (mask);
+ }
+@@ -2472,7 +2472,7 @@ attr_alt_complement (rtx s)
+    in E.  */
+ 
+ static rtx
+-mk_attr_alt (int e)
++mk_attr_alt (uint64_t e)
+ {
+   rtx result = rtx_alloc (EQ_ATTR_ALT);
+ 
+@@ -2499,7 +2499,7 @@ simplify_test_exp (rtx exp, int insn_cod
+   struct attr_value *av;
+   struct insn_ent *ie;
+   struct attr_value_list *iv;
+-  int i;
++  uint64_t i;
+   rtx newexp = exp;
+   bool left_alt, right_alt;
+ 
+@@ -2779,7 +2779,7 @@ simplify_test_exp (rtx exp, int insn_cod
+     case EQ_ATTR:
+       if (XSTR (exp, 0) == alternative_name)
+ 	{
+-	  newexp = mk_attr_alt (1 << atoi (XSTR (exp, 1)));
++	  newexp = mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1)));
+ 	  break;
+ 	}
+ 
+@@ -5240,10 +5240,11 @@ main (int argc, char **argv)
+     expand_delays ();
+ 
+   /* Make `insn_alternatives'.  */
+-  insn_alternatives = oballocvec (int, insn_code_number);
++  insn_alternatives = oballocvec (uint64_t, insn_code_number);
+   for (id = defs; id; id = id->next)
+     if (id->insn_code >= 0)
+-      insn_alternatives[id->insn_code] = (1 << id->num_alternatives) - 1;
++      insn_alternatives[id->insn_code]
++	= (((uint64_t) 1) << id->num_alternatives) - 1;
+ 
+   /* Make `insn_n_alternatives'.  */
+   insn_n_alternatives = oballocvec (int, insn_code_number);
+--- gcc/optabs.c	2014-05-15 10:46:12.000000000 +0200
++++ gcc/optabs.c	2016-05-11 15:53:11.000000000 +0200
+@@ -6659,11 +6659,11 @@ expand_vec_perm (enum machine_mode mode,
+       enum machine_mode selmode = GET_MODE (sel);
+       if (u == 2)
+ 	sel = expand_simple_binop (selmode, PLUS, sel, sel,
+-				   sel, 0, OPTAB_DIRECT);
++				   NULL, 0, OPTAB_DIRECT);
+       else
+ 	sel = expand_simple_binop (selmode, ASHIFT, sel,
+ 				   GEN_INT (exact_log2 (u)),
+-				   sel, 0, OPTAB_DIRECT);
++				   NULL, 0, OPTAB_DIRECT);
+       gcc_assert (sel != NULL);
+ 
+       /* Broadcast the low byte each element into each of its bytes.  */
+--- gcc/recog.h	2013-09-09 19:16:08.000000000 +0200
++++ gcc/recog.h	2016-05-11 15:52:48.000000000 +0200
+@@ -21,7 +21,7 @@ along with GCC; see the file COPYING3.
+ #define GCC_RECOG_H
+ 
+ /* Random number that should be large enough for all purposes.  */
+-#define MAX_RECOG_ALTERNATIVES 30
++#define MAX_RECOG_ALTERNATIVES 35
+ 
+ /* Types of operands.  */
+ enum op_type {
+--- gcc/target.def	2013-03-04 12:46:23.000000000 +0100
++++ gcc/target.def	2016-05-11 15:41:36.000000000 +0200
+@@ -1834,6 +1834,17 @@ DEFHOOK
+  rtx, (rtx reg),
+  hook_rtx_rtx_null)
+ 
++/* Given a register return the mode of the corresponding DWARF frame
++   register.  */
++DEFHOOK
++(dwarf_frame_reg_mode,
++ "Given a register, this hook should return the mode which the\n\
++corresponding Dwarf frame register should have.  This is normally\n\
++used to return a smaller mode than the raw mode to prevent call\n\
++clobbered parts of a register altering the frame register size",
++ enum machine_mode, (int regno),
++ default_dwarf_frame_reg_mode)
++
+ /* If expand_builtin_init_dwarf_reg_sizes needs to fill in table
+    entries not corresponding directly to registers below
+    FIRST_PSEUDO_REGISTER, this hook should generate the necessary
+--- gcc/targhooks.c	2013-01-21 16:02:59.000000000 +0100
++++ gcc/targhooks.c	2016-05-11 15:41:36.000000000 +0200
+@@ -1411,6 +1411,19 @@ default_debug_unwind_info (void)
+   return UI_NONE;
+ }
+ 
++/* Determine the correct mode for a Dwarf frame register that represents
++   register REGNO.  */
++
++enum machine_mode
++default_dwarf_frame_reg_mode (int regno)
++{
++  enum machine_mode save_mode = reg_raw_mode[regno];
++
++  if (HARD_REGNO_CALL_PART_CLOBBERED (regno, save_mode))
++    save_mode = choose_hard_reg_mode (regno, 1, true);
++  return save_mode;
++}
++
+ /* To be used by targets where reg_raw_mode doesn't return the right
+    mode for registers used in apply_builtin_return and apply_builtin_arg.  */
+ 
+--- gcc/targhooks.h	2013-01-21 16:03:00.000000000 +0100
++++ gcc/targhooks.h	2016-05-11 15:42:21.000000000 +0200
+@@ -186,6 +186,7 @@ extern int default_label_align_max_skip
+ extern int default_jump_align_max_skip (rtx);
+ extern section * default_function_section(tree decl, enum node_frequency freq,
+ 					  bool startup, bool exit);
++extern enum machine_mode default_dwarf_frame_reg_mode (int);
+ extern enum machine_mode default_get_reg_raw_mode(int);
+ 
+ extern void *default_get_pch_validity (size_t *);
+--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c	2012-12-13 11:28:46.000000000 +0100
++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c	2016-05-11 17:30:16.000000000 +0200
+@@ -1,5 +1,6 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+ 
+ #include <stdlib.h>
+--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c	2012-12-13 11:28:46.000000000 +0100
++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c	2016-05-11 17:30:16.000000000 +0200
+@@ -1,5 +1,6 @@
+ /* { dg-do run { target vect_cmdline_needed } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */
+ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */
+ 
+ #include <stdlib.h>
+--- gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c	2016-05-11 17:34:03.000000000 +0200
+@@ -0,0 +1,34 @@
++/* Verify if VRs are saved and restored.  */
++
++/* { dg-do run } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -march=z13 -mzarch" } */
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++v4si __attribute__((noinline))
++foo (v4si a)
++{
++  a += (v4si){ 1, 1, 1, 1 };
++  if (__builtin_tbegin (0) == 0)
++    {
++      a += (v4si){ 1, 1, 1, 1 };
++      __builtin_tabort (256);
++      __builtin_tend ();
++    }
++  else
++    a -= (v4si){ 1, 1, 1, 1 };
++
++  return a;
++}
++
++int
++main ()
++{
++  v4si a = (v4si){ 0, 0, 0, 0 };
++
++  a = foo (a);
++  if (a[0] != 0)  /* The aborted transaction must leave A unchanged.  */
++    __builtin_abort ();
++  return 0;  /* Explicit: pre-C99 modes give no implicit return from main.  */
++}
+--- gcc/testsuite/gcc.target/s390/s390.exp	2015-06-18 16:32:12.000000000 +0200
++++ gcc/testsuite/gcc.target/s390/s390.exp	2016-05-11 17:12:20.000000000 +0200
+@@ -37,6 +37,21 @@ proc check_effective_target_htm { } {
+     }] "-march=zEC12 -mzarch" ] } { return 0 } else { return 1 }
+ }
+ 
++# Return 1 if vector (va - vector add) instructions are understood by
++# the assembler and can be executed.  This also covers checking for
++# the VX kernel feature.  A kernel without that feature does not
++# enable the vector facility and the following check will die with a
++# signal.
++proc check_effective_target_vector { } {
++    if { ![check_runtime s390_check_vector [subst {
++	int main (void)
++	{
++	    asm ("va %%v24, %%v26, %%v28, 3" : : : "v24", "v26", "v28");
++	    return 0;
++	}
++    }] "-march=z13 -mzarch" ] } { return 0 } else { return 1 }
++}
++
+ # If a testcase doesn't have special options, use these.
+ global DEFAULT_CFLAGS
+ if ![info exists DEFAULT_CFLAGS] then {
+@@ -59,5 +74,8 @@ set-torture-options $HOTPATCH_TEST_OPTS
+ gcc-dg-runtest [lsort [glob -nocomplain $hotpatch_tests]] $DEFAULT_CFLAGS
+ torture-finish
+ 
++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \
++	"" $DEFAULT_CFLAGS
++
+ # All done.
+ dg-finish
+--- gcc/testsuite/gcc.target/s390/vector/int128-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/int128-1.c	2016-05-11 18:10:56.000000000 +0200
+@@ -0,0 +1,47 @@
++/* Check that vaq/vsq are used for int128 operations.  */
++
++/* { dg-do compile { target { lp64 } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++
++const __int128 c = (__int128)0x0123456789abcd55 + ((__int128)7 << 64);
++
++
++__int128
++addreg(__int128 a, __int128 b)
++{
++  return a + b;
++}
++
++__int128
++addconst(__int128 a)
++{
++  return a + c;
++}
++
++__int128
++addmem(__int128 *a, __int128_t *b)
++{
++  return *a + *b;
++}
++
++__int128
++subreg(__int128 a, __int128 b)
++{
++  return a - b;
++}
++
++__int128
++subconst(__int128 a)
++{
++  return a - c; /* This becomes vaq as well.  */
++}
++
++__int128
++submem(__int128 *a, __int128_t *b)
++{
++  return *a - *b;
++}
++
++/* { dg-final { scan-assembler-times "vaq" 4 } } */
++/* { dg-final { scan-assembler-times "vsq" 2 } } */
+--- gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c	2016-05-11 18:11:22.000000000 +0200
+@@ -0,0 +1,100 @@
++/* The z13 stpcpy implementation plays some alignment tricks for good
++   performance.  This test tries to make sure it works correctly and
++   does not access bytes beyond the source and destination
++   strings.  */
++
++/* { dg-do run } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++#include <stdio.h>
++#include <sys/mman.h>
++
++#define PAGE_SIZE 4096
++
++struct {
++  char unused[PAGE_SIZE - 32];
++  char m32[15]; /* page bndry - 32 */
++  char m17[1];
++  char m16[1];
++  char m15[14];
++  char m1[1];
++  char next_page[PAGE_SIZE];
++} __attribute__((aligned(PAGE_SIZE))) s, d; /* Type attribute: aligns s and d.  */
++
++char *__attribute__((noinline))
++my_stpcpy(char *dest, const char *src)
++{
++  return __builtin_stpcpy (dest, src);
++}
++
++void __attribute__ ((noinline))
++check (char *dest, char *src, size_t len)
++{
++  char *result;
++
++  result = my_stpcpy (dest, src);
++  if (result != dest + len)
++    __builtin_abort ();
++  if (__builtin_memcmp (src, dest, len) != 0)
++    __builtin_abort ();
++}
++
++int
++main ()
++{
++  char *src[5] = { s.m32, s.m17, s.m16, s.m15, s.m1 };
++  char *dst[5] = { d.m32, d.m17, d.m16, d.m15, d.m1 };
++  int len[8] = { 33, 32, 31, 17, 16, 15, 1, 0 };
++  int i, j, k;
++  char backup;
++
++  for (i = 0; i < sizeof (s); i++)
++    ((char*)&s)[i] = i % 26 + 97;
++
++  for (i = 0; i < 5; i++)
++    for (j = 0; j < 5; j++)
++      for (k = 0; k < 8; k++)
++	{
++	  backup = src[j][len[k]];
++	  src[j][len[k]] = 0;
++	  __builtin_memset (&d, 0, sizeof (d));
++	  check (dst[i], src[j], len[k]);
++	  src[j][len[k]] = backup;
++	}
++
++  /* Make all source strings end before the page boundary.  */
++  backup = s.m1[0];
++  s.m1[0] = 0;
++
++  if (mprotect (&s.next_page, PAGE_SIZE, PROT_NONE) == -1)
++    perror ("mprotect src");
++
++  for (i = 0; i < 5; i++)
++    for (j = 0; j < 5; j++)
++      check (dst[i], src[j],
++	     PAGE_SIZE - ((unsigned long)src[j] & ((1UL << 12) - 1)) - 1);
++
++  if (mprotect (&s.next_page, PAGE_SIZE, PROT_READ | PROT_WRITE) == -1)
++    perror ("mprotect src");
++
++  s.m1[0] = backup;
++
++  if (mprotect (&d.next_page, PAGE_SIZE, PROT_NONE) == -1)
++    perror ("mprotect dst");
++
++  for (i = 0; i < 5; i++)
++    for (j = 0; j < 5; j++)
++      {
++	int len = PAGE_SIZE - ((unsigned long)dst[i] & ((1UL << 12) - 1)) - 1;
++	char backup = src[j][len];
++
++	src[j][len] = 0;
++	__builtin_memset (&d, 0,
++			  (unsigned long)&d.next_page - (unsigned long)&d);
++	check (dst[i], src[j], len);
++	src[j][len] = backup;
++      }
++
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* Make sure the last argument is fetched from the argument overflow area.  */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,160\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,96\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef double v2df __attribute__((vector_size(16)));
++
++v2df
++add (v2df a, v2df b, v2df c, v2df d,
++     v2df e, v2df f, v2df g, v2df h, v2df i)
++{
++  return a + b + c + d + e + f + g + h + i;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,15 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* This needs to be v24 = v24 * v26 + v28 */
++/* { dg-final { scan-assembler "vfmadb\t%v24,%v24,%v26,%v28" } } */
++
++typedef double v2df __attribute__((vector_size(16)));
++
++v2df
++madd (v2df a, v2df b, v2df c)
++{
++  return a * b + c;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,101 @@
++/* Check calling convention in the vector ABI regarding vector like structs.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* addA */
++/* { dg-final { scan-assembler-times "vfadb\t%v24,%v24,%v26" 1 } } */
++
++/* addB and addE */
++/* { dg-final { scan-assembler-times "vah\t%v24,%v\[0-9\]*,%v\[0-9\]*" 2 } } */
++
++/* addC */
++/* { dg-final { scan-assembler-times "vag\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */
++
++/* addB and addC are expected to read the arguments via pointers in r2 and r3 */
++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\)" 2 } } */
++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r3\\)" 2 } } */
++
++/* addD */
++/* { dg-final { scan-assembler-times "vaf\t%v24,%v24,%v26" 1 } } */
++
++/* addE */
++/* { dg-final { scan-assembler-times "vah\t%v24,%v24,%v26" 1 } } */
++
++/* addF */
++/* { dg-final { scan-assembler-times "vab\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */
++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r2,32" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r3,32" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r2" 1 { target { ! lp64 } } } } */
++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r4" 1 { target { ! lp64 } } } } */
++
++
++typedef double v2df __attribute__((vector_size(16)));
++typedef long long v2di __attribute__((vector_size(16)));
++typedef int v4si __attribute__((vector_size(16)));
++typedef short v8hi __attribute__((vector_size(16)));
++
++typedef short v2hi __attribute__((vector_size(4)));
++typedef char v4qi __attribute__((vector_size(4)));
++
++/* Vector like structs are passed in VRs.  */
++struct A { v2df a; };
++
++v2df
++addA (struct A a, struct A b)
++{
++  return a.a + b.a;
++}
++
++/* Only single element vectors qualify as vector type parms.  This one
++   is passed as a struct. Since it is bigger than 8 bytes it is passed
++   on the stack with the reference being put into r2/r3.  */
++struct B { v8hi a; char b;};
++
++v8hi
++addB (struct B a, struct B b)
++{
++  return a.a + b.a;
++}
++
++/* The resulting struct is bigger than 16 bytes and therefore passed
++   on the stack with the references residing in r2/r3.  */
++struct C { v2di __attribute__((aligned(32))) a; };
++
++v2di
++addC (struct C a, struct C b)
++{
++  return a.a + b.a;
++}
++
++/* The attribute here does not have any effect. So this struct stays
++   vector like and hence is passed in a VR.  */
++struct D { v4si __attribute__((aligned(16))) a; };
++
++v4si
++addD (struct D a, struct D b)
++{
++  return a.a + b.a;
++}
++
++
++/* Smaller vectors are passed in vector registers. This also applies
++   for vector like structs.  */
++struct E { v2hi a; };
++
++v2hi
++addE (struct E a, struct E b)
++{
++  return a.a + b.a;
++}
++
++/* This struct is not passed in VRs because of padding.  But since it
++   fits in a GPR and has a power of two size, it is passed in
++   GPRs.  */
++struct F { v4qi __attribute__((aligned(8))) a; };
++
++v4qi
++addF (struct F a, struct F b)
++{
++  return a.a + b.a;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check calling convention in the vector ABI.  Smaller vectors need to
++   be placed left-justified in the stack slot.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "lde\t%.*,160\\\(%r15\\\)" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,168\\\(%r15\\\)" 1 { target lp64 } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,96\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
++/* { dg-final { scan-assembler-times "lde\t%.*,100\\\(%r15\\\)" 1 { target { ! lp64 } } } } */
++
++typedef char __attribute__((vector_size(4))) v4qi;
++
++v4qi
++foo (v4qi a, v4qi b, v4qi c, v4qi d, v4qi e,
++     v4qi f, v4qi g, v4qi h, v4qi i, v4qi j)
++{
++  return (a + b + c + d + e + f + g + h + i + j);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,48 @@
++/* Check alignment convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++#include <stddef.h>
++
++/* Vector types get an 8 byte alignment.  */
++typedef double v2df __attribute__((vector_size(16)));
++typedef struct
++{
++  char a;
++  v2df b;
++} A;
++char c1[offsetof (A, b) == 8 ? 0 : -1];
++
++/* Smaller vectors allow for smaller alignments.  */
++typedef char v4qi __attribute__((vector_size(4)));
++typedef struct
++{
++  char a;
++  v4qi b;
++} B;
++char c2[offsetof (B, b) == 4 ? 0 : -1];
++
++
++typedef double v4df __attribute__((vector_size(32)));
++typedef struct
++{
++  char a;
++  v4df b;
++} C;
++char c3[offsetof (C, b) == 8 ? 0 : -1];
++
++/* However, we allow the programmer to choose a bigger alignment.  */
++typedef struct
++{
++  char a;
++  v2df b __attribute__((aligned(16)));
++} D;
++char c4[offsetof (D, b) == 16 ? 0 : -1];
++
++typedef struct
++{
++  char a;
++  v2df b;
++} __attribute__((packed)) E;
++char c5[offsetof (E, b) == 1 ? 0 : -1];
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mno-vx" } */
++
++/* The function passes arguments whose calling conventions change with
++   -mvx/-mno-vx.  In that case GCC has to emit the ABI attribute to
++   allow GDB and Binutils to detect this.  */
++/* { dg-final { scan-assembler "gnu_attribute 8, 1" } } */
++
++typedef double v2df __attribute__((vector_size(16)));
++
++v2df
++add (v2df a, v2df b, v2df c, v2df d,
++     v2df e, v2df f, v2df g, v2df h, v2df i)
++{
++  return a + b + c + d + e + f + g + h + i;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,53 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* No abi attribute should be emitted when nothing relevant happened.  */
++/* { dg-final { scan-assembler-not "gnu_attribute" } } */
++
++#include <stdarg.h>
++
++/* Local use is ok.  */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++static
++v4si __attribute__((__noinline__))
++foo (v4si a)
++{
++  return a + (v4si){ 1, 2, 3, 4 };
++}
++
++int
++bar (int a)
++{
++  return foo ((v4si){ 1, 1, 1, 1 })[1];
++}
++
++/* Big vector type only used as function argument and return value
++   without being a struct/union member.  The alignment change is not
++   relevant here.  */
++
++typedef double v4df __attribute__((vector_size(32)));
++
++v4df
++add (v4df a, v4df b, v4df c, v4df d,
++     v4df e, v4df f, v4df g, v4df h, v4df i)
++{
++  return a + b + c + d + e + f + g + h + i;
++}
++
++double
++bar2 (int n, ...)
++{
++  double ret;
++  v4df a;
++  va_list va;
++
++  va_start (va, n);
++  ret = va_arg (va, v4df)[2];
++  va_end (va);
++
++  return ret;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef double v4df __attribute__((vector_size(32)));
++typedef struct { v4df a; } s;
++
++s
++add (v4df a, v4df b, v4df c, v4df d,
++     v4df e, v4df f, v4df g, v4df h, v4df i)
++{
++  s t;
++  t.a = a + b + c + d + e + f + g + h + i;
++  return t;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,17 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++extern void bar (v4si);
++
++void
++foo (int a)
++{
++  v4si b = (v4si){ a, a, a, a };
++  bar (b);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++#include <stdarg.h>
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++extern void bar (int, ...);
++
++void
++foo (int a)
++{
++  v4si b = (v4si){ a, a, a, a };
++  bar (1, b);
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c	2016-05-11 17:32:39.000000000 +0200
+@@ -0,0 +1,24 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++#include <stdarg.h>
++
++typedef int __attribute__((vector_size(16))) v4si;
++
++int
++bar (int n, ...)
++{
++  int ret;
++  v4si a;
++  va_list va;
++
++  va_start (va, n);
++  ret = va_arg (va, v4si)[2];
++  va_end (va);
++
++  return ret;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,24 @@
++/* Check calling convention in the vector ABI for single element vectors.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 7 } } */
++
++typedef int  __attribute__((vector_size(16))) v4si;
++
++typedef char __attribute__((vector_size(1))) v1qi;
++typedef short int __attribute__((vector_size(2))) v1hi;
++typedef int __attribute__((vector_size(4))) v1si;
++typedef long long __attribute__((vector_size(8))) v1di;
++typedef float __attribute__((vector_size(4))) v1sf;
++typedef double __attribute__((vector_size(8))) v1df;
++typedef long double __attribute__((vector_size(16))) v1tf;
++
++v1qi foo1 (v4si a, v1qi b) { return b; }
++v1hi foo2 (v4si a, v1hi b) { return b; }
++v1si foo3 (v4si a, v1si b) { return b; }
++v1di foo4 (v4si a, v1di b) { return b; }
++v1sf foo5 (v4si a, v1sf b) { return b; }
++v1df foo6 (v4si a, v1df b) { return b; }
++v1tf foo7 (v4si a, v1tf b) { return b; }
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,12 @@
++/* Check calling convention in the vector ABI for single element vectors.  */
++
++/* { dg-do compile { target { lp64 } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 1 } } */
++
++typedef int  __attribute__((vector_size(16))) v4si;
++
++typedef __int128_t __attribute__((vector_size(16))) v1ti;
++
++v1ti foo (v4si a, v1ti b) { return b; }
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,37 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* c.i and c.j are passed by reference since a struct with two
++   elements is no vector type argument.  */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,0\\(%r3\\)" } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,8\\(%r3\\)" } } */
++
++/* just_v2si is passed in a vector reg if it is an incoming arg.
++   However, as return value it is passed via hidden first pointer
++   argument.  */
++/* { dg-final { scan-assembler ".*st.*\t%v\[0-9\]*,0\\(%r2\\)" } } */
++
++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */
++
++typedef int __attribute__ ((vector_size(8))) v2si;
++
++struct just_v2si
++{
++  v2si i;
++};
++
++struct two_v2si
++{
++  v2si i, j;
++};
++
++struct just_v2si
++add_structvecs (v2si a, struct just_v2si b, struct two_v2si c)
++{
++  struct just_v2si res;
++
++  res.i = a + b.i + c.i + c.j;
++  return res;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,60 @@
++/* Check calling convention with variable argument lists in the vector
++   ABI.  */
++
++/* { dg-do run { target { s390*-*-* } } } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++
++/* Make sure arguments are fetched from the argument overflow area.  */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,352\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,368\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,376\\(%r15\\)" { target lp64 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,392\\(%r15\\)" { target lp64 } } } */
++
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,208\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,224\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,232\\(%r15\\)" { target ilp32 } } } */
++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,248\\(%r15\\)" { target ilp32 } } } */
++
++/* { dg-final { cleanup-saved-temps } } */
++
++#include <stdarg.h>
++
++extern void abort (void);
++
++typedef long long v2di __attribute__((vector_size(16)));
++typedef int v2si __attribute__((vector_size(8)));
++
++v2di __attribute__((noinline))
++add (int a, ...)
++{
++  int i;
++  va_list va;
++  v2di di_result = { 0, 0 };
++  v2si si_result = (v2si){ 0, 0 };
++
++  va_start (va, a);
++
++  di_result += va_arg (va, v2di);
++  si_result += va_arg (va, v2si);
++  di_result += va_arg (va, v2di);
++  si_result += va_arg (va, v2si);
++
++  va_end (va);
++
++  di_result[0] += si_result[0];
++  di_result[1] += si_result[1];
++
++  return di_result;
++}
++
++int
++main ()
++{
++  v2di r = add (4, (v2di){ 11, 21 }, (v2si){ 12, 22 }, (v2di){ 13, 23 }, (v2si){ 14, 24 });
++
++  if (r[0] != 50 || r[1] != 90)
++    abort ();
++
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check calling convention in the vector ABI.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -Wno-implicit-function-declaration" } */
++
++
++typedef long v2di __attribute__((vector_size(16)));
++extern v2di foo1 (int, v2di);
++extern v2di foo2 (int, int);
++extern v2di foo3 (int, ...);
++
++v2di bar1 (int a)  { return foo2 (1, a); }
++v2di bar2 (int a)  { return foo3 (1, a); }
++v2di bar3 (v2di a) { return foo1 (1, a); }
++v2di bar4 (v2di a) { return foo3 (1, a); }
++
++int bar5 (int a)  { return foo4 (1, a); }
++int bar6 (v2di a) { return foo4 (1, a); } /* { dg-error "Vector argument passed to unprototyped function" } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,38 @@
++/* { dg-do run { target { s390*-*-* } } } */
++/* { dg-require-effective-target vector } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* For FP zero checks we use the ltdbr instruction.  Since this is a
++   load and test it actually writes the FPR.  Whenever an FPR gets
++   written the rest of the overlapping VR is clobbered.  */
++typedef double __attribute__((vector_size(16))) v2df;
++
++v2df a = { 1.0, 2.0 };
++
++extern void abort (void);
++
++void __attribute__((noinline))
++foo (v2df a)
++{
++  v2df b = { 1.0, 3.0 };
++
++  b -= a;
++
++  /* Take away all the VRs not overlapping with FPRs.  */
++  asm volatile ("" : : :
++		"v16","v17","v18","v19",
++		"v20","v21","v22","v23",
++		"v24","v25","v26","v27",
++		"v28","v29","v30","v31");
++  if (b[0] != 0.0) /* ltdbr */
++    abort ();
++  if (b[1] != 1.0)
++    abort ();
++}
++
++int
++main ()
++{
++  foo (a);
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,45 @@
++/* Check that the proper unsigned compare instructions are being generated.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vchlb" 1 } } */
++/* { dg-final { scan-assembler-times "vchlh" 1 } } */
++/* { dg-final { scan-assembler-times "vchlf" 1 } } */
++/* { dg-final { scan-assembler-times "vchlg" 1 } } */
++
++typedef __attribute__((vector_size(16))) signed char v16qi;
++typedef __attribute__((vector_size(16))) unsigned char uv16qi;
++
++typedef __attribute__((vector_size(16))) signed short v8hi;
++typedef __attribute__((vector_size(16))) unsigned short uv8hi;
++
++typedef __attribute__((vector_size(16))) signed int v4si;
++typedef __attribute__((vector_size(16))) unsigned int uv4si;
++
++typedef __attribute__((vector_size(16))) signed long long v2di;
++typedef __attribute__((vector_size(16))) unsigned long long uv2di;
++
++v16qi
++f (uv16qi a, uv16qi b)
++{
++  return a > b;
++}
++
++v8hi
++g (uv8hi a, uv8hi b)
++{
++  return a > b;
++}
++
++v4si
++h (uv4si a, uv4si b)
++{
++  return a > b;
++}
++
++v2di
++i (uv2di a, uv2di b)
++{
++  return a > b;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,38 @@
++/* Check that the proper signed compare instructions are being generated.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vchb" 1 } } */
++/* { dg-final { scan-assembler-times "vchh" 1 } } */
++/* { dg-final { scan-assembler-times "vchf" 1 } } */
++/* { dg-final { scan-assembler-times "vchg" 1 } } */
++
++typedef __attribute__((vector_size(16))) signed char v16qi;
++typedef __attribute__((vector_size(16))) signed short v8hi;
++typedef __attribute__((vector_size(16))) signed int v4si;
++typedef __attribute__((vector_size(16))) signed long long v2di;
++
++v16qi
++f (v16qi a, v16qi b)
++{
++  return a > b;
++}
++
++v8hi
++g (v8hi a, v8hi b)
++{
++  return a > b;
++}
++
++v4si
++h (v4si a, v4si b)
++{
++  return a > b;
++}
++
++v2di
++i (v2di a, v2di b)
++{
++  return a > b;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,48 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++
++typedef __attribute__((vector_size(16))) double v2df;
++
++v2df
++adddbl (v2df a, v2df b)
++{
++  return a + b;
++}
++/* { dg-final { scan-assembler-times "vfadb" 1 } } */
++
++v2df
++subdbl (v2df a, v2df b)
++{
++  return a - b;
++}
++/* { dg-final { scan-assembler-times "vfsdb" 1 } } */
++
++v2df
++muldbl (v2df a, v2df b)
++{
++  return a * b;
++}
++/* { dg-final { scan-assembler-times "vfmdb" 1 } } */
++
++v2df
++divdbl (v2df a, v2df b)
++{
++  return a / b;
++}
++/* { dg-final { scan-assembler-times "vfd" 1 } } */
++
++v2df
++fmadbl (v2df a, v2df b, v2df c)
++{
++  return a * b + c;
++}
++/* { dg-final { scan-assembler-times "vfma" 1 } } */
++
++v2df
++fmsdbl (v2df a, v2df b, v2df c)
++{
++  return a * b - c;
++}
++/* { dg-final { scan-assembler-times "vfms" 1 } } */
++
++/* { dg-final { cleanup-saved-temps } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c	2016-05-11 17:38:00.000000000 +0200
+@@ -0,0 +1,83 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++/* { dg-require-effective-target vector } */
++/* { dg-require-effective-target int128 } */
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++typedef unsigned __int128  uv1ti __attribute__((vector_size(16)));
++
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0xff00ff00ff00ff00, 0x00ff00ff00ff00ff };
++}
++/* { dg-final { scan-assembler-times "vgbm\t%v24,43605" 1 } } */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0xff0000ff, 0x0000ffff, 0xffff0000, 0x00ffff00 };
++}
++/* { dg-final { scan-assembler-times "vgbm\t%v24,37830" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3a ()
++{
++  return (uv8hi){ 0xff00, 0xff00, 0xff00, 0xff00,
++      0xff00, 0xff00, 0xff00, 0xff00 };
++}
++/* { dg-final { scan-assembler-times "vgbm\t%v24,43690" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3b ()
++{
++  return (uv8hi){ 0x00ff, 0x00ff, 0x00ff, 0x00ff,
++      0x00ff, 0x00ff, 0x00ff, 0x00ff };
++}
++/* { dg-final { scan-assembler-times "vgbm\t%v24,21845" 1 } } */
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0xff, 0xff, 0xff, 0xff,
++      0, 0, 0, 0,
++      0xff, 0, 0xff, 0,
++      0, 0xff, 0, 0xff };
++}
++
++uv1ti __attribute__((noinline))
++foo5 ()
++{
++  return (uv1ti){ 0xff00ff00ff00ff00ULL };
++}
++
++/* { dg-final { scan-assembler-times "vgbm\t%v24,61605" 1 } } */
++
++int
++main ()
++{
++  if (foo1()[1] != 0x00ff00ff00ff00ffULL)
++    __builtin_abort ();
++
++  if (foo2()[1] != 0x0000ffff)
++    __builtin_abort ();
++
++  if (foo3a()[1] != 0xff00)
++    __builtin_abort ();
++
++  if (foo3b()[1] != 0x00ff)
++    __builtin_abort ();
++
++  if (foo4()[1] != 0xff)
++    __builtin_abort ();
++
++  if (foo5()[0] != 0xff00ff00ff00ff00ULL)
++    __builtin_abort ();
++
++  return 0;
++}
++
++/* { dg-final { cleanup-saved-temps } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,46 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++
++/* The elements differ.  */
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0x001fffffffffff00, 0x0000ffffffffff00 };
++}
++
++/* Non-contiguous bitmasks */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f };
++}
++
++uv8hi __attribute__((noinline))
++foo3a ()
++{
++  return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700,
++      0xf700, 0xf700, 0xf700, 0xf700 };
++}
++
++uv8hi __attribute__((noinline))
++foo3b ()
++{
++  return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff,
++      0x10ff, 0x10ff, 0x10ff, 0x10ff };
++}
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82 };
++}
++/* { dg-final { scan-assembler-not "vgbm" } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,70 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++/* { dg-require-effective-target vector } */
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0x000fffffffffff00, 0x000fffffffffff00 };
++}
++/* { dg-final { scan-assembler-times "vgmg\t%v24,12,55" 1 } } */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0xff00000f, 0xff00000f, 0xff00000f, 0xff00000f };
++}
++/* { dg-final { scan-assembler-times "vgmf\t%v24,28,7" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3a ()
++{
++  return (uv8hi){ 0xfff0, 0xfff0, 0xfff0, 0xfff0,
++      0xfff0, 0xfff0, 0xfff0, 0xfff0 };
++}
++/* { dg-final { scan-assembler-times "vgmh\t%v24,0,11" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3b ()
++{
++  return (uv8hi){ 0x0fff, 0x0fff, 0x0fff, 0x0fff,
++      0x0fff, 0x0fff, 0x0fff, 0x0fff };
++}
++/* { dg-final { scan-assembler-times "vgmh\t%v24,4,15" 1 } } */
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0x8, 0x8, 0x8, 0x8,
++      0x8, 0x8, 0x8, 0x8,
++      0x8, 0x8, 0x8, 0x8,
++      0x8, 0x8, 0x8, 0x8 };
++}
++/* { dg-final { scan-assembler-times "vgmb\t%v24,4,4" 1 } } */
++
++int
++main ()
++{
++  if (foo1()[1] != 0x000fffffffffff00ULL)
++    __builtin_abort ();
++
++  if (foo2()[1] != 0xff00000f)
++    __builtin_abort ();
++
++  if (foo3a()[1] != 0xfff0)
++    __builtin_abort ();
++
++  if (foo3b()[1] != 0x0fff)
++    __builtin_abort ();
++
++  if (foo4()[1] != 0x8)
++    __builtin_abort ();
++  return 0;
++}
++
++/* { dg-final { cleanup-saved-temps } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c	2016-05-11 17:38:00.000000000 +0200
+@@ -0,0 +1,55 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++/* { dg-require-effective-target int128 } */
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++typedef unsigned __int128  uv1ti __attribute__((vector_size(16)));
++
++/* The elements differ.  */
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0x000fffffffffff00, 0x0000ffffffffff00 };
++}
++
++/* Non-contiguous bitmasks */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f };
++}
++
++uv8hi __attribute__((noinline))
++foo3a ()
++{
++  return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700,
++      0xf700, 0xf700, 0xf700, 0xf700 };
++}
++
++uv8hi __attribute__((noinline))
++foo3b ()
++{
++  return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff,
++      0x10ff, 0x10ff, 0x10ff, 0x10ff };
++}
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82,
++      0x82, 0x82, 0x82, 0x82 };
++}
++
++/* We do not have vgmq.  */
++uv1ti
++foo5()
++{
++  return (uv1ti){ ((unsigned __int128)1 << 53) - 1 };
++}
++/* { dg-final { scan-assembler-not "vgm" } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-init-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-init-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,68 @@
++/* Check that the vec_init expander does its job.  */
++
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++
++
++
++
++typedef __attribute__((vector_size(16))) signed int v4si;
++
++extern v4si G;
++
++v4si
++f (signed int a)
++{
++  return G == a;
++}
++/* { dg-final { scan-assembler-times "vrepf" 1 } } */
++
++v4si
++g (signed int *a)
++{
++  return G == *a;
++}
++/* { dg-final { scan-assembler-times "vlrepf" 1 } } */
++
++v4si
++h ()
++{
++  return G == 1;
++}
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */
++
++v4si
++i ()
++{
++  return G == -1;
++}
++/* { dg-final { scan-assembler-times "vone" 1 } } */
++
++v4si
++j ()
++{
++  return G == 0;
++}
++/* { dg-final { scan-assembler-times "vzero" 1 } } */
++
++v4si
++k ()
++{
++  return G == (v4si){ 0xff80, 0xff80, 0xff80, 0xff80 };
++}
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,16,24" 1 } } */
++
++v4si
++l ()
++{
++  return G == (v4si){ 0xf000000f, 0xf000000f, 0xf000000f, 0xf000000f };
++}
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,28,3" 1 } } */
++
++v4si
++m ()
++{
++  return G == (v4si){ 0x00ff00ff, 0x0000ffff, 0xffff0000, 0xff00ff00 };
++}
++/* { dg-final { scan-assembler-times "vgbm\t%v.*,21450" 1 } } */
+--- gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,40 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++typedef __attribute__((vector_size(16))) signed int v4si;
++
++v4si
++adddbl (v4si a, v4si b)
++{
++  return a + b;
++}
++
++v4si
++subdbl (v4si a, v4si b)
++{
++  return a - b;
++}
++
++v4si
++muldbl (v4si a, v4si b)
++{
++  return a * b;
++}
++
++v4si
++divdbl (v4si a, v4si b)
++{
++  return a / b;
++}
++
++v4si
++fmadbl (v4si a, v4si b, v4si c)
++{
++  return a * b + c;
++}
++
++v4si
++fmsdbl (v4si a, v4si b, v4si c)
++{
++  return a * b - c;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c	2016-05-11 17:12:28.000000000 +0200
+@@ -0,0 +1,49 @@
++/* Check that we use the scalar variants of vector compares.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "wfcedbs\t%v\[0-9\]*,%v0,%v2" 2 } } */
++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v0,%v2" 1 } } */
++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
++/* { dg-final { scan-assembler-times "locrne" 5 } } */
++/* { dg-final { scan-assembler-times "locrno" 1 } } */
++
++
++int
++eq (double a, double b)
++{
++  return a == b;
++}
++
++int
++ne (double a, double b)
++{
++  return a != b;
++}
++
++int
++gt (double a, double b)
++{
++  return a > b;
++}
++
++int
++ge (double a, double b)
++{
++  return a >= b;
++}
++
++int
++lt (double a, double b)
++{
++  return a < b;
++}
++
++int
++le (double a, double b)
++{
++  return a <= b;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,108 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "veslb" 2 } } */
++/* { dg-final { scan-assembler-times "veslh" 2 } } */
++/* { dg-final { scan-assembler-times "veslf" 2 } } */
++/* { dg-final { scan-assembler-times "veslg" 2 } } */
++
++/* { dg-final { scan-assembler-times "vesrab" 1 } } */
++/* { dg-final { scan-assembler-times "vesrah" 1 } } */
++/* { dg-final { scan-assembler-times "vesraf" 1 } } */
++/* { dg-final { scan-assembler-times "vesrag" 1 } } */
++
++/* { dg-final { scan-assembler-times "vesrlb" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlh" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlf" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlg" 1 } } */
++
++/* { dg-final { scan-assembler-times "veslvb" 2 } } */
++/* { dg-final { scan-assembler-times "veslvh" 2 } } */
++/* { dg-final { scan-assembler-times "veslvf" 2 } } */
++/* { dg-final { scan-assembler-times "veslvg" 2 } } */
++
++/* { dg-final { scan-assembler-times "vesravb" 1 } } */
++/* { dg-final { scan-assembler-times "vesravh" 1 } } */
++/* { dg-final { scan-assembler-times "vesravf" 1 } } */
++/* { dg-final { scan-assembler-times "vesravg" 1 } } */
++
++/* { dg-final { scan-assembler-times "vesrlvb" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlvh" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlvf" 1 } } */
++/* { dg-final { scan-assembler-times "vesrlvg" 1 } } */
++
++typedef __attribute__((vector_size(16))) signed char v16qi;
++typedef __attribute__((vector_size(16))) unsigned char uv16qi;
++
++typedef __attribute__((vector_size(16))) signed short v8hi;
++typedef __attribute__((vector_size(16))) unsigned short uv8hi;
++
++typedef __attribute__((vector_size(16))) signed int v4si;
++typedef __attribute__((vector_size(16))) unsigned int uv4si;
++
++typedef __attribute__((vector_size(16))) signed long long v2di;
++typedef __attribute__((vector_size(16))) unsigned long long uv2di;
++
++uv16qi g_uvqi0, g_uvqi1, g_uvqi2;
++v16qi g_vqi0, g_vqi1, g_vqi2;
++
++uv8hi g_uvhi0, g_uvhi1, g_uvhi2;
++v8hi g_vhi0, g_vhi1, g_vhi2;
++
++uv4si g_uvsi0, g_uvsi1, g_uvsi2;
++v4si g_vsi0, g_vsi1, g_vsi2;
++
++uv2di g_uvdi0, g_uvdi1, g_uvdi2;
++v2di g_vdi0, g_vdi1, g_vdi2;
++
++void
++shift_left_by_scalar (int s)
++{
++  g_uvqi0 = g_uvqi1 << s;
++  g_vqi0 = g_vqi1 << s;
++  g_uvhi0 = g_uvhi1 << s;
++  g_vhi0 = g_vhi1 << s;
++  g_uvsi0 = g_uvsi1 << s;
++  g_vsi0 = g_vsi1 << s;
++  g_uvdi0 = g_uvdi1 << s;
++  g_vdi0 = g_vdi1 << s;
++}
++
++void
++shift_right_by_scalar (int s)
++{
++  g_uvqi0 = g_uvqi1 >> s;
++  g_vqi0 = g_vqi1 >> s;
++  g_uvhi0 = g_uvhi1 >> s;
++  g_vhi0 = g_vhi1 >> s;
++  g_uvsi0 = g_uvsi1 >> s;
++  g_vsi0 = g_vsi1 >> s;
++  g_uvdi0 = g_uvdi1 >> s;
++  g_vdi0 = g_vdi1 >> s;
++}
++
++void
++shift_left_by_vector ()
++{
++  g_uvqi0 = g_uvqi1 << g_uvqi2;
++  g_vqi0 = g_vqi1 << g_vqi2;
++  g_uvhi0 = g_uvhi1 << g_uvhi2;
++  g_vhi0 = g_vhi1 << g_vhi2;
++  g_uvsi0 = g_uvsi1 << g_uvsi2;
++  g_vsi0 = g_vsi1 << g_vsi2;
++  g_uvdi0 = g_uvdi1 << g_uvdi2;
++  g_vdi0 = g_vdi1 << g_vdi2;
++}
++
++void
++shift_right_by_vector ()
++{
++  g_uvqi0 = g_uvqi1 >> g_uvqi2;
++  g_vqi0 = g_vqi1 >> g_vqi2;
++  g_uvhi0 = g_uvhi1 >> g_uvhi2;
++  g_vhi0 = g_vhi1 >> g_vhi2;
++  g_uvsi0 = g_uvsi1 >> g_uvsi2;
++  g_vsi0 = g_vsi1 >> g_vsi2;
++  g_uvdi0 = g_uvdi1 >> g_uvdi2;
++  g_vdi0 = g_vdi1 >> g_vdi2;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c	2016-05-11 17:12:20.000000000 +0200
+@@ -0,0 +1,51 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++/* { dg-final { scan-assembler-times "vsb" 2 } } */
++/* { dg-final { scan-assembler-times "vsh" 2 } } */
++/* { dg-final { scan-assembler-times "vsf" 2 } } */
++/* { dg-final { scan-assembler-times "vsg" 2 } } */
++/* { dg-final { scan-assembler-times "vfs" 1 } } */
++
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef signed char        v16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef signed short        v8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef signed int          v4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++typedef signed long long    v2di __attribute__((vector_size(16)));
++typedef double              v2df __attribute__((vector_size(16)));
++
++uv16qi g_uvqi0, g_uvqi1, g_uvqi2;
++v16qi g_vqi0, g_vqi1, g_vqi2;
++
++uv8hi g_uvhi0, g_uvhi1, g_uvhi2;
++v8hi g_vhi0, g_vhi1, g_vhi2;
++
++uv4si g_uvsi0, g_uvsi1, g_uvsi2;
++v4si g_vsi0, g_vsi1, g_vsi2;
++
++uv2di g_uvdi0, g_uvdi1, g_uvdi2;
++v2di g_vdi0, g_vdi1, g_vdi2;
++
++v2df g_vdf0, g_vdf1, g_vdf2;
++
++void
++sub1 ()
++{
++  g_vqi0 = g_vqi1 - g_vqi2;
++  g_uvqi0 = g_uvqi1 - g_uvqi2;
++
++  g_vhi0 = g_vhi1 - g_vhi2;
++  g_uvhi0 = g_uvhi1 - g_uvhi2;
++
++  g_vsi0 = g_vsi1 - g_vsi2;
++  g_uvsi0 = g_uvsi1 - g_uvsi2;
++
++  g_vdi0 = g_vdi1 - g_vdi2;
++  g_uvdi0 = g_uvdi1 - g_uvdi2;
++
++  g_vdf0 = g_vdf1 - g_vdf2;
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c	2016-05-11 18:08:10.000000000 +0200
+@@ -0,0 +1,23 @@
++/* A const vector operand is forced into a register in
++   s390_expand_vcond.
++   This testcase once failed because the target mode (v2di) was picked
++   for the reg instead of the mode of the other comparison
++   operand.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13" } */
++
++typedef __attribute__((vector_size(16))) long   v2di;
++typedef __attribute__((vector_size(16))) double v2df;
++
++v2di
++foo (v2df a)
++{
++  return a == (v2df){ 0.0, 0.0 };
++}
++
++v2di
++bar (v2df a)
++{
++  return (v2df){ 1.0, 1.0 } == (v2df){ 0.0, 0.0 };
++}
+--- gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c	2016-05-11 17:41:29.000000000 +0200
+@@ -0,0 +1,58 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */
++/* { dg-require-effective-target vector } */
++
++typedef unsigned char     uv16qi __attribute__((vector_size(16)));
++typedef unsigned short     uv8hi __attribute__((vector_size(16)));
++typedef unsigned int       uv4si __attribute__((vector_size(16)));
++typedef unsigned long long uv2di __attribute__((vector_size(16)));
++
++uv2di __attribute__((noinline))
++foo1 ()
++{
++  return (uv2di){ 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepig\t%v24,32527" 1 } } */
++
++uv4si __attribute__((noinline))
++foo2 ()
++{
++  return (uv4si){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepif\t%v24,32527" 1 } } */
++
++uv8hi __attribute__((noinline))
++foo3 ()
++{
++  return (uv8hi){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f,
++      0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f };
++}
++/* { dg-final { scan-assembler-times "vrepih\t%v24,32527" 1 } } */
++
++uv16qi __attribute__((noinline))
++foo4 ()
++{
++  return (uv16qi){ 0x77, 0x77, 0x77, 0x77,
++      0x77, 0x77, 0x77, 0x77,
++      0x77, 0x77, 0x77, 0x77,
++      0x77, 0x77, 0x77, 0x77 };
++}
++/* { dg-final { scan-assembler-times "vrepib\t%v24,119" 1 } } */
++
++int
++main ()
++{
++  if (foo1()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo2()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo3()[1] != 0x7f0f)
++    __builtin_abort ();
++
++  if (foo4()[1] != 0x77)
++    __builtin_abort ();
++
++  return 0;
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,67 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */
++
++/* { dg-final { scan-assembler-times "vfcedb\t" 1 } } */
++/* { dg-final { scan-assembler-times "vfchdb\t" 2 } } */
++/* { dg-final { scan-assembler-times "vfchedb\t" 2 } } */
++
++/* { dg-final { scan-assembler-times "vfcedbs\t" 2 } } */
++/* { dg-final { scan-assembler-times "vfchdbs\t" 2 } } */
++
++/* { dg-final { cleanup-saved-temps } } */
++
++#include <vecintrin.h>
++
++vector bool long long
++cmpeq (vector double a, vector double b)
++{
++  return vec_cmpeq (a, b); /* vfcedb */
++}
++
++vector bool long long
++cmpgt (vector double a, vector double b)
++{
++  return vec_cmpgt (a, b); /* vfchdb */
++}
++
++vector bool long long
++cmpge (vector double a, vector double b)
++{
++  return vec_cmpge (a, b); /* vfchedb */
++}
++
++vector bool long long
++cmplt (vector double a, vector double b)
++{
++  return vec_cmplt (a, b); /* vfchdb */
++}
++
++vector bool long long
++cmple (vector double a, vector double b)
++{
++  return vec_cmple (a, b); /* vfchedb */
++}
++
++int
++all_eq (vector double a, vector double b)
++{
++  return vec_all_eq (a, b);
++}
++
++int
++any_eq (vector double a, vector double b)
++{
++  return vec_any_eq (a, b);
++}
++
++int
++all_lt (vector double a, vector double b)
++{
++  return vec_all_lt (a, b);
++}
++
++int
++any_lt (vector double a, vector double b)
++{
++  return vec_any_lt (a, b);
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,11 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler "nilf\t%r2,15" } } */
++/* { dg-final { scan-assembler "vlgvb" } } */
++
++signed char
++foo(unsigned char uc)
++{
++  return __builtin_s390_vec_extract((__vector signed char){ 0 }, uc);
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++
++vector unsigned char a, b, c, d;
++
++int
++foo ()
++{
++  a = vec_genmask (0);
++  b = vec_genmask (65535);
++  c = vec_genmask (43605);
++  d = vec_genmask (37830);
++}
++
++/* { dg-final { scan-assembler-times "vzero" 1 } } */
++/* { dg-final { scan-assembler-times "vone" 1 } } */
++/* { dg-final { scan-assembler-times "vgbm\t%v.*,43605" 1 } } */
++/* { dg-final { scan-assembler-times "vgbm\t%v.*,37830" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,24 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++
++vector unsigned int a, b, c, d, e, f;
++
++int
++foo ()
++{
++  a = vec_genmasks_32 (0, 31);
++  b = vec_genmasks_32 (0, 0);
++  c = vec_genmasks_32 (31, 31);
++  d = vec_genmasks_32 (5, 5);
++  e = vec_genmasks_32 (31, 0);
++  f = vec_genmasks_32 (6, 5);
++}
++/* { dg-final { scan-assembler-times "vone" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,0,0" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,5,5" 1 } } */
++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,0" 1 } } */
++/* { dg-final { scan-assembler-times "vone" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,31 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler-times "\tlcbb\t" 4 } } */
++
++#include <vecintrin.h>
++
++/* CC will be extracted into a GPR and returned.  */
++int
++foo1 (void *ptr)
++{
++  return __lcbb (ptr, 64);
++}
++
++int
++foo2 (void *ptr)
++{
++  return __lcbb (ptr, 128) > 16;
++}
++
++int
++foo3 (void *ptr)
++{
++  return __lcbb (ptr, 256) == 16;
++}
++
++int
++foo4 (void *ptr)
++{
++  return __lcbb (ptr, 512) < 16;
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c	2016-05-11 17:34:31.000000000 +0200
+@@ -0,0 +1,80 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O0 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++signed char
++foo64 (signed char *p)
++{
++  return vec_load_bndry (p, 64)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),0" 1 } } */
++}
++
++signed char
++foo128 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 128)[0]
++    + vec_load_bndry (p + 16, 128)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),1" 2 } } */
++}
++
++signed char
++foo256 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 256)[0]
++    + vec_load_bndry (p + 16, 256)[0]
++    + vec_load_bndry (p + 32, 256)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),2" 3 } } */
++}
++
++signed char
++foo512 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 512)[0]
++    + vec_load_bndry (p + 16, 512)[0]
++    + vec_load_bndry (p + 32, 512)[0]
++    + vec_load_bndry (p + 48, 512)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),3" 4 } } */
++}
++
++signed char
++foo1024 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 1024)[0]
++    + vec_load_bndry (p + 16, 1024)[0]
++    + vec_load_bndry (p + 32, 1024)[0]
++    + vec_load_bndry (p + 48, 1024)[0]
++    + vec_load_bndry (p + 64, 1024)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),4" 5 } } */
++}
++
++signed char
++foo2048 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 2048)[0]
++    + vec_load_bndry (p + 16, 2048)[0]
++    + vec_load_bndry (p + 32, 2048)[0]
++    + vec_load_bndry (p + 48, 2048)[0]
++    + vec_load_bndry (p + 64, 2048)[0]
++    + vec_load_bndry (p + 80, 2048)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),5" 6 } } */
++}
++
++signed char
++foo4096 (signed char *p)
++{
++  return
++    vec_load_bndry (p, 4096)[0]
++    + vec_load_bndry (p + 16, 4096)[0]
++    + vec_load_bndry (p + 32, 4096)[0]
++    + vec_load_bndry (p + 48, 4096)[0]
++    + vec_load_bndry (p + 64, 4096)[0]
++    + vec_load_bndry (p + 80, 4096)[0]
++    + vec_load_bndry (p + 96, 4096)[0];
++  /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),6" 7 } } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,77 @@
++/* Test whether overloading works as expected.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector -fdump-tree-original" } */
++
++__vector int var_v4si;
++__vector unsigned var_uv4si;
++__vector bool var_bv4si;
++__vector long long var_v2di;
++__vector unsigned long long var_uv2di;
++__vector bool long long var_bv2di;
++__vector double var_v2df;
++
++int *intptr;
++unsigned *uintptr;
++double *dblptr;
++unsigned long long ull;
++const int *cintptr;
++long long* llptr;
++unsigned long long* ullptr;
++
++typedef __vector int v4si;
++typedef __vector unsigned int uv4si;
++
++v4si var2_v4si;
++uv4si var2_uv4si;
++
++void
++foo ()
++{
++  __builtin_s390_vec_scatter_element (var_v4si,  var_uv4si, intptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var2_v4si, var2_uv4si, intptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_bv4si, var_uv4si, uintptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_uv4si, var_uv4si, uintptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_v2di,  var_uv2di, llptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_bv2di, var_uv2di, ullptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_uv2di, var_uv2di, ullptr, (unsigned long long)0);
++  __builtin_s390_vec_scatter_element (var_v2df,  var_uv2di, dblptr, (unsigned long long)0);
++
++  /* While the last argument is an int there is a way to convert it to
++     unsigned long long, so this variant is supposed to match.  */
++ __builtin_s390_vec_scatter_element (var_v4si,  var_uv4si, intptr, 0);
++
++  __builtin_s390_vec_insert_and_zero (intptr);
++  __builtin_s390_vec_insert_and_zero (cintptr);
++
++  __builtin_s390_vec_promote ((signed char)1, 1);
++  __builtin_s390_vec_promote ((unsigned char)1, 1);
++  __builtin_s390_vec_promote ((short int)1, 1);
++  __builtin_s390_vec_promote ((unsigned short int)1, 1);
++  __builtin_s390_vec_promote ((int)1, 1);
++  __builtin_s390_vec_promote ((unsigned)1, 1);
++  __builtin_s390_vec_promote ((long long)1, 1);
++  __builtin_s390_vec_promote ((unsigned long long)1, 1);
++  __builtin_s390_vec_promote ((double)1, 1);
++
++  /* This is supposed to match vec_promote_s32 */
++  __builtin_s390_vec_promote (1, (signed char) -1);
++
++  /* Constants in C usually are considered int.  */
++  __builtin_s390_vec_promote (1, 1);
++
++  /* And (unsigned) long if they are too big for int.  */
++  __builtin_s390_vec_promote (1ULL << 32, 1);
++  __builtin_s390_vec_promote (1LL << 32, 1);
++}
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vscef " 5 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vsceg " 4 "original" } } */
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vllezf " 2 "original" } } */
++
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgb_noin " 2 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgh_noin " 2 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgf_noin " 4 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_noin " 4 "original" } } */
++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_dbl_noin " 1 "original" } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,54 @@
++/* Test whether overloading works as expected.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++__vector bool bv4si;
++__vector long long v2di;
++__vector unsigned long long uv2di;
++__vector bool long long bv2di;
++__vector double v2df;
++int *intptr;
++unsigned *uintptr;
++double *dblptr;
++long long ll;
++unsigned long long ull;
++const int *cintptr;
++long long* llptr;
++unsigned long long* ullptr;
++
++void
++foo ()
++{
++  __builtin_s390_vec_scatter_element (v4si,  uv4si, (int*)0, 0); /* ok */
++  __builtin_s390_vec_insert_and_zero (intptr); /* ok */
++
++  /* The unsigned pointer must not match the signed pointer.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, uintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Make sure signed int pointers don't match unsigned int pointers.  */
++  __builtin_s390_vec_scatter_element (bv4si, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Const pointers do not match unqualified operands.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Volatile pointers should not match unqualified operands (FIXME: this call still passes the const pointer cintptr).  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* The third operand needs to be double *.  */
++  __builtin_s390_vec_scatter_element (v2df, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* This is an ambiguous overload.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, 0, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Pointer to vector must not match.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, &v4si, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  /* Don't accept const int* for int*.  */
++  __builtin_s390_vec_scatter_element (v4si,  uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */
++
++  __builtin_s390_vec_load_pair (ll, ull); /* { dg-error "ambiguous overload for intrinsic" } */
++  __builtin_s390_vec_load_pair (ull, ll); /* { dg-error "ambiguous overload for intrinsic" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,19 @@
++/* Check for error messages supposed to be issued during overloading.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++
++int *intptr;
++unsigned long long ull;
++const unsigned int *ucintptr;
++
++void
++foo ()
++{
++  /* A backend check makes sure the fourth operand is a literal.  */
++  __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 256); /* { dg-error "constant argument 4 for builtin.*is out of range for target type" } */
++  __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 5); /* { dg-error "constant argument 4 for builtin.*is out of range" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,18 @@
++/* Check for error messages supposed to be issued during builtin expansion.  */
++
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-march=z13 -mzarch -mzvector" } */
++
++__vector int v4si;
++__vector unsigned uv4si;
++
++int *intptr;
++unsigned long long ull;
++const unsigned int *ucintptr;
++
++void
++foo ()
++{
++  /* A backend check makes sure the fourth operand is a literal.  */
++  __builtin_s390_vec_scatter_element (v4si, uv4si, intptr, ull); /* { dg-error "constant value required for builtin" } */
++}
+--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c	2016-05-11 17:41:24.000000000 +0200
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++vector signed char v16qi;
++vector short       v8hi;
++vector int         v4si;
++vector long long   v2di;
++
++vector unsigned char      uv16qi;
++vector unsigned short     uv8hi;
++vector unsigned int       uv4si;
++vector unsigned long long uv2di;
++
++int
++foo ()
++{
++  v16qi  = vec_splats ((signed char)0x77);
++  uv16qi = vec_splats ((unsigned char)0x77);
++
++  v8hi  = vec_splats ((short int)0x7f0f);
++  uv8hi = vec_splats ((unsigned short int)0x7f0f);
++
++  v4si  = vec_splats ((int)0x7f0f);
++  uv4si = vec_splats ((unsigned int)0x7f0f);
++
++  v2di  = vec_splats ((long long)0x7f0f);
++  uv2di = vec_splats ((unsigned long long)0x7f0f);
++}
++
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c	2016-05-11 17:53:39.000000000 +0200
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++#include <vecintrin.h>
++
++vector signed char v16qi;
++vector short       v8hi;
++vector int         v4si;
++vector long long   v2di;
++
++vector unsigned char      uv16qi;
++vector unsigned short     uv8hi;
++vector unsigned int       uv4si;
++vector unsigned long long uv2di;
++
++int
++foo ()
++{
++  v16qi  = vec_splat_s8 (-112);
++  uv16qi = vec_splat_u8 (215);
++
++  v8hi  = vec_splat_s16 (-32000);
++  uv8hi = vec_splat_u16 (64000);
++
++  v4si  = vec_splat_s32 (-32000);
++  uv4si = vec_splat_u32 (64000);
++
++  v2di  = vec_splat_s64 (-32000);
++  uv2di = vec_splat_u64 (64000);
++}
++
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-112" 1 } } */
++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-41" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-1536" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-1536" 1 } } */
++
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-32000" 1 } } */
++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-1536" 1 } } */
+--- gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c	1970-01-01 01:00:00.000000000 +0100
++++ gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c	2016-05-11 17:30:06.000000000 +0200
+@@ -0,0 +1,25 @@
++/* { dg-do compile { target { s390*-*-* } } } */
++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */
++
++/* { dg-final { scan-assembler-times "vtm" 2 } } */
++/* { dg-final { scan-assembler-times "ipm" 1 } } */
++
++#include <vecintrin.h>
++
++/* CC will be extracted into a GPR and returned.  */
++int
++foo (vector unsigned int a, vector unsigned b)
++{
++  return vec_test_mask (a, b);
++}
++
++extern void baz (void);
++
++/* In that case the ipm/srl is supposed to be optimized out by
++   combine/s390_canonicalize_comparison.  */
++int
++bar (vector unsigned int a, vector unsigned b)
++{
++  if (vec_test_mask (a, b) == 2)
++    baz ();
++}
+--- gcc/testsuite/lib/target-supports.exp	2015-06-18 16:32:16.000000000 +0200
++++ gcc/testsuite/lib/target-supports.exp	2016-05-11 17:32:08.000000000 +0200
+@@ -3800,7 +3800,8 @@ proc check_effective_target_vect_natural
+         verbose "check_effective_target_vect_natural_alignment: using cached result" 2
+     } else {
+         set et_vect_natural_alignment_saved 1
+-        if { [check_effective_target_arm_eabi] } {
++        if { [check_effective_target_arm_eabi]
++	     || [istarget s390*-*-*] } {
+             set et_vect_natural_alignment_saved 0
+         }
+     }
diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec
index d243c09..5675774 100644
--- a/SPECS/gcc.spec
+++ b/SPECS/gcc.spec
@@ -2,7 +2,7 @@
 %global SVNREV 225304
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %{release}, append them after %{gcc_release} on Release: line.
-%global gcc_release 4
+%global gcc_release 11
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
 %global multilib_64_archs sparc64 ppc64 ppc64p7 s390x x86_64
@@ -218,6 +218,21 @@ Patch18: gcc48-aarch64-ada.patch
 Patch19: gcc48-aarch64-async-unw-tables.patch
 Patch20: gcc48-aarch64-unwind-opt.patch
 Patch21: gcc48-rh1243366.patch
+Patch22: gcc48-rh1180633.patch
+Patch23: gcc48-rh1278872.patch
+Patch24: gcc48-pr67281.patch
+Patch25: gcc48-pr68680.patch
+Patch26: gcc48-rh1312436.patch
+Patch27: gcc48-pr53477.patch
+Patch28: gcc48-rh1296211.patch
+Patch29: gcc48-rh1304449.patch
+Patch30: gcc48-s390-z13.patch
+Patch31: gcc48-rh1312850.patch
+Patch32: gcc48-pr65142.patch
+Patch33: gcc48-pr52714.patch
+Patch34: gcc48-rh1344807.patch
+Patch35: gcc48-libgomp-20160715.patch
+Patch36: gcc48-pr63293.patch
 
 Patch1000: fastjar-0.97-segfault.patch
 Patch1001: fastjar-0.97-len1.patch
@@ -916,6 +931,22 @@ rm -f libgo/go/crypto/elliptic/p224{,_test}.go
 %patch19 -p0 -b .aarch64-async-unw-tables~
 %patch20 -p0 -b .aarch64-unwind-opt~
 %patch21 -p0 -b .rh1243366~
+%patch22 -p0 -b .rh1180633~
+%patch23 -p0 -b .rh1278872~
+%patch24 -p0 -b .pr67281~
+%patch25 -p0 -b .pr68680~
+%patch26 -p0 -b .rh1312436~
+%patch27 -p0 -b .pr53477~
+touch -r %{PATCH27} libstdc++-v3/python/libstdcxx/v6/printers.py
+%patch28 -p0 -b .rh1296211~
+%patch29 -p0 -b .rh1304449~
+%patch30 -p0 -b .s390-z13~
+%patch31 -p0 -b .rh1312850~
+%patch32 -p0 -b .pr65142~
+%patch33 -p0 -b .pr52714~
+%patch34 -p0 -b .rh1344807~
+%patch35 -p0 -b .libgomp-20160715~
+%patch36 -p0 -b .pr63293~
 
 %if 0%{?_enable_debug_packages}
 cat > split-debuginfo.sh <<\EOF
@@ -1229,7 +1260,7 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
 	--host=%{gcc_target_platform} --build=%{gcc_target_platform} --target=%{gcc_target_platform} --with-cpu=v7
 %endif
 %ifarch ppc64le
-       --with-cpu-64=power7 --with-tune-64=power8 \
+       --with-cpu-64=power8 --with-tune-64=power8 \
 %endif
 %ifarch ppc ppc64 ppc64p7
 %if 0%{?rhel} >= 7
@@ -2381,6 +2412,7 @@ fi
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/fxsrintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/xsaveintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/xsaveoptintrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/pkuintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/mm_malloc.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/mm3dnow.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/cpuid.h
@@ -2416,6 +2448,7 @@ fi
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/s390intrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/htmintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/htmxlintrin.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/vecintrin.h
 %endif
 %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_version}/collect2
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/crt*.o
@@ -3357,6 +3390,45 @@ fi
 %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_version}/plugin
 
 %changelog
+* Wed Aug 31 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-11
+- on aarch64 emit scheduling barriers before stack deallocation in
+  function epilogues (#1362635, PR target/63293)
+
+* Wed Aug 10 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-10
+- include vecintrin.h intrinsic header on s390 (#1182152)
+
+* Fri Jul 15 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-9
+- backport OpenMP 4.5 support to libgomp (library only; #1357060,
+  PRs libgomp/68579, libgomp/64625)
+
+* Wed Jun 15 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-8
+- fix a bug in C++ ref-to-ptr conversion (#1344807)
+- fix combiner handling of jumps on aarch64 (#1344672,
+  PR rtl-optimization/52714)
+
+* Thu Jun  9 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-7
+- ensure the timestamp on printers.py is always the same (#1344291)
+
+* Mon Jun  6 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-6
+- backport s390 z13 support (#1182152)
+- fix up -fsanitize=address on powerpc64 with 46-bit virtual address space
+  (#1312850)
+- throw exception on std::random_device::_M_getval() failure (#1262846,
+  PR libstdc++/65142, CVE-2015-5276)
+
+* Tue May 10 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-5
+- fix up libitm HTM fastpath (#1180633)
+- on ppc64le default to -mcpu=power8 instead of -mcpu=power7 (#1213268)
+- fix up size in .debug_pubnames (#1278872)
+- turn powerpc* HTM insns into memory barriers (#1282755, PR target/67281)
+- make sure to handle __builtin_alloca_with_align like alloca in
+  -fstack-protector* (#1289022, PR tree-optimization/68680)
+- improve DW_AT_abstract_origin of DW_TAG_GNU_call_site on s390 with -fPIC
+  (#1312436)
+- fix up libstdc++ pretty-printers (#1076690, PR libstdc++/53477)
+- don't pass explicit --oformat option to ld on powerpc* (#1296211)
+- backport Intel Memory Protection Keys ISA support - -mpku (#1304449)
+
 * Wed Jul 15 2015 Jakub Jelinek <jakub@redhat.com> 4.8.5-4
 - fix up basic_streambuf copy constructor and assignment operator
   (#1243366)