From ee890f87ea2ed076d1cc948c74c926591812d0d2 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 03 2016 06:03:47 +0000 Subject: import gcc-4.8.5-11.el7 --- diff --git a/SOURCES/gcc48-libgomp-20160715.patch b/SOURCES/gcc48-libgomp-20160715.patch new file mode 100644 index 0000000..9b6a61e --- /dev/null +++ b/SOURCES/gcc48-libgomp-20160715.patch @@ -0,0 +1,10653 @@ +--- libgomp/config/linux/wait.h.jj 2013-01-31 20:29:10.091548989 +0100 ++++ libgomp/config/linux/wait.h 2016-07-13 16:57:18.902355979 +0200 +@@ -34,13 +34,13 @@ + + #define FUTEX_WAIT 0 + #define FUTEX_WAKE 1 +-#define FUTEX_PRIVATE_FLAG 128L ++#define FUTEX_PRIVATE_FLAG 128 + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) + #endif + +-extern long int gomp_futex_wait, gomp_futex_wake; ++extern int gomp_futex_wait, gomp_futex_wake; + + #include + +@@ -48,7 +48,9 @@ static inline int do_spin (int *addr, in + { + unsigned long long i, count = gomp_spin_count_var; + +- if (__builtin_expect (gomp_managed_threads > gomp_available_cpus, 0)) ++ if (__builtin_expect (__atomic_load_n (&gomp_managed_threads, ++ MEMMODEL_RELAXED) ++ > gomp_available_cpus, 0)) + count = gomp_throttled_spin_count_var; + for (i = 0; i < count; i++) + if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0)) +--- libgomp/config/linux/affinity.c.jj 2014-05-15 10:56:37.499502573 +0200 ++++ libgomp/config/linux/affinity.c 2016-07-13 16:57:18.902355979 +0200 +@@ -352,6 +352,45 @@ gomp_affinity_print_place (void *p) + fprintf (stderr, ":%lu", len); + } + ++int ++omp_get_place_num_procs (int place_num) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return 0; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ return gomp_cpuset_popcount (gomp_cpuset_size, cpusetp); ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) 
gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ if (place_num < 0 || place_num >= gomp_places_list_len) ++ return; ++ ++ cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[place_num]; ++ unsigned long i, max = 8 * gomp_cpuset_size; ++ for (i = 0; i < max; i++) ++ if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) ++ *ids++ = i; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) ++ + #else + + #include "../posix/affinity.c" +--- libgomp/config/linux/mutex.c.jj 2013-01-21 16:00:38.220917670 +0100 ++++ libgomp/config/linux/mutex.c 2016-07-13 16:57:18.870356375 +0200 +@@ -28,8 +28,8 @@ + + #include "wait.h" + +-long int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; +-long int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; ++int gomp_futex_wait = FUTEX_WAIT | FUTEX_PRIVATE_FLAG; + + void + gomp_mutex_lock_slow (gomp_mutex_t *mutex, int oldval) +--- libgomp/config/posix/affinity.c.jj 2014-05-15 10:56:37.987498844 +0200 ++++ libgomp/config/posix/affinity.c 2016-07-15 12:08:28.410015743 +0200 +@@ -113,3 +113,27 @@ gomp_affinity_print_place (void *p) + { + (void) p; + } ++ ++int ++omp_get_place_num_procs (int place_num) ++{ ++ (void) place_num; ++ return 0; ++} ++ ++void ++omp_get_place_proc_ids (int place_num, int *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++void ++gomp_get_place_proc_ids_8 (int place_num, int64_t *ids) ++{ ++ (void) place_num; ++ (void) ids; ++} ++ ++ialias(omp_get_place_num_procs) ++ialias(omp_get_place_proc_ids) +--- libgomp/loop_ull.c.jj 2013-01-21 16:00:46.477871806 +0100 ++++ libgomp/loop_ull.c 2016-07-13 16:57:18.918355780 +0200 +@@ -174,15 +174,15 @@ GOMP_loop_ull_runtime_start (bool up, go + { + case GFS_STATIC: + return gomp_loop_ull_static_start (up, 
start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -278,15 +278,15 @@ GOMP_loop_ull_ordered_runtime_start (boo + { + case GFS_STATIC: + return gomp_loop_ull_ordered_static_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_ordered_guided_start (up, start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -298,6 +298,114 @@ GOMP_loop_ull_ordered_runtime_start (boo + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
*/ ++ ++static bool ++gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_ull_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_ull_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#if defined HAVE_SYNC_BUILTINS && defined __LP64__ ++ ret = gomp_iter_ull_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_ull_guided_next_locked (istart, iend); ++ gomp_mutex_unlock 
(&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ struct gomp_task_icv *icv = gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. 
If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -457,6 +565,10 @@ extern __typeof(gomp_loop_ull_dynamic_st + __attribute__((alias ("gomp_loop_ull_dynamic_start"))); + extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start + __attribute__((alias ("gomp_loop_ull_guided_start"))); ++extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_dynamic_start"))); ++extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_ull_guided_start"))); + + extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start + __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); +@@ -465,12 +577,23 @@ extern __typeof(gomp_loop_ull_ordered_dy + extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start + __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); + ++extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start ++ __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); ++extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start ++ __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next + __attribute__((alias ("gomp_loop_ull_static_next"))); + extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next + __attribute__((alias ("gomp_loop_ull_dynamic_next"))); + extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next + __attribute__((alias ("gomp_loop_ull_guided_next"))); ++extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next ++ __attribute__((alias 
("gomp_loop_ull_dynamic_next"))); ++extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_ull_guided_next"))); + + extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next + __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); +@@ -507,6 +630,25 @@ GOMP_loop_ull_guided_start (bool up, gom + } + + bool ++GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, ++ gomp_ull end, gomp_ull incr, ++ gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, ++ gomp_ull incr, gomp_ull chunk_size, ++ gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, ++ iend); ++} ++ ++bool + GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +@@ -534,6 +676,33 @@ GOMP_loop_ull_ordered_guided_start (bool + } + + bool ++GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, ++ gomp_ull chunk_size, gomp_ull *istart, ++ gomp_ull *iend) ++{ ++ return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) + { + return 
gomp_loop_ull_static_next (istart, iend); +@@ -550,6 +719,18 @@ GOMP_loop_ull_guided_next (gomp_ull *ist + { + return gomp_loop_ull_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) ++{ ++ return gomp_loop_ull_guided_next (istart, iend); ++} + + bool + GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) +--- libgomp/team.c.jj 2014-05-15 10:56:32.092524669 +0200 ++++ libgomp/team.c 2016-07-13 17:58:01.907291111 +0200 +@@ -133,6 +133,25 @@ gomp_thread_start (void *xdata) + return NULL; + } + ++static inline struct gomp_team * ++get_last_team (unsigned nthreads) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team == NULL) ++ { ++ struct gomp_thread_pool *pool = thr->thread_pool; ++ if (pool != NULL) ++ { ++ struct gomp_team *last_team = pool->last_team; ++ if (last_team != NULL && last_team->nthreads == nthreads) ++ { ++ pool->last_team = NULL; ++ return last_team; ++ } ++ } ++ } ++ return NULL; ++} + + /* Create a new team data structure. 
*/ + +@@ -140,18 +159,27 @@ struct gomp_team * + gomp_new_team (unsigned nthreads) + { + struct gomp_team *team; +- size_t size; + int i; + +- size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0]) +- + sizeof (team->implicit_task[0])); +- team = gomp_malloc (size); ++ team = get_last_team (nthreads); ++ if (team == NULL) ++ { ++ size_t extra = sizeof (team->ordered_release[0]) ++ + sizeof (team->implicit_task[0]); ++ team = gomp_malloc (sizeof (*team) + nthreads * extra); ++ ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_init (&team->work_share_list_free_lock); ++#endif ++ gomp_barrier_init (&team->barrier, nthreads); ++ gomp_mutex_init (&team->task_lock); ++ ++ team->nthreads = nthreads; ++ } + + team->work_share_chunk = 8; + #ifdef HAVE_SYNC_BUILTINS + team->single_count = 0; +-#else +- gomp_mutex_init (&team->work_share_list_free_lock); + #endif + team->work_shares_to_free = &team->work_shares[0]; + gomp_init_work_share (&team->work_shares[0], false, nthreads); +@@ -162,15 +190,11 @@ gomp_new_team (unsigned nthreads) + team->work_shares[i].next_free = &team->work_shares[i + 1]; + team->work_shares[i].next_free = NULL; + +- team->nthreads = nthreads; +- gomp_barrier_init (&team->barrier, nthreads); +- + gomp_sem_init (&team->master_release, 0); + team->ordered_release = (void *) &team->implicit_task[nthreads]; + team->ordered_release[0] = &team->master_release; + +- gomp_mutex_init (&team->task_lock); +- team->task_queue = NULL; ++ priority_queue_init (&team->task_queue); + team->task_count = 0; + team->task_queued_count = 0; + team->task_running_count = 0; +@@ -186,8 +210,12 @@ gomp_new_team (unsigned nthreads) + static void + free_team (struct gomp_team *team) + { ++#ifndef HAVE_SYNC_BUILTINS ++ gomp_mutex_destroy (&team->work_share_list_free_lock); ++#endif + gomp_barrier_destroy (&team->barrier); + gomp_mutex_destroy (&team->task_lock); ++ priority_queue_free (&team->task_queue); + free (team); + } + +@@ -258,6 +286,8 @@ gomp_free_thread (void 
*arg __attribute_ + free (pool); + thr->thread_pool = NULL; + } ++ if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0)) ++ gomp_team_end (); + if (thr->task != NULL) + { + struct gomp_task *task = thr->task; +@@ -287,7 +317,7 @@ gomp_team_start (void (*fn) (void *), vo + struct gomp_thread **affinity_thr = NULL; + + thr = gomp_thread (); +- nested = thr->ts.team != NULL; ++ nested = thr->ts.level; + if (__builtin_expect (thr->thread_pool == NULL, 0)) + { + thr->thread_pool = gomp_new_thread_pool (); +@@ -894,9 +924,6 @@ gomp_team_end (void) + while (ws != NULL); + } + gomp_sem_destroy (&team->master_release); +-#ifndef HAVE_SYNC_BUILTINS +- gomp_mutex_destroy (&team->work_share_list_free_lock); +-#endif + + if (__builtin_expect (thr->ts.team != NULL, 0) + || __builtin_expect (team->nthreads == 1, 0)) +--- libgomp/target.c.jj 2014-05-15 10:56:38.313498020 +0200 ++++ libgomp/target.c 2016-07-15 16:58:29.249328861 +0200 +@@ -22,14 +22,22 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +-/* This file handles the maintainence of threads in response to team +- creation and termination. */ ++/* This file contains the support of offloading. */ + ++#include "config.h" + #include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include + #include + #include ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif + #include ++#include ++#include + + attribute_hidden int + gomp_get_num_devices (void) +@@ -37,22 +45,87 @@ gomp_get_num_devices (void) + return 0; + } + +-/* Called when encountering a target directive. If DEVICE +- is -1, it means use device-var ICV. If it is -2 (or any other value +- larger than last available hw device, use host fallback. +- FN is address of host code, OPENMP_TARGET contains value of the +- __OPENMP_TARGET__ symbol in the shared library or binary that invokes +- GOMP_target. 
HOSTADDRS, SIZES and KINDS are arrays +- with MAPNUM entries, with addresses of the host objects, +- sizes of the host objects (resp. for pointer kind pointer bias +- and assumed sizeof (void *) size) and kinds. */ ++/* This function should be called from every offload image while loading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. */ + + void +-GOMP_target (int device, void (*fn) (void *), const void *openmp_target, +- size_t mapnum, void **hostaddrs, size_t *sizes, +- unsigned char *kinds) ++GOMP_offload_register_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_register (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function should be called from every offload image while unloading. ++ It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of ++ the target, and TARGET_DATA needed by target plugin. */ ++ ++void ++GOMP_offload_unregister_ver (unsigned version, const void *host_table, ++ int target_type, const void *target_data) ++{ ++ (void) version; ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++void ++GOMP_offload_unregister (const void *host_table, int target_type, ++ const void *target_data) ++{ ++ (void) host_table; ++ (void) target_type; ++ (void) target_data; ++} ++ ++/* This function initializes the target device, specified by DEVICEP. DEVICEP ++ must be locked on entry, and remains locked on return. 
*/ ++ ++attribute_hidden void ++gomp_init_device (struct gomp_device_descr *devicep) ++{ ++ devicep->state = GOMP_DEVICE_INITIALIZED; ++} ++ ++attribute_hidden void ++gomp_unload_device (struct gomp_device_descr *devicep) ++{ ++} ++ ++/* Free address mapping tables. MM must be locked on entry, and remains locked ++ on return. */ ++ ++attribute_hidden void ++gomp_free_memmap (struct splay_tree_s *mem_map) ++{ ++ while (mem_map->root) ++ { ++ struct target_mem_desc *tgt = mem_map->root->key.tgt; ++ ++ splay_tree_remove (mem_map, &mem_map->root->key); ++ free (tgt->array); ++ free (tgt); ++ } ++} ++ ++/* Host fallback for GOMP_target{,_ext} routines. */ ++ ++static void ++gomp_target_fallback (void (*fn) (void *), void **hostaddrs) + { +- /* Host fallback. */ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); +@@ -66,10 +139,167 @@ GOMP_target (int device, void (*fn) (voi + *thr = old_thr; + } + ++/* Calculate alignment and size requirements of a private copy of data shared ++ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ ++ ++static inline void ++calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, ++ unsigned short *kinds, size_t *tgt_align, ++ size_t *tgt_size) ++{ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (*tgt_align < align) ++ *tgt_align = align; ++ *tgt_size = (*tgt_size + align - 1) & ~(align - 1); ++ *tgt_size += sizes[i]; ++ } ++} ++ ++/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. 
*/ ++ ++static inline void ++copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, size_t tgt_align, ++ size_t tgt_size) ++{ ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++} ++ ++/* Called when encountering a target directive. If DEVICE ++ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is ++ GOMP_DEVICE_HOST_FALLBACK (or any value ++ larger than last available hw device), use host fallback. ++ FN is address of host code, UNUSED is part of the current ABI, but ++ we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays ++ with MAPNUM entries, with addresses of the host objects, ++ sizes of the host objects (resp. for pointer kind pointer bias ++ and assumed sizeof (void *) size) and kinds. */ ++ ++void ++GOMP_target (int device, void (*fn) (void *), const void *unused, ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned char *kinds) ++{ ++ return gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, ++ and several arguments have been added: ++ FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. ++ DEPEND is array of dependencies, see GOMP_task for details. ++ ++ ARGS is a pointer to an array consisting of a variable number of both ++ device-independent and device-specific arguments, which can take one two ++ elements where the first specifies for which device it is intended, the type ++ and optionally also the value. If the value is not present in the first ++ one, the whole second element the actual value. 
The last element of the ++ array is a single NULL. Among the device independent can be for example ++ NUM_TEAMS and THREAD_LIMIT. ++ ++ NUM_TEAMS is positive if GOMP_teams will be called in the body with ++ that value, or 1 if teams construct is not present, or 0, if ++ teams construct does not have num_teams clause and so the choice is ++ implementation defined, and -1 if it can't be determined on the host ++ what value will GOMP_teams have on the device. ++ THREAD_LIMIT similarly is positive if GOMP_teams will be called in the ++ body with that value, or 0, if teams construct does not have thread_limit ++ clause or the teams construct is not present, or -1 if it can't be ++ determined on the host what value will GOMP_teams have on the device. */ ++ ++void ++GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args) ++{ ++ size_t tgt_align = 0, tgt_size = 0; ++ bool fpc_done = false; ++ ++ if (flags & GOMP_TARGET_FLAG_NOWAIT) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->ts.team ++ && !thr->task->final_task) ++ { ++ gomp_create_target_task (NULL, fn, mapnum, hostaddrs, ++ sizes, kinds, flags, depend, args, ++ GOMP_TARGET_TASK_BEFORE_MAP); ++ return; ++ } ++ } ++ ++ /* If there are depend clauses, but nowait is not present ++ (or we are in a final task), block the parent task until the ++ dependencies are resolved and then just continue with the rest ++ of the function as if it is a merged task. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ /* If we might need to wait, copy firstprivate now. 
*/ ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ fpc_done = true; ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ ++ if (!fpc_done) ++ { ++ calculate_firstprivate_requirements (mapnum, sizes, kinds, ++ &tgt_align, &tgt_size); ++ if (tgt_align) ++ { ++ char *tgt = gomp_alloca (tgt_size + tgt_align - 1); ++ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, ++ tgt_align, tgt_size); ++ } ++ } ++ gomp_target_fallback (fn, hostaddrs); ++} ++ ++/* Host fallback for GOMP_target_data{,_ext} routines. */ ++ ++static void ++gomp_target_data_fallback (void) ++{ ++} ++ + void +-GOMP_target_data (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_data (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { ++ return gomp_target_data_fallback (); ++} ++ ++void ++GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds) ++{ ++ return gomp_target_data_fallback (); + } + + void +@@ -78,12 +308,112 @@ GOMP_target_end_data (void) + } + + void +-GOMP_target_update (int device, const void *openmp_target, size_t mapnum, ++GOMP_target_update (int device, const void *unused, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) + { + } + + void ++GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. 
Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags | GOMP_TARGET_FLAG_UPDATE, ++ depend, NULL, GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++void ++GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend) ++{ ++ /* If there are depend clauses, but nowait is not present, ++ block the parent task until the dependencies are resolved ++ and then just continue with the rest of the function as if it ++ is a merged task. Until we are able to schedule task during ++ variable mapping or unmapping, ignore nowait if depend clauses ++ are not present. */ ++ if (depend != NULL) ++ { ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->task && thr->task->depend_hash) ++ { ++ if ((flags & GOMP_TARGET_FLAG_NOWAIT) ++ && thr->ts.team ++ && !thr->task->final_task) ++ { ++ if (gomp_create_target_task (NULL, (void (*) (void *)) NULL, ++ mapnum, hostaddrs, sizes, kinds, ++ flags, depend, NULL, ++ GOMP_TARGET_TASK_DATA)) ++ return; ++ } ++ else ++ { ++ struct gomp_team *team = thr->ts.team; ++ /* If parallel or taskgroup has been cancelled, don't start new ++ tasks. 
*/ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup ++ && thr->task->taskgroup->cancelled))) ++ return; ++ ++ gomp_task_maybe_wait_for_dependencies (depend); ++ } ++ } ++ } ++} ++ ++bool ++gomp_target_task_fn (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ ++ if (ttask->fn != NULL) ++ { ++ ttask->state = GOMP_TARGET_TASK_FALLBACK; ++ gomp_target_fallback (ttask->fn, ttask->hostaddrs); ++ return false; ++ } ++ return false; ++} ++ ++void + GOMP_teams (unsigned int num_teams, unsigned int thread_limit) + { + if (thread_limit) +@@ -94,3 +424,153 @@ GOMP_teams (unsigned int num_teams, unsi + } + (void) num_teams; + } ++ ++void * ++omp_target_alloc (size_t size, int device_num) ++{ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return malloc (size); ++ ++ return NULL; ++} ++ ++void ++omp_target_free (void *device_ptr, int device_num) ++{ ++ if (device_ptr == NULL) ++ return; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ { ++ free (device_ptr); ++ return; ++ } ++} ++ ++int ++omp_target_is_present (void *ptr, int device_num) ++{ ++ if (ptr == NULL) ++ return 1; ++ ++ if (device_num == GOMP_DEVICE_HOST_FALLBACK) ++ return 1; ++ ++ return 0; ++} ++ ++int ++omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, ++ size_t src_offset, int dst_device_num, int src_device_num) ++{ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); ++ return 0; ++} ++ ++#define HALF_SIZE_T (((size_t) 1) << (8 * sizeof (size_t) / 2)) ++ ++#define __builtin_mul_overflow(x, y, z) \ ++ ({ bool retval = false; \ ++ size_t xval = (x); \ ++ size_t yval = (y); \ ++ size_t zval = xval * yval; \ ++ if (__builtin_expect ((xval | yval) >= HALF_SIZE_T, 0)) \ ++ { \ ++ if (xval && zval / xval != yval) \ ++ retval = true; \ ++ } \ ++ 
*(z) = zval; \ ++ retval; }) ++ ++static int ++omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions) ++{ ++ size_t dst_slice = element_size; ++ size_t src_slice = element_size; ++ size_t j, dst_off, src_off, length; ++ int i, ret; ++ ++ ++ if (num_dims == 1) ++ { ++ if (__builtin_mul_overflow (element_size, volume[0], &length) ++ || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) ++ return EINVAL; ++ memcpy ((char *) dst + dst_off, (char *) src + src_off, length); ++ ret = 1; ++ return ret ? 0 : EINVAL; ++ } ++ ++ /* FIXME: it would be nice to have some plugin function to handle ++ num_dims == 2 and num_dims == 3 more efficiently. Larger ones can ++ be handled in the generic recursion below, and for host-host it ++ should be used even for any num_dims >= 2. 
*/ ++ ++ for (i = 1; i < num_dims; i++) ++ if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) ++ || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) ++ return EINVAL; ++ if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) ++ || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) ++ return EINVAL; ++ for (j = 0; j < volume[0]; j++) ++ { ++ ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, ++ (char *) src + src_off, ++ element_size, num_dims - 1, ++ volume + 1, dst_offsets + 1, ++ src_offsets + 1, dst_dimensions + 1, ++ src_dimensions + 1); ++ if (ret) ++ return ret; ++ dst_off += dst_slice; ++ src_off += src_slice; ++ } ++ return 0; ++} ++ ++int ++omp_target_memcpy_rect (void *dst, void *src, size_t element_size, ++ int num_dims, const size_t *volume, ++ const size_t *dst_offsets, ++ const size_t *src_offsets, ++ const size_t *dst_dimensions, ++ const size_t *src_dimensions, ++ int dst_device_num, int src_device_num) ++{ ++ if (!dst && !src) ++ return INT_MAX; ++ ++ if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) ++ return EINVAL; ++ ++ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, ++ volume, dst_offsets, src_offsets, ++ dst_dimensions, src_dimensions); ++ return ret; ++} ++ ++int ++omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, ++ size_t device_offset, int device_num) ++{ ++ return EINVAL; ++} ++ ++int ++omp_target_disassociate_ptr (void *ptr, int device_num) ++{ ++ return EINVAL; ++} +--- libgomp/fortran.c.jj 2014-05-15 10:56:31.593531223 +0200 ++++ libgomp/fortran.c 2016-07-13 16:57:04.432535397 +0200 +@@ -67,12 +67,20 @@ ialias_redirect (omp_get_active_level) + ialias_redirect (omp_in_final) + ialias_redirect (omp_get_cancellation) + ialias_redirect (omp_get_proc_bind) ++ialias_redirect (omp_get_num_places) ++ialias_redirect (omp_get_place_num_procs) ++ialias_redirect 
(omp_get_place_proc_ids) ++ialias_redirect (omp_get_place_num) ++ialias_redirect (omp_get_partition_num_places) ++ialias_redirect (omp_get_partition_place_nums) + ialias_redirect (omp_set_default_device) + ialias_redirect (omp_get_default_device) + ialias_redirect (omp_get_num_devices) + ialias_redirect (omp_get_num_teams) + ialias_redirect (omp_get_team_num) + ialias_redirect (omp_is_initial_device) ++ialias_redirect (omp_get_initial_device) ++ialias_redirect (omp_get_max_task_priority) + #endif + + #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING +@@ -342,35 +350,35 @@ omp_get_wtime_ (void) + } + + void +-omp_set_schedule_ (const int32_t *kind, const int32_t *modifier) ++omp_set_schedule_ (const int32_t *kind, const int32_t *chunk_size) + { +- omp_set_schedule (*kind, *modifier); ++ omp_set_schedule (*kind, *chunk_size); + } + + void +-omp_set_schedule_8_ (const int32_t *kind, const int64_t *modifier) ++omp_set_schedule_8_ (const int32_t *kind, const int64_t *chunk_size) + { +- omp_set_schedule (*kind, TO_INT (*modifier)); ++ omp_set_schedule (*kind, TO_INT (*chunk_size)); + } + + void +-omp_get_schedule_ (int32_t *kind, int32_t *modifier) ++omp_get_schedule_ (int32_t *kind, int32_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + void +-omp_get_schedule_8_ (int32_t *kind, int64_t *modifier) ++omp_get_schedule_8_ (int32_t *kind, int64_t *chunk_size) + { + omp_sched_t k; +- int m; +- omp_get_schedule (&k, &m); ++ int cs; ++ omp_get_schedule (&k, &cs); + *kind = k; +- *modifier = m; ++ *chunk_size = cs; + } + + int32_t +@@ -451,6 +459,69 @@ omp_get_proc_bind_ (void) + return omp_get_proc_bind (); + } + ++int32_t ++omp_get_num_places_ (void) ++{ ++ return omp_get_num_places (); ++} ++ ++int32_t ++omp_get_place_num_procs_ (const int32_t *place_num) ++{ ++ return omp_get_place_num_procs (*place_num); ++} ++ ++int32_t ++omp_get_place_num_procs_8_ (const 
int64_t *place_num) ++{ ++ return omp_get_place_num_procs (TO_INT (*place_num)); ++} ++ ++void ++omp_get_place_proc_ids_ (const int32_t *place_num, int32_t *ids) ++{ ++ omp_get_place_proc_ids (*place_num, (int *) ids); ++} ++ ++void ++omp_get_place_proc_ids_8_ (const int64_t *place_num, int64_t *ids) ++{ ++ gomp_get_place_proc_ids_8 (TO_INT (*place_num), ids); ++} ++ ++int32_t ++omp_get_place_num_ (void) ++{ ++ return omp_get_place_num (); ++} ++ ++int32_t ++omp_get_partition_num_places_ (void) ++{ ++ return omp_get_partition_num_places (); ++} ++ ++void ++omp_get_partition_place_nums_ (int32_t *place_nums) ++{ ++ omp_get_partition_place_nums ((int *) place_nums); ++} ++ ++void ++omp_get_partition_place_nums_8_ (int64_t *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = (int64_t) thr->ts.place_partition_off + i; ++} ++ + void + omp_set_default_device_ (const int32_t *device_num) + { +@@ -492,3 +563,15 @@ omp_is_initial_device_ (void) + { + return omp_is_initial_device (); + } ++ ++int32_t ++omp_get_initial_device_ (void) ++{ ++ return omp_get_initial_device (); ++} ++ ++int32_t ++omp_get_max_task_priority_ (void) ++{ ++ return omp_get_max_task_priority (); ++} +--- libgomp/libgomp.map.jj 2014-05-15 10:56:31.927533549 +0200 ++++ libgomp/libgomp.map 2016-07-13 16:57:04.434535373 +0200 +@@ -134,6 +134,36 @@ OMP_4.0 { + omp_is_initial_device_; + } OMP_3.1; + ++OMP_4.5 { ++ global: ++ omp_get_max_task_priority; ++ omp_get_max_task_priority_; ++ omp_get_num_places; ++ omp_get_num_places_; ++ omp_get_place_num_procs; ++ omp_get_place_num_procs_; ++ omp_get_place_num_procs_8_; ++ omp_get_place_proc_ids; ++ omp_get_place_proc_ids_; ++ omp_get_place_proc_ids_8_; ++ omp_get_place_num; ++ omp_get_place_num_; ++ omp_get_partition_num_places; ++ omp_get_partition_num_places_; ++ 
omp_get_partition_place_nums; ++ omp_get_partition_place_nums_; ++ omp_get_partition_place_nums_8_; ++ omp_get_initial_device; ++ omp_get_initial_device_; ++ omp_target_alloc; ++ omp_target_free; ++ omp_target_is_present; ++ omp_target_memcpy; ++ omp_target_memcpy_rect; ++ omp_target_associate_ptr; ++ omp_target_disassociate_ptr; ++} OMP_4.0; ++ + GOMP_1.0 { + global: + GOMP_atomic_end; +@@ -227,3 +257,158 @@ GOMP_4.0 { + GOMP_target_update; + GOMP_teams; + } GOMP_3.0; ++ ++GOMP_4.0.1 { ++ global: ++ GOMP_offload_register; ++ GOMP_offload_unregister; ++} GOMP_4.0; ++ ++GOMP_4.5 { ++ global: ++ GOMP_target_ext; ++ GOMP_target_data_ext; ++ GOMP_target_update_ext; ++ GOMP_target_enter_exit_data; ++ GOMP_taskloop; ++ GOMP_taskloop_ull; ++ GOMP_offload_register_ver; ++ GOMP_offload_unregister_ver; ++ GOMP_loop_doacross_dynamic_start; ++ GOMP_loop_doacross_guided_start; ++ GOMP_loop_doacross_runtime_start; ++ GOMP_loop_doacross_static_start; ++ GOMP_doacross_post; ++ GOMP_doacross_wait; ++ GOMP_loop_ull_doacross_dynamic_start; ++ GOMP_loop_ull_doacross_guided_start; ++ GOMP_loop_ull_doacross_runtime_start; ++ GOMP_loop_ull_doacross_static_start; ++ GOMP_doacross_ull_post; ++ GOMP_doacross_ull_wait; ++ GOMP_loop_nonmonotonic_dynamic_next; ++ GOMP_loop_nonmonotonic_dynamic_start; ++ GOMP_loop_nonmonotonic_guided_next; ++ GOMP_loop_nonmonotonic_guided_start; ++ GOMP_loop_ull_nonmonotonic_dynamic_next; ++ GOMP_loop_ull_nonmonotonic_dynamic_start; ++ GOMP_loop_ull_nonmonotonic_guided_next; ++ GOMP_loop_ull_nonmonotonic_guided_start; ++ GOMP_parallel_loop_nonmonotonic_dynamic; ++ GOMP_parallel_loop_nonmonotonic_guided; ++} GOMP_4.0.1; ++ ++OACC_2.0 { ++ global: ++ acc_get_num_devices; ++ acc_get_num_devices_h_; ++ acc_set_device_type; ++ acc_set_device_type_h_; ++ acc_get_device_type; ++ acc_get_device_type_h_; ++ acc_set_device_num; ++ acc_set_device_num_h_; ++ acc_get_device_num; ++ acc_get_device_num_h_; ++ acc_async_test; ++ acc_async_test_h_; ++ acc_async_test_all; ++ 
acc_async_test_all_h_; ++ acc_wait; ++ acc_wait_h_; ++ acc_wait_async; ++ acc_wait_async_h_; ++ acc_wait_all; ++ acc_wait_all_h_; ++ acc_wait_all_async; ++ acc_wait_all_async_h_; ++ acc_init; ++ acc_init_h_; ++ acc_shutdown; ++ acc_shutdown_h_; ++ acc_on_device; ++ acc_on_device_h_; ++ acc_malloc; ++ acc_free; ++ acc_copyin; ++ acc_copyin_32_h_; ++ acc_copyin_64_h_; ++ acc_copyin_array_h_; ++ acc_present_or_copyin; ++ acc_present_or_copyin_32_h_; ++ acc_present_or_copyin_64_h_; ++ acc_present_or_copyin_array_h_; ++ acc_create; ++ acc_create_32_h_; ++ acc_create_64_h_; ++ acc_create_array_h_; ++ acc_present_or_create; ++ acc_present_or_create_32_h_; ++ acc_present_or_create_64_h_; ++ acc_present_or_create_array_h_; ++ acc_copyout; ++ acc_copyout_32_h_; ++ acc_copyout_64_h_; ++ acc_copyout_array_h_; ++ acc_delete; ++ acc_delete_32_h_; ++ acc_delete_64_h_; ++ acc_delete_array_h_; ++ acc_update_device; ++ acc_update_device_32_h_; ++ acc_update_device_64_h_; ++ acc_update_device_array_h_; ++ acc_update_self; ++ acc_update_self_32_h_; ++ acc_update_self_64_h_; ++ acc_update_self_array_h_; ++ acc_map_data; ++ acc_unmap_data; ++ acc_deviceptr; ++ acc_hostptr; ++ acc_is_present; ++ acc_is_present_32_h_; ++ acc_is_present_64_h_; ++ acc_is_present_array_h_; ++ acc_memcpy_to_device; ++ acc_memcpy_from_device; ++ acc_get_current_cuda_device; ++ acc_get_current_cuda_context; ++ acc_get_cuda_stream; ++ acc_set_cuda_stream; ++}; ++ ++GOACC_2.0 { ++ global: ++ GOACC_data_end; ++ GOACC_data_start; ++ GOACC_enter_exit_data; ++ GOACC_parallel; ++ GOACC_update; ++ GOACC_wait; ++ GOACC_get_thread_num; ++ GOACC_get_num_threads; ++}; ++ ++GOACC_2.0.1 { ++ global: ++ GOACC_declare; ++ GOACC_parallel_keyed; ++} GOACC_2.0; ++ ++GOMP_PLUGIN_1.0 { ++ global: ++ GOMP_PLUGIN_malloc; ++ GOMP_PLUGIN_malloc_cleared; ++ GOMP_PLUGIN_realloc; ++ GOMP_PLUGIN_debug; ++ GOMP_PLUGIN_error; ++ GOMP_PLUGIN_fatal; ++ GOMP_PLUGIN_async_unmap_vars; ++ GOMP_PLUGIN_acc_thread; ++}; ++ ++GOMP_PLUGIN_1.1 { ++ 
global: ++ GOMP_PLUGIN_target_task_completion; ++} GOMP_PLUGIN_1.0; +--- libgomp/ordered.c.jj 2013-01-21 16:00:46.137873657 +0100 ++++ libgomp/ordered.c 2016-07-13 16:57:18.918355780 +0200 +@@ -25,6 +25,9 @@ + /* This file handles the ORDERED construct. */ + + #include "libgomp.h" ++#include ++#include ++#include "doacross.h" + + + /* This function is called when first allocating an iteration block. That +@@ -249,3 +252,533 @@ void + GOMP_ordered_end (void) + { + } ++ ++/* DOACROSS initialization. */ ++ ++#define MAX_COLLAPSED_BITS (__SIZEOF_LONG__ * __CHAR_BIT__) ++ ++void ++gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. 
*/ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG__ * __CHAR_BIT__ ++ - __builtin_clzl (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ elt_sz = sizeof (unsigned long) * ncounts; ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ unsigned long q = counts[0] / num_ents; ++ unsigned long t = counts[0] % num_ents; ++ doacross->boundary = t * (q + 1); ++ doacross->q = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
*/ ++ ++void ++GOMP_doacross_post (long *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) counts[i] ++ << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_wait (long first, ...) 
++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size == 0) ++ { ++ if (first < doacross->boundary) ++ ent = first / (doacross->q + 1); ++ else ++ ent = (first - doacross->boundary) / doacross->q ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size; ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long flattened ++ = (unsigned long) first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= (unsigned long) va_arg (ap, long) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ unsigned long thisv ++ = (unsigned long) (i ? 
va_arg (ap, long) : first) + 1; ++ unsigned long cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ __sync_synchronize (); ++} ++ ++typedef unsigned long long gomp_ull; ++ ++void ++gomp_doacross_ull_init (unsigned ncounts, gomp_ull *counts, gomp_ull chunk_size) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_work_share *ws = thr->ts.work_share; ++ unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0; ++ unsigned long ent, num_ents, elt_sz, shift_sz; ++ struct gomp_doacross_work_share *doacross; ++ ++ if (team == NULL || team->nthreads == 1) ++ return; ++ ++ for (i = 0; i < ncounts; i++) ++ { ++ /* If any count is 0, GOMP_doacross_{post,wait} can't be called. */ ++ if (counts[i] == 0) ++ return; ++ ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int this_bits; ++ if (counts[i] == 1) ++ this_bits = 1; ++ else ++ this_bits = __SIZEOF_LONG_LONG__ * __CHAR_BIT__ ++ - __builtin_clzll (counts[i] - 1); ++ if (num_bits + this_bits <= MAX_COLLAPSED_BITS) ++ { ++ bits[i] = this_bits; ++ num_bits += this_bits; ++ } ++ else ++ num_bits = MAX_COLLAPSED_BITS + 1; ++ } ++ } ++ ++ if (ws->sched == GFS_STATIC) ++ num_ents = team->nthreads; ++ else if (ws->sched == GFS_GUIDED) ++ num_ents = counts[0]; ++ else ++ num_ents = (counts[0] - 1) / chunk_size + 1; ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ elt_sz = sizeof (unsigned long); ++ shift_sz = ncounts * sizeof (unsigned int); ++ } ++ else ++ { ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ elt_sz = sizeof (gomp_ull) * ncounts; ++ else if (sizeof (gomp_ull) == 2 * sizeof (unsigned long)) ++ elt_sz = sizeof (unsigned long) * 2 * ncounts; ++ else ++ abort (); ++ shift_sz = 0; ++ } ++ elt_sz = (elt_sz + 63) & ~63UL; ++ ++ doacross = gomp_malloc (sizeof 
(*doacross) + 63 + num_ents * elt_sz ++ + shift_sz); ++ doacross->chunk_size_ull = chunk_size; ++ doacross->elt_sz = elt_sz; ++ doacross->ncounts = ncounts; ++ doacross->flattened = false; ++ doacross->boundary = 0; ++ doacross->array = (unsigned char *) ++ ((((uintptr_t) (doacross + 1)) + 63 + shift_sz) ++ & ~(uintptr_t) 63); ++ if (num_bits <= MAX_COLLAPSED_BITS) ++ { ++ unsigned int shift_count = 0; ++ doacross->flattened = true; ++ for (i = ncounts; i > 0; i--) ++ { ++ doacross->shift_counts[i - 1] = shift_count; ++ shift_count += bits[i - 1]; ++ } ++ for (ent = 0; ent < num_ents; ent++) ++ *(unsigned long *) (doacross->array + ent * elt_sz) = 0; ++ } ++ else ++ for (ent = 0; ent < num_ents; ent++) ++ memset (doacross->array + ent * elt_sz, '\0', ++ sizeof (unsigned long) * ncounts); ++ if (ws->sched == GFS_STATIC && chunk_size == 0) ++ { ++ gomp_ull q = counts[0] / num_ents; ++ gomp_ull t = counts[0] % num_ents; ++ doacross->boundary_ull = t * (q + 1); ++ doacross->q_ull = q; ++ doacross->t = t; ++ } ++ ws->doacross = doacross; ++} ++ ++/* DOACROSS POST operation. 
*/ ++ ++void ++GOMP_doacross_ull_post (gomp_ull *counts) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ ent = thr->ts.team_id; ++ else if (ws->sched == GFS_GUIDED) ++ ent = counts[0]; ++ else ++ ent = counts[0] / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened ++ = counts[0] << doacross->shift_counts[0]; ++ ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= counts[i] << doacross->shift_counts[i]; ++ flattened++; ++ if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE)) ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ else ++ __atomic_store_n (array, flattened, MEMMODEL_RELEASE); ++ return; ++ } ++ ++ __atomic_thread_fence (MEMMODEL_ACQUIRE); ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE); ++ } ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ ++ for (i = doacross->ncounts; i-- > 0; ) ++ { ++ gomp_ull cull = counts[i] + 1UL; ++ unsigned long c = (unsigned long) cull; ++ if (c != __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i + 1], c, MEMMODEL_RELEASE); ++ c = cull >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ if (c != __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED)) ++ __atomic_store_n (&array[2 * i], c, 
MEMMODEL_RELEASE); ++ } ++ } ++} ++ ++/* DOACROSS WAIT operation. */ ++ ++void ++GOMP_doacross_ull_wait (gomp_ull first, ...) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_work_share *ws = thr->ts.work_share; ++ struct gomp_doacross_work_share *doacross = ws->doacross; ++ va_list ap; ++ unsigned long ent; ++ unsigned int i; ++ ++ if (__builtin_expect (doacross == NULL, 0)) ++ { ++ __sync_synchronize (); ++ return; ++ } ++ ++ if (__builtin_expect (ws->sched == GFS_STATIC, 1)) ++ { ++ if (ws->chunk_size_ull == 0) ++ { ++ if (first < doacross->boundary_ull) ++ ent = first / (doacross->q_ull + 1); ++ else ++ ent = (first - doacross->boundary_ull) / doacross->q_ull ++ + doacross->t; ++ } ++ else ++ ent = first / ws->chunk_size_ull % thr->ts.team->nthreads; ++ } ++ else if (ws->sched == GFS_GUIDED) ++ ent = first; ++ else ++ ent = first / doacross->chunk_size_ull; ++ ++ if (__builtin_expect (doacross->flattened, 1)) ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ gomp_ull flattened = first << doacross->shift_counts[0]; ++ unsigned long cur; ++ ++ va_start (ap, first); ++ for (i = 1; i < doacross->ncounts; i++) ++ flattened |= va_arg (ap, gomp_ull) ++ << doacross->shift_counts[i]; ++ cur = __atomic_load_n (array, MEMMODEL_ACQUIRE); ++ if (flattened < cur) ++ { ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ doacross_spin (array, flattened, cur); ++ __atomic_thread_fence (MEMMODEL_RELEASE); ++ va_end (ap); ++ return; ++ } ++ ++ if (sizeof (gomp_ull) == sizeof (unsigned long)) ++ { ++ gomp_ull *array = (gomp_ull *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? 
va_arg (ap, gomp_ull) : first) + 1; ++ gomp_ull cur = __atomic_load_n (&array[i], MEMMODEL_RELAXED); ++ if (thisv < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (thisv > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ else ++ { ++ unsigned long *array = (unsigned long *) (doacross->array ++ + ent * doacross->elt_sz); ++ do ++ { ++ va_start (ap, first); ++ for (i = 0; i < doacross->ncounts; i++) ++ { ++ gomp_ull thisv ++ = (i ? va_arg (ap, gomp_ull) : first) + 1; ++ unsigned long t ++ = thisv >> (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ / 2); ++ unsigned long cur ++ = __atomic_load_n (&array[2 * i], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ t = thisv; ++ cur = __atomic_load_n (&array[2 * i + 1], MEMMODEL_RELAXED); ++ if (t < cur) ++ { ++ i = doacross->ncounts; ++ break; ++ } ++ if (t > cur) ++ break; ++ } ++ va_end (ap); ++ if (i == doacross->ncounts) ++ break; ++ cpu_relax (); ++ } ++ while (1); ++ } ++ __sync_synchronize (); ++} +--- libgomp/loop.c.jj 2014-05-15 10:56:36.487505570 +0200 ++++ libgomp/loop.c 2016-07-13 16:57:13.488423109 +0200 +@@ -110,6 +110,11 @@ gomp_loop_static_start (long start, long + return !gomp_iter_static_next (istart, iend); + } + ++/* The current dynamic implementation is always monotonic. The ++ entrypoints without nonmonotonic in them have to be always monotonic, ++ but the nonmonotonic ones could be changed to use work-stealing for ++ improved scalability. */ ++ + static bool + gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -135,6 +140,9 @@ gomp_loop_dynamic_start (long start, lon + return ret; + } + ++/* Similarly as for dynamic, though the question is how can the chunk sizes ++ be decreased without a central locking or atomics. 
*/ ++ + static bool + gomp_loop_guided_start (long start, long end, long incr, long chunk_size, + long *istart, long *iend) +@@ -168,13 +176,16 @@ GOMP_loop_runtime_start (long start, lon + switch (icv->run_sched_var) + { + case GFS_STATIC: +- return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_static_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: +- return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_dynamic_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: +- return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier, ++ return gomp_loop_guided_start (start, end, incr, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -265,15 +276,15 @@ GOMP_loop_ordered_runtime_start (long st + { + case GFS_STATIC: + return gomp_loop_ordered_static_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ordered_dynamic_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ordered_guided_start (start, end, incr, +- icv->run_sched_modifier, ++ icv->run_sched_chunk_size, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback +@@ -285,6 +296,111 @@ GOMP_loop_ordered_runtime_start (long st + } + } + ++/* The *_doacross_*_start routines are similar. The only difference is that ++ this work-share construct is initialized to expect an ORDERED(N) - DOACROSS ++ section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 ++ and other COUNTS array elements tell the library number of iterations ++ in the ordered inner loops. 
*/ ++ ++static bool ++gomp_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ ++ thr->ts.static_trip = 0; ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_STATIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++ return !gomp_iter_static_next (istart, iend); ++} ++ ++static bool ++gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_DYNAMIC, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_dynamic_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_dynamic_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++static bool ++gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ bool ret; ++ ++ if (gomp_work_share_start (false)) ++ { ++ gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, ++ GFS_GUIDED, chunk_size); ++ gomp_doacross_init (ncounts, counts, chunk_size); ++ gomp_work_share_init_done (); ++ } ++ ++#ifdef HAVE_SYNC_BUILTINS ++ ret = gomp_iter_guided_next (istart, iend); ++#else ++ gomp_mutex_lock (&thr->ts.work_share->lock); ++ ret = gomp_iter_guided_next_locked (istart, iend); ++ gomp_mutex_unlock (&thr->ts.work_share->lock); ++#endif ++ ++ return ret; ++} ++ ++bool ++GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, ++ long *istart, long *iend) ++{ ++ struct gomp_task_icv *icv = 
gomp_icv (false); ++ switch (icv->run_sched_var) ++ { ++ case GFS_STATIC: ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_DYNAMIC: ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_GUIDED: ++ return gomp_loop_doacross_guided_start (ncounts, counts, ++ icv->run_sched_chunk_size, ++ istart, iend); ++ case GFS_AUTO: ++ /* For now map to schedule(static), later on we could play with feedback ++ driven choice. */ ++ return gomp_loop_doacross_static_start (ncounts, counts, ++ 0, istart, iend); ++ default: ++ abort (); ++ } ++} ++ + /* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. If the work-share + construct is bound directly to a parallel construct, then the iteration +@@ -483,7 +599,7 @@ GOMP_parallel_loop_runtime_start (void ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, 0); ++ icv->run_sched_var, icv->run_sched_chunk_size, 0); + } + + ialias_redirect (GOMP_parallel_end) +@@ -521,6 +637,37 @@ GOMP_parallel_loop_guided (void (*fn) (v + GOMP_parallel_end (); + } + ++#ifdef HAVE_ATTRIBUTE_ALIAS ++extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic ++ __attribute__((alias ("GOMP_parallel_loop_dynamic"))); ++extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided ++ __attribute__((alias ("GOMP_parallel_loop_guided"))); ++#else ++void ++GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_DYNAMIC, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++ ++void 
++GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, ++ unsigned num_threads, long start, ++ long end, long incr, long chunk_size, ++ unsigned flags) ++{ ++ gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, ++ GFS_GUIDED, chunk_size, flags); ++ fn (data); ++ GOMP_parallel_end (); ++} ++#endif ++ + void + GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, +@@ -528,7 +675,7 @@ GOMP_parallel_loop_runtime (void (*fn) ( + { + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, +- icv->run_sched_var, icv->run_sched_modifier, ++ icv->run_sched_var, icv->run_sched_chunk_size, + flags); + fn (data); + GOMP_parallel_end (); +@@ -569,6 +716,10 @@ extern __typeof(gomp_loop_dynamic_start) + __attribute__((alias ("gomp_loop_dynamic_start"))); + extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start + __attribute__((alias ("gomp_loop_guided_start"))); ++extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start ++ __attribute__((alias ("gomp_loop_dynamic_start"))); ++extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start ++ __attribute__((alias ("gomp_loop_guided_start"))); + + extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start + __attribute__((alias ("gomp_loop_ordered_static_start"))); +@@ -577,12 +728,23 @@ extern __typeof(gomp_loop_ordered_dynami + extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start + __attribute__((alias ("gomp_loop_ordered_guided_start"))); + ++extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start ++ __attribute__((alias ("gomp_loop_doacross_static_start"))); ++extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start ++ __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); ++extern __typeof(gomp_loop_doacross_guided_start) 
GOMP_loop_doacross_guided_start ++ __attribute__((alias ("gomp_loop_doacross_guided_start"))); ++ + extern __typeof(gomp_loop_static_next) GOMP_loop_static_next + __attribute__((alias ("gomp_loop_static_next"))); + extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next + __attribute__((alias ("gomp_loop_dynamic_next"))); + extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next + __attribute__((alias ("gomp_loop_guided_next"))); ++extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next ++ __attribute__((alias ("gomp_loop_dynamic_next"))); ++extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next ++ __attribute__((alias ("gomp_loop_guided_next"))); + + extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next + __attribute__((alias ("gomp_loop_ordered_static_next"))); +@@ -613,6 +775,21 @@ GOMP_loop_guided_start (long start, long + } + + bool ++GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr, ++ long chunk_size, long *istart, ++ long *iend) ++{ ++ return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); ++} ++ ++bool + GOMP_loop_ordered_static_start (long start, long end, long incr, + long chunk_size, long *istart, long *iend) + { +@@ -637,6 +814,30 @@ GOMP_loop_ordered_guided_start (long sta + } + + bool ++GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool ++GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool 
++GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, ++ long chunk_size, long *istart, long *iend) ++{ ++ return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, ++ istart, iend); ++} ++ ++bool + GOMP_loop_static_next (long *istart, long *iend) + { + return gomp_loop_static_next (istart, iend); +@@ -653,6 +854,18 @@ GOMP_loop_guided_next (long *istart, lon + { + return gomp_loop_guided_next (istart, iend); + } ++ ++bool ++GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend) ++{ ++ return gomp_loop_dynamic_next (istart, iend); ++} ++ ++bool ++GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend) ++{ ++ return gomp_loop_guided_next (istart, iend); ++} + + bool + GOMP_loop_ordered_static_next (long *istart, long *iend) +--- libgomp/error.c.jj 2013-01-21 16:00:31.834953566 +0100 ++++ libgomp/error.c 2016-07-13 16:57:04.437535335 +0200 +@@ -35,7 +35,26 @@ + #include + + +-static void ++#undef gomp_vdebug ++void ++gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list) ++{ ++ if (gomp_debug_var) ++ vfprintf (stderr, msg, list); ++} ++ ++#undef gomp_debug ++void ++gomp_debug (int kind, const char *msg, ...) ++{ ++ va_list list; ++ ++ va_start (list, msg); ++ gomp_vdebug (kind, msg, list); ++ va_end (list); ++} ++ ++void + gomp_verror (const char *fmt, va_list list) + { + fputs ("\nlibgomp: ", stderr); +@@ -54,13 +73,18 @@ gomp_error (const char *fmt, ...) + } + + void ++gomp_vfatal (const char *fmt, va_list list) ++{ ++ gomp_verror (fmt, list); ++ exit (EXIT_FAILURE); ++} ++ ++void + gomp_fatal (const char *fmt, ...) 
+ { + va_list list; + + va_start (list, fmt); +- gomp_verror (fmt, list); ++ gomp_vfatal (fmt, list); + va_end (list); +- +- exit (EXIT_FAILURE); + } +--- libgomp/Makefile.am.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.am 2016-07-14 16:10:51.968202878 +0200 +@@ -60,7 +60,13 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ + iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ + task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c ++ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ ++ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ ++ oacc-plugin.c oacc-cuda.c priority_queue.c ++ ++if USE_FORTRAN ++libgomp_la_SOURCES += openacc.f90 ++endif + + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h +--- libgomp/Makefile.in.jj 2014-05-15 11:12:10.000000000 +0200 ++++ libgomp/Makefile.in 2016-07-14 16:11:10.981954087 +0200 +@@ -36,6 +36,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ ++@USE_FORTRAN_TRUE@am__append_1 = openacc.f90 + subdir = . 
+ DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) \ +@@ -92,11 +93,15 @@ am__installdirs = "$(DESTDIR)$(toolexecl + "$(DESTDIR)$(toolexeclibdir)" + LTLIBRARIES = $(toolexeclib_LTLIBRARIES) + libgomp_la_LIBADD = ++@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo + am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ + error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ + parallel.lo sections.lo single.lo task.lo team.lo work.lo \ + lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ +- fortran.lo affinity.lo target.lo ++ fortran.lo affinity.lo target.lo splay-tree.lo \ ++ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ ++ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ ++ priority_queue.lo $(am__objects_1) + libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) + DEFAULT_INCLUDES = -I.@am__isrc@ + depcomp = $(SHELL) $(top_srcdir)/../depcomp +@@ -108,6 +113,13 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIB + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) + CCLD = $(CC) ++FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS) ++FCLD = $(FC) ++FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ ++ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ ++ $(LDFLAGS) -o $@ + SOURCES = $(libgomp_la_SOURCES) + MULTISRCTOP = + MULTIBUILDTOP = +@@ -315,10 +327,12 @@ libgomp_la_LDFLAGS = $(libgomp_version_i + libgomp_la_DEPENDENCIES = $(libgomp_version_dep) + libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) + libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ +- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ +- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ +- time.c fortran.c affinity.c target.c +- ++ 
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ ++ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ ++ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ ++ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ ++ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ ++ priority_queue.c $(am__append_1) + nodist_noinst_HEADERS = libgomp_f.h + nodist_libsubinclude_HEADERS = omp.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod +@@ -351,7 +365,7 @@ all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + + .SUFFIXES: +-.SUFFIXES: .c .dvi .lo .o .obj .ps ++.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps + am--refresh: + @: + $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) +@@ -463,17 +477,27 @@ distclean-compile: + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ + 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ +@@ -501,6 +525,15 @@ distclean-compile: + @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ + @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + ++.f90.o: ++ $(FCCOMPILE) -c -o $@ $< ++ ++.f90.obj: ++ $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` ++ ++.f90.lo: ++ $(LTFCCOMPILE) -c -o $@ $< ++ + mostlyclean-libtool: + -rm -f *.lo + +--- libgomp/task.c.jj 2014-08-06 16:25:16.575091658 +0200 ++++ libgomp/task.c 2016-07-13 17:47:58.722758497 +0200 +@@ -28,6 +28,7 @@ + #include "libgomp.h" + #include + #include ++#include "gomp-constants.h" + + typedef struct gomp_task_depend_entry *hash_entry_type; + +@@ -63,6 +64,14 @@ void + gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, + struct gomp_task_icv *prev_icv) + { ++ /* It would seem that using memset here would be a win, but it turns ++ out that partially filling gomp_task allows us to keep the ++ overhead of task creation low. In the nqueens-1.c test, for a ++ sufficiently large N, we drop the overhead from 5-6% to 1%. 
++ ++ Note, the nqueens-1.c test in serial mode is a good test to ++ benchmark the overhead of creating tasks as there are millions of ++ tiny tasks created that all run undeferred. */ + task->parent = parent_task; + task->icv = *prev_icv; + task->kind = GOMP_TASK_IMPLICIT; +@@ -71,7 +80,7 @@ gomp_init_task (struct gomp_task *task, + task->final_task = false; + task->copy_ctors_done = false; + task->parent_depends_on = false; +- task->children = NULL; ++ priority_queue_init (&task->children_queue); + task->taskgroup = NULL; + task->dependers = NULL; + task->depend_hash = NULL; +@@ -90,30 +99,194 @@ gomp_end_task (void) + thr->task = task->parent; + } + ++/* Clear the parent field of every task in LIST. */ ++ + static inline void +-gomp_clear_parent (struct gomp_task *children) ++gomp_clear_parent_in_list (struct priority_list *list) + { +- struct gomp_task *task = children; +- +- if (task) ++ struct priority_node *p = list->tasks; ++ if (p) + do + { +- task->parent = NULL; +- task = task->next_child; ++ priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; ++ p = p->next; + } +- while (task != children); ++ while (p != list->tasks); ++} ++ ++/* Splay tree version of gomp_clear_parent_in_list. ++ ++ Clear the parent field of every task in NODE within SP, and free ++ the node when done. */ ++ ++static void ++gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) ++{ ++ if (!node) ++ return; ++ prio_splay_tree_node left = node->left, right = node->right; ++ gomp_clear_parent_in_list (&node->key.l); ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ /* No need to remove the node from the tree. We're nuking ++ everything, so just free the nodes and our caller can clear the ++ entire splay tree. */ ++ free (node); ++ gomp_clear_parent_in_tree (sp, left); ++ gomp_clear_parent_in_tree (sp, right); ++} ++ ++/* Clear the parent field of every task in Q and remove every task ++ from Q. 
*/ ++ ++static inline void ++gomp_clear_parent (struct priority_queue *q) ++{ ++ if (priority_queue_multi_p (q)) ++ { ++ gomp_clear_parent_in_tree (&q->t, q->t.root); ++ /* All the nodes have been cleared in gomp_clear_parent_in_tree. ++ No need to remove anything. We can just nuke everything. */ ++ q->t.root = NULL; ++ } ++ else ++ gomp_clear_parent_in_list (&q->l); + } + +-static void gomp_task_maybe_wait_for_dependencies (void **depend); ++/* Helper function for GOMP_task and gomp_create_target_task. ++ ++ For a TASK with in/out dependencies, fill in the various dependency ++ queues. PARENT is the parent of said task. DEPEND is as in ++ GOMP_task. */ ++ ++static void ++gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, ++ void **depend) ++{ ++ size_t ndepend = (uintptr_t) depend[0]; ++ size_t nout = (uintptr_t) depend[1]; ++ size_t i; ++ hash_entry_type ent; ++ ++ task->depend_count = ndepend; ++ task->num_dependees = 0; ++ if (parent->depend_hash == NULL) ++ parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); ++ for (i = 0; i < ndepend; i++) ++ { ++ task->depend[i].addr = depend[2 + i]; ++ task->depend[i].next = NULL; ++ task->depend[i].prev = NULL; ++ task->depend[i].task = task; ++ task->depend[i].is_in = i >= nout; ++ task->depend[i].redundant = false; ++ task->depend[i].redundant_out = false; ++ ++ hash_entry_type *slot = htab_find_slot (&parent->depend_hash, ++ &task->depend[i], INSERT); ++ hash_entry_type out = NULL, last = NULL; ++ if (*slot) ++ { ++ /* If multiple depends on the same task are the same, all but the ++ first one are redundant. As inout/out come first, if any of them ++ is inout/out, it will win, which is the right semantics. */ ++ if ((*slot)->task == task) ++ { ++ task->depend[i].redundant = true; ++ continue; ++ } ++ for (ent = *slot; ent; ent = ent->next) ++ { ++ if (ent->redundant_out) ++ break; ++ ++ last = ent; ++ ++ /* depend(in:...) doesn't depend on earlier depend(in:...). 
*/ ++ if (i >= nout && ent->is_in) ++ continue; ++ ++ if (!ent->is_in) ++ out = ent; ++ ++ struct gomp_task *tsk = ent->task; ++ if (tsk->dependers == NULL) ++ { ++ tsk->dependers ++ = gomp_malloc (sizeof (struct gomp_dependers_vec) ++ + 6 * sizeof (struct gomp_task *)); ++ tsk->dependers->n_elem = 1; ++ tsk->dependers->allocated = 6; ++ tsk->dependers->elem[0] = task; ++ task->num_dependees++; ++ continue; ++ } ++ /* We already have some other dependency on tsk from earlier ++ depend clause. */ ++ else if (tsk->dependers->n_elem ++ && (tsk->dependers->elem[tsk->dependers->n_elem - 1] ++ == task)) ++ continue; ++ else if (tsk->dependers->n_elem == tsk->dependers->allocated) ++ { ++ tsk->dependers->allocated ++ = tsk->dependers->allocated * 2 + 2; ++ tsk->dependers ++ = gomp_realloc (tsk->dependers, ++ sizeof (struct gomp_dependers_vec) ++ + (tsk->dependers->allocated ++ * sizeof (struct gomp_task *))); ++ } ++ tsk->dependers->elem[tsk->dependers->n_elem++] = task; ++ task->num_dependees++; ++ } ++ task->depend[i].next = *slot; ++ (*slot)->prev = &task->depend[i]; ++ } ++ *slot = &task->depend[i]; ++ ++ /* There is no need to store more than one depend({,in}out:) task per ++ address in the hash table chain for the purpose of creation of ++ deferred tasks, because each out depends on all earlier outs, thus it ++ is enough to record just the last depend({,in}out:). For depend(in:), ++ we need to keep all of the previous ones not terminated yet, because ++ a later depend({,in}out:) might need to depend on all of them. So, if ++ the new task's clause is depend({,in}out:), we know there is at most ++ one other depend({,in}out:) clause in the list (out). For ++ non-deferred tasks we want to see all outs, so they are moved to the ++ end of the chain, after first redundant_out entry all following ++ entries should be redundant_out. 
*/ ++ if (!task->depend[i].is_in && out) ++ { ++ if (out != last) ++ { ++ out->next->prev = out->prev; ++ out->prev->next = out->next; ++ out->next = last->next; ++ out->prev = last; ++ last->next = out; ++ if (out->next) ++ out->next->prev = out; ++ } ++ out->redundant_out = true; ++ } ++ } ++} + + /* Called when encountering an explicit task directive. If IF_CLAUSE is + false, then we must not delay in executing the task. If UNTIED is true, +- then the task may be executed by any member of the team. */ ++ then the task may be executed by any member of the team. ++ ++ DEPEND is an array containing: ++ depend[0]: number of depend elements. ++ depend[1]: number of depend elements of type "out". ++ depend[2..N+1]: address of [1..N]th depend element. */ + + void + GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), + long arg_size, long arg_align, bool if_clause, unsigned flags, +- void **depend) ++ void **depend, int priority) + { + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; +@@ -125,8 +298,7 @@ GOMP_task (void (*fn) (void *), void *da + might be running on different thread than FN. */ + if (cpyfn) + if_clause = false; +- if (flags & 1) +- flags &= ~1; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; + #endif + + /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ +@@ -135,6 +307,11 @@ GOMP_task (void (*fn) (void *), void *da + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + ++ if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) ++ priority = 0; ++ else if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ + if (!if_clause || team == NULL + || (thr->task && thr->task->final_task) + || team->task_count > 64 * team->nthreads) +@@ -147,12 +324,15 @@ GOMP_task (void (*fn) (void *), void *da + depend clauses for non-deferred tasks other than this, because + the parent task is suspended until the child task finishes and thus + it can't start further child tasks. */ +- if ((flags & 8) && thr->task && thr->task->depend_hash) ++ if ((flags & GOMP_TASK_FLAG_DEPEND) ++ && thr->task && thr->task->depend_hash) + gomp_task_maybe_wait_for_dependencies (depend); + + gomp_init_task (&task, thr->task, gomp_icv (false)); +- task.kind = GOMP_TASK_IFFALSE; +- task.final_task = (thr->task && thr->task->final_task) || (flags & 2); ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ task.priority = priority; + if (thr->task) + { + task.in_tied_task = thr->task->in_tied_task; +@@ -178,10 +358,10 @@ GOMP_task (void (*fn) (void *), void *da + child thread, but seeing a stale non-NULL value is not a + problem. Once past the task_lock acquisition, this thread + will see the real value of task.children. 
*/ +- if (task.children != NULL) ++ if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) + { + gomp_mutex_lock (&team->task_lock); +- gomp_clear_parent (task.children); ++ gomp_clear_parent (&task.children_queue); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); +@@ -195,7 +375,7 @@ GOMP_task (void (*fn) (void *), void *da + bool do_wake; + size_t depend_size = 0; + +- if (flags & 8) ++ if (flags & GOMP_TASK_FLAG_DEPEND) + depend_size = ((uintptr_t) depend[0] + * sizeof (struct gomp_task_depend_entry)); + task = gomp_malloc (sizeof (*task) + depend_size +@@ -203,7 +383,8 @@ GOMP_task (void (*fn) (void *), void *da + arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + gomp_init_task (task, parent, gomp_icv (false)); +- task->kind = GOMP_TASK_IFFALSE; ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + thr->task = task; +@@ -218,7 +399,7 @@ GOMP_task (void (*fn) (void *), void *da + task->kind = GOMP_TASK_WAITING; + task->fn = fn; + task->fn_data = arg; +- task->final_task = (flags & 2) >> 1; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ +@@ -235,171 +416,39 @@ GOMP_task (void (*fn) (void *), void *da + taskgroup->num_children++; + if (depend_size) + { +- size_t ndepend = (uintptr_t) depend[0]; +- size_t nout = (uintptr_t) depend[1]; +- size_t i; +- hash_entry_type ent; +- +- task->depend_count = ndepend; +- task->num_dependees = 0; +- if (parent->depend_hash == NULL) +- parent->depend_hash +- = htab_create (2 * ndepend > 12 ? 
2 * ndepend : 12); +- for (i = 0; i < ndepend; i++) +- { +- task->depend[i].addr = depend[2 + i]; +- task->depend[i].next = NULL; +- task->depend[i].prev = NULL; +- task->depend[i].task = task; +- task->depend[i].is_in = i >= nout; +- task->depend[i].redundant = false; +- task->depend[i].redundant_out = false; +- +- hash_entry_type *slot +- = htab_find_slot (&parent->depend_hash, &task->depend[i], +- INSERT); +- hash_entry_type out = NULL, last = NULL; +- if (*slot) +- { +- /* If multiple depends on the same task are the +- same, all but the first one are redundant. +- As inout/out come first, if any of them is +- inout/out, it will win, which is the right +- semantics. */ +- if ((*slot)->task == task) +- { +- task->depend[i].redundant = true; +- continue; +- } +- for (ent = *slot; ent; ent = ent->next) +- { +- if (ent->redundant_out) +- break; +- +- last = ent; +- +- /* depend(in:...) doesn't depend on earlier +- depend(in:...). */ +- if (i >= nout && ent->is_in) +- continue; +- +- if (!ent->is_in) +- out = ent; +- +- struct gomp_task *tsk = ent->task; +- if (tsk->dependers == NULL) +- { +- tsk->dependers +- = gomp_malloc (sizeof (struct gomp_dependers_vec) +- + 6 * sizeof (struct gomp_task *)); +- tsk->dependers->n_elem = 1; +- tsk->dependers->allocated = 6; +- tsk->dependers->elem[0] = task; +- task->num_dependees++; +- continue; +- } +- /* We already have some other dependency on tsk +- from earlier depend clause. 
*/ +- else if (tsk->dependers->n_elem +- && (tsk->dependers->elem[tsk->dependers->n_elem +- - 1] +- == task)) +- continue; +- else if (tsk->dependers->n_elem +- == tsk->dependers->allocated) +- { +- tsk->dependers->allocated +- = tsk->dependers->allocated * 2 + 2; +- tsk->dependers +- = gomp_realloc (tsk->dependers, +- sizeof (struct gomp_dependers_vec) +- + (tsk->dependers->allocated +- * sizeof (struct gomp_task *))); +- } +- tsk->dependers->elem[tsk->dependers->n_elem++] = task; +- task->num_dependees++; +- } +- task->depend[i].next = *slot; +- (*slot)->prev = &task->depend[i]; +- } +- *slot = &task->depend[i]; +- +- /* There is no need to store more than one depend({,in}out:) +- task per address in the hash table chain for the purpose +- of creation of deferred tasks, because each out +- depends on all earlier outs, thus it is enough to record +- just the last depend({,in}out:). For depend(in:), we need +- to keep all of the previous ones not terminated yet, because +- a later depend({,in}out:) might need to depend on all of +- them. So, if the new task's clause is depend({,in}out:), +- we know there is at most one other depend({,in}out:) clause +- in the list (out). For non-deferred tasks we want to see +- all outs, so they are moved to the end of the chain, +- after first redundant_out entry all following entries +- should be redundant_out. */ +- if (!task->depend[i].is_in && out) +- { +- if (out != last) +- { +- out->next->prev = out->prev; +- out->prev->next = out->next; +- out->next = last->next; +- out->prev = last; +- last->next = out; +- if (out->next) +- out->next->prev = out; +- } +- out->redundant_out = true; +- } +- } ++ gomp_task_handle_depend (task, parent, depend); + if (task->num_dependees) + { ++ /* Tasks that depend on other tasks are not put into the ++ various waiting queues, so we are done for now. Said ++ tasks are instead put into the queues via ++ gomp_task_run_post_handle_dependers() after their ++ dependencies have been satisfied. 
After which, they ++ can be picked up by the various scheduling ++ points. */ + gomp_mutex_unlock (&team->task_lock); + return; + } + } +- if (parent->children) +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- } +- parent->children = task; ++ ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup) +- { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; +- } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ + ++team->task_count; + ++team->task_queued_count; + gomp_team_barrier_set_task_pending (&team->barrier); +@@ -411,36 +460,529 @@ GOMP_task (void (*fn) (void *), void *da + } + } + +-static inline bool +-gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, +- struct gomp_taskgroup *taskgroup, struct 
gomp_team *team) ++ialias (GOMP_taskgroup_start) ++ialias (GOMP_taskgroup_end) ++ ++#define TYPE long ++#define UTYPE unsigned long ++#define TYPE_is_long 1 ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef TYPE_is_long ++ ++#define TYPE unsigned long long ++#define UTYPE TYPE ++#define GOMP_taskloop GOMP_taskloop_ull ++#include "taskloop.c" ++#undef TYPE ++#undef UTYPE ++#undef GOMP_taskloop ++ ++static void inline ++priority_queue_move_task_first (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) + { ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to move first missing task %p", task); ++#endif ++ struct priority_list *list; ++ if (priority_queue_multi_p (head)) ++ { ++ list = priority_queue_lookup_priority (head, task->priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority %d", task->priority); ++#endif ++ } ++ else ++ list = &head->l; ++ priority_list_remove (list, task_to_priority_node (type, task), 0); ++ priority_list_insert (type, list, task, task->priority, ++ PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, ++ task->parent_depends_on); ++} ++ ++/* Actual body of GOMP_PLUGIN_target_task_completion that is executed ++ with team->task_lock held, or is executed in the thread that called ++ gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been ++ run before it acquires team->task_lock. 
*/ ++ ++static void ++gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) ++{ ++ struct gomp_task *parent = task->parent; + if (parent) ++ priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, ++ task); ++ ++ struct gomp_taskgroup *taskgroup = task->taskgroup; ++ if (taskgroup) ++ priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task); ++ ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, ++ PRIORITY_INSERT_BEGIN, false, ++ task->parent_depends_on); ++ task->kind = GOMP_TASK_WAITING; ++ if (parent && parent->taskwait) + { +- if (parent->children == child_task) +- parent->children = child_task->next_child; +- if (__builtin_expect (child_task->parent_depends_on, 0) +- && parent->taskwait->last_parent_depends_on == child_task) +- { +- if (child_task->prev_child->kind == GOMP_TASK_WAITING +- && child_task->prev_child->parent_depends_on) +- parent->taskwait->last_parent_depends_on = child_task->prev_child; +- else +- parent->taskwait->last_parent_depends_on = NULL; ++ if (parent->taskwait->in_taskwait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ else if (parent->taskwait->in_depend_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ ++ parent->taskwait->in_depend_wait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } ++ } ++ if (taskgroup && taskgroup->in_taskgroup_wait) ++ { ++ /* One more task has had its dependencies met. ++ Inform any waiters. 
*/ ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } +- if (taskgroup && taskgroup->children == child_task) +- taskgroup->children = child_task->next_taskgroup; +- child_task->prev_queue->next_queue = child_task->next_queue; +- child_task->next_queue->prev_queue = child_task->prev_queue; +- if (team->task_queue == child_task) ++ ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ /* I'm afraid this can't be done after releasing team->task_lock, ++ as gomp_target_task_completion is run from unrelated thread and ++ therefore in between gomp_mutex_unlock and gomp_team_barrier_wake ++ the team could be gone already. */ ++ if (team->nthreads > team->task_running_count) ++ gomp_team_barrier_wake (&team->barrier, 1); ++} ++ ++/* Signal that a target task TTASK has completed the asynchronously ++ running phase and should be requeued as a task to handle the ++ variable unmapping. */ ++ ++void ++GOMP_PLUGIN_target_task_completion (void *data) ++{ ++ struct gomp_target_task *ttask = (struct gomp_target_task *) data; ++ struct gomp_task *task = ttask->task; ++ struct gomp_team *team = ttask->team; ++ ++ gomp_mutex_lock (&team->task_lock); ++ if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) + { +- if (child_task->next_queue != child_task) +- team->task_queue = child_task->next_queue; ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_mutex_unlock (&team->task_lock); ++ return; ++ } ++ ttask->state = GOMP_TARGET_TASK_FINISHED; ++ gomp_target_task_completion (team, task); ++ gomp_mutex_unlock (&team->task_lock); ++} ++ ++static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); ++ ++/* Called for nowait target tasks. 
*/ ++ ++bool ++gomp_create_target_task (struct gomp_device_descr *devicep, ++ void (*fn) (void *), size_t mapnum, void **hostaddrs, ++ size_t *sizes, unsigned short *kinds, ++ unsigned int flags, void **depend, void **args, ++ enum gomp_target_task_state state) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks. */ ++ if (team ++ && (gomp_team_barrier_cancelled (&team->barrier) ++ || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) ++ return true; ++ ++ struct gomp_target_task *ttask; ++ struct gomp_task *task; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; ++ bool do_wake; ++ size_t depend_size = 0; ++ uintptr_t depend_cnt = 0; ++ size_t tgt_align = 0, tgt_size = 0; ++ ++ if (depend != NULL) ++ { ++ depend_cnt = (uintptr_t) depend[0]; ++ depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); ++ } ++ if (fn) ++ { ++ /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are ++ firstprivate on the target task. 
*/ ++ size_t i; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ if (tgt_align < align) ++ tgt_align = align; ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ tgt_size += sizes[i]; ++ } ++ if (tgt_align) ++ tgt_size += tgt_align - 1; + else +- team->task_queue = NULL; ++ tgt_size = 0; + } ++ ++ task = gomp_malloc (sizeof (*task) + depend_size ++ + sizeof (*ttask) ++ + mapnum * (sizeof (void *) + sizeof (size_t) ++ + sizeof (unsigned short)) ++ + tgt_size); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = 0; ++ task->kind = GOMP_TASK_WAITING; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; ++ ttask->devicep = devicep; ++ ttask->fn = fn; ++ ttask->mapnum = mapnum; ++ ttask->args = args; ++ memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ++ ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; ++ memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); ++ ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; ++ memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); ++ if (tgt_align) ++ { ++ char *tgt = (char *) &ttask->kinds[mapnum]; ++ size_t i; ++ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); ++ if (al) ++ tgt += tgt_align - al; ++ tgt_size = 0; ++ for (i = 0; i < mapnum; i++) ++ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) ++ { ++ size_t align = (size_t) 1 << (kinds[i] >> 8); ++ tgt_size = (tgt_size + align - 1) & ~(align - 1); ++ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); ++ ttask->hostaddrs[i] = tgt + tgt_size; ++ tgt_size = tgt_size + sizes[i]; ++ } ++ } ++ ttask->flags = flags; ++ ttask->state = state; ++ ttask->task = task; ++ ttask->team = team; ++ task->fn = NULL; ++ task->fn_data = ttask; ++ task->final_task = 0; ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been cancelled, 
don't start new tasks. */ ++ if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled), 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return true; ++ } ++ if (depend_size) ++ { ++ gomp_task_handle_depend (task, parent, depend); ++ if (task->num_dependees) ++ { ++ if (taskgroup) ++ taskgroup->num_children++; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ } ++ if (state == GOMP_TARGET_TASK_DATA) ++ { ++ gomp_task_run_post_handle_depend_hash (task); ++ gomp_mutex_unlock (&team->task_lock); ++ gomp_finish_task (task); ++ free (task); ++ return false; ++ } ++ if (taskgroup) ++ taskgroup->num_children++; ++ /* For async offloading, if we don't need to wait for dependencies, ++ run the gomp_target_task_fn right away, essentially schedule the ++ mapping part of the task in the current thread. */ ++ if (devicep != NULL ++ && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) ++ { ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, 0, PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ task->pnode[PQ_TEAM].next = NULL; ++ task->pnode[PQ_TEAM].prev = NULL; ++ task->kind = GOMP_TASK_TIED; ++ ++team->task_count; ++ gomp_mutex_unlock (&team->task_lock); ++ ++ thr->task = task; ++ gomp_target_task_fn (task->fn_data); ++ thr->task = parent; ++ ++ gomp_mutex_lock (&team->task_lock); ++ task->kind = GOMP_TASK_ASYNC_RUNNING; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. 
*/ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ gomp_mutex_unlock (&team->task_lock); ++ return true; ++ } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ gomp_team_barrier_set_task_pending (&team->barrier); ++ do_wake = team->task_running_count + !parent->in_tied_task ++ < team->nthreads; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, 1); ++ return true; ++} ++ ++/* Given a parent_depends_on task in LIST, move it to the front of its ++ priority so it is run as soon as possible. ++ ++ Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. ++ ++ We rearrange the queue such that all parent_depends_on tasks are ++ first, and last_parent_depends_on points to the last such task we ++ rearranged. For example, given the following tasks in a queue ++ where PD[123] are the parent_depends_on tasks: ++ ++ task->children ++ | ++ V ++ C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 ++ ++ We rearrange such that: ++ ++ task->children ++ | +--- last_parent_depends_on ++ | | ++ V V ++ PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. 
*/ ++ ++static void inline ++priority_list_upgrade_task (struct priority_list *list, ++ struct priority_node *node) ++{ ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ if (last_parent_depends_on) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ else if (node != list->tasks) ++ { ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ node->prev = list->tasks->prev; ++ node->next = list->tasks; ++ list->tasks = node; ++ node->prev->next = node; ++ node->next->prev = node; ++ } ++ list->last_parent_depends_on = node; ++} ++ ++/* Given a parent_depends_on TASK in its parent's children_queue, move ++ it to the front of its priority so it is run as soon as possible. ++ ++ PARENT is passed as an optimization. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). 
*/ ++ ++static void inline ++priority_queue_upgrade_task (struct gomp_task *task, ++ struct gomp_task *parent) ++{ ++ struct priority_queue *head = &parent->children_queue; ++ struct priority_node *node = &task->pnode[PQ_CHILDREN]; ++#if _LIBGOMP_CHECKING_ ++ if (!task->parent_depends_on) ++ gomp_fatal ("priority_queue_upgrade_task: task must be a " ++ "parent_depends_on task"); ++ if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) ++ gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ priority_list_upgrade_task (list, node); ++ } ++ else ++ priority_list_upgrade_task (&head->l, node); ++} ++ ++/* Given a CHILD_TASK in LIST that is about to be executed, move it out of ++ the way in LIST so that other tasks can be considered for ++ execution. LIST contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. */ ++ ++static void inline ++priority_list_downgrade_task (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *child_task) ++{ ++ struct priority_node *node = task_to_priority_node (type, child_task); ++ if (list->tasks == node) ++ list->tasks = node->next; ++ else if (node->next != list->tasks) ++ { ++ /* The task in NODE is about to become TIED and TIED tasks ++ cannot come before WAITING tasks. If we're about to ++ leave the queue in such an indeterminate state, rewire ++ things appropriately. However, a TIED task at the end is ++ perfectly fine. */ ++ struct gomp_task *next_task = priority_node_to_task (type, node->next); ++ if (next_task->kind == GOMP_TASK_WAITING) ++ { ++ /* Remove from list. */ ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ /* Rewire at the end. 
*/ ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ list->tasks->prev->next = node; ++ list->tasks->prev = node; ++ } ++ } ++ ++ /* If the current task is the last_parent_depends_on for its ++ priority, adjust last_parent_depends_on appropriately. */ ++ if (__builtin_expect (child_task->parent_depends_on, 0) ++ && list->last_parent_depends_on == node) ++ { ++ struct gomp_task *prev_child = priority_node_to_task (type, node->prev); ++ if (node->prev != node ++ && prev_child->kind == GOMP_TASK_WAITING ++ && prev_child->parent_depends_on) ++ list->last_parent_depends_on = node->prev; ++ else ++ { ++ /* There are no more parent_depends_on entries waiting ++ to run, clear the list. */ ++ list->last_parent_depends_on = NULL; ++ } ++ } ++} ++ ++/* Given a TASK in HEAD that is about to be executed, move it out of ++ the way so that other tasks can be considered for execution. HEAD ++ contains tasks of type TYPE. ++ ++ Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field ++ if applicable. ++ ++ (This function could be defined in priority_queue.c, but we want it ++ inlined, and putting it in priority_queue.h is not an option, given ++ that gomp_task has not been properly defined at that point). */ ++ ++static void inline ++priority_queue_downgrade_task (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to downgrade missing task %p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ priority_list_downgrade_task (type, list, task); ++ } ++ else ++ priority_list_downgrade_task (type, &head->l, task); ++} ++ ++/* Setup CHILD_TASK to execute. This is done by setting the task to ++ TIED, and updating all relevant queues so that CHILD_TASK is no ++ longer chosen for scheduling. 
Also, remove CHILD_TASK from the ++ overall team task queue entirely. ++ ++ Return TRUE if task or its containing taskgroup has been ++ cancelled. */ ++ ++static inline bool ++gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, ++ struct gomp_team *team) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (child_task->parent) ++ priority_queue_verify (PQ_CHILDREN, ++ &child_task->parent->children_queue, true); ++ if (child_task->taskgroup) ++ priority_queue_verify (PQ_TASKGROUP, ++ &child_task->taskgroup->taskgroup_queue, false); ++ priority_queue_verify (PQ_TEAM, &team->task_queue, false); ++#endif ++ ++ /* Task is about to go tied, move it out of the way. */ ++ if (parent) ++ priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, ++ child_task); ++ ++ /* Task is about to go tied, move it out of the way. */ ++ struct gomp_taskgroup *taskgroup = child_task->taskgroup; ++ if (taskgroup) ++ priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ child_task); ++ ++ priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, ++ MEMMODEL_RELAXED); ++ child_task->pnode[PQ_TEAM].next = NULL; ++ child_task->pnode[PQ_TEAM].prev = NULL; + child_task->kind = GOMP_TASK_TIED; ++ + if (--team->task_queued_count == 0) + gomp_team_barrier_clear_task_pending (&team->barrier); + if ((gomp_team_barrier_cancelled (&team->barrier) +@@ -478,6 +1020,14 @@ gomp_task_run_post_handle_depend_hash (s + } + } + ++/* After a CHILD_TASK has been run, adjust the dependency queue for ++ each task that depends on CHILD_TASK, to record the fact that there ++ is one less dependency to worry about. If a task that depended on ++ CHILD_TASK now has no dependencies, place it in the various queues ++ so it gets scheduled to run. ++ ++ TEAM is the team to which CHILD_TASK belongs to. 
*/ ++ + static size_t + gomp_task_run_post_handle_dependers (struct gomp_task *child_task, + struct gomp_team *team) +@@ -487,91 +1037,60 @@ gomp_task_run_post_handle_dependers (str + for (i = 0; i < count; i++) + { + struct gomp_task *task = child_task->dependers->elem[i]; ++ ++ /* CHILD_TASK satisfies a dependency for TASK. Keep track of ++ TASK's remaining dependencies. Once TASK has no other ++ dependencies, put it into the various queues so it will get ++ scheduled for execution. */ + if (--task->num_dependees != 0) + continue; + + struct gomp_taskgroup *taskgroup = task->taskgroup; + if (parent) + { +- if (parent->children) +- { +- /* If parent is in gomp_task_maybe_wait_for_dependencies +- and it doesn't need to wait for this task, put it after +- all ready to run tasks it needs to wait for. */ +- if (parent->taskwait && parent->taskwait->last_parent_depends_on +- && !task->parent_depends_on) +- { +- struct gomp_task *last_parent_depends_on +- = parent->taskwait->last_parent_depends_on; +- task->next_child = last_parent_depends_on->next_child; +- task->prev_child = last_parent_depends_on; +- } +- else +- { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; +- parent->children = task; +- } +- task->next_child->prev_child = task; +- task->prev_child->next_child = task; +- } +- else +- { +- task->next_child = task; +- task->prev_child = task; +- parent->children = task; +- } ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/true, ++ task->parent_depends_on); + if (parent->taskwait) + { + if (parent->taskwait->in_taskwait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + parent->taskwait->in_taskwait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } + else if (parent->taskwait->in_depend_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. 
*/ + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- if (parent->taskwait->last_parent_depends_on == NULL +- && task->parent_depends_on) +- parent->taskwait->last_parent_depends_on = task; + } + } + if (taskgroup) + { +- if (taskgroup->children) +- { +- task->next_taskgroup = taskgroup->children; +- task->prev_taskgroup = taskgroup->children->prev_taskgroup; +- task->next_taskgroup->prev_taskgroup = task; +- task->prev_taskgroup->next_taskgroup = task; +- } +- else +- { +- task->next_taskgroup = task; +- task->prev_taskgroup = task; +- } +- taskgroup->children = task; ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, task->priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + if (taskgroup->in_taskgroup_wait) + { ++ /* One more task has had its dependencies met. ++ Inform any waiters. */ + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); + } + } +- if (team->task_queue) +- { +- task->next_queue = team->task_queue; +- task->prev_queue = team->task_queue->prev_queue; +- task->next_queue->prev_queue = task; +- task->prev_queue->next_queue = task; +- } +- else +- { +- task->next_queue = task; +- task->prev_queue = task; +- team->task_queue = task; +- } ++ priority_queue_insert (PQ_TEAM, &team->task_queue, ++ task, task->priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); + ++team->task_count; + ++team->task_queued_count; + ++ret; +@@ -601,12 +1120,18 @@ gomp_task_run_post_handle_depend (struct + return gomp_task_run_post_handle_dependers (child_task, team); + } + ++/* Remove CHILD_TASK from its parent. 
*/ ++ + static inline void + gomp_task_run_post_remove_parent (struct gomp_task *child_task) + { + struct gomp_task *parent = child_task->parent; + if (parent == NULL) + return; ++ ++ /* If this was the last task the parent was depending on, ++ synchronize with gomp_task_maybe_wait_for_dependencies so it can ++ clean up and return. */ + if (__builtin_expect (child_task->parent_depends_on, 0) + && --parent->taskwait->n_depend == 0 + && parent->taskwait->in_depend_wait) +@@ -614,36 +1139,31 @@ gomp_task_run_post_remove_parent (struct + parent->taskwait->in_depend_wait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); + } +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (parent->children != child_task) +- return; +- if (child_task->next_child != child_task) +- parent->children = child_task->next_child; +- else ++ ++ if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, ++ child_task, MEMMODEL_RELEASE) ++ && parent->taskwait && parent->taskwait->in_taskwait) + { +- /* We access task->children in GOMP_taskwait +- outside of the task lock mutex region, so +- need a release barrier here to ensure memory +- written by child_task->fn above is flushed +- before the NULL is written. */ +- __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); +- if (parent->taskwait && parent->taskwait->in_taskwait) +- { +- parent->taskwait->in_taskwait = false; +- gomp_sem_post (&parent->taskwait->taskwait_sem); +- } ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } + ++/* Remove CHILD_TASK from its taskgroup. 
*/ ++ + static inline void + gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) + { + struct gomp_taskgroup *taskgroup = child_task->taskgroup; + if (taskgroup == NULL) + return; +- child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; +- child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; ++ bool empty = priority_queue_remove (PQ_TASKGROUP, ++ &taskgroup->taskgroup_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_TASKGROUP].next = NULL; ++ child_task->pnode[PQ_TASKGROUP].prev = NULL; + if (taskgroup->num_children > 1) + --taskgroup->num_children; + else +@@ -655,18 +1175,10 @@ gomp_task_run_post_remove_taskgroup (str + before the NULL is written. */ + __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); + } +- if (taskgroup->children != child_task) +- return; +- if (child_task->next_taskgroup != child_task) +- taskgroup->children = child_task->next_taskgroup; +- else ++ if (empty && taskgroup->in_taskgroup_wait) + { +- taskgroup->children = NULL; +- if (taskgroup->in_taskgroup_wait) +- { +- taskgroup->in_taskgroup_wait = false; +- gomp_sem_post (&taskgroup->taskgroup_sem); +- } ++ taskgroup->in_taskgroup_wait = false; ++ gomp_sem_post (&taskgroup->taskgroup_sem); + } + } + +@@ -696,11 +1208,15 @@ gomp_barrier_handle_tasks (gomp_barrier_ + while (1) + { + bool cancelled = false; +- if (team->task_queue != NULL) ++ if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) + { +- child_task = team->task_queue; ++ bool ignored; ++ child_task ++ = priority_queue_next_task (PQ_TEAM, &team->task_queue, ++ PQ_IGNORED, NULL, ++ &ignored); + cancelled = gomp_task_run_pre (child_task, child_task->parent, +- child_task->taskgroup, team); ++ team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -729,7 +1245,29 @@ gomp_barrier_handle_tasks (gomp_barrier_ + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect 
(child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ team->task_running_count--; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -741,7 +1279,7 @@ gomp_barrier_handle_tasks (gomp_barrier_ + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -765,7 +1303,9 @@ gomp_barrier_handle_tasks (gomp_barrier_ + } + } + +-/* Called when encountering a taskwait directive. */ ++/* Called when encountering a taskwait directive. ++ ++ Wait for all children of the current task. */ + + void + GOMP_taskwait (void) +@@ -785,15 +1325,16 @@ GOMP_taskwait (void) + child thread task work function are seen before we exit from + GOMP_taskwait. 
*/ + if (task == NULL +- || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) ++ || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) + return; + + memset (&taskwait, 0, sizeof (taskwait)); ++ bool child_q = false; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) + { + bool destroy_taskwait = task->taskwait != NULL; + task->taskwait = NULL; +@@ -807,12 +1348,14 @@ GOMP_taskwait (void) + gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } +- if (task->children->kind == GOMP_TASK_WAITING) ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, &child_q); ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -826,8 +1369,10 @@ GOMP_taskwait (void) + } + else + { +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. 
*/ + if (task->taskwait == NULL) + { + taskwait.in_depend_wait = false; +@@ -851,7 +1396,28 @@ GOMP_taskwait (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. */ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -862,17 +1428,19 @@ GOMP_taskwait (void) + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; ++ ++ if (child_q) ++ { ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; + } +- gomp_clear_parent (child_task->children); ++ ++ gomp_clear_parent (&child_task->children_queue); ++ + gomp_task_run_post_remove_taskgroup (child_task); ++ + to_free = child_task; + child_task = NULL; + team->task_count--; +@@ -887,10 +1455,20 @@ GOMP_taskwait (void) + } + } + +-/* This is like GOMP_taskwait, but we only wait for tasks that the +- upcoming task depends on. 
*/ ++/* An undeferred task is about to run. Wait for all tasks that this ++ undeferred task depends on. + +-static void ++ This is done by first putting all known ready dependencies ++ (dependencies that have their own dependencies met) at the top of ++ the scheduling queues. Then we iterate through these imminently ++ ready tasks (and possibly other high priority tasks), and run them. ++ If we run out of ready dependencies to execute, we either wait for ++ the remaining dependencies to finish, or wait for them to get ++ scheduled so we can run them. ++ ++ DEPEND is as in GOMP_task. */ ++ ++void + gomp_task_maybe_wait_for_dependencies (void **depend) + { + struct gomp_thread *thr = gomp_thread (); +@@ -898,7 +1476,6 @@ gomp_task_maybe_wait_for_dependencies (v + struct gomp_team *team = thr->ts.team; + struct gomp_task_depend_entry elem, *ent = NULL; + struct gomp_taskwait taskwait; +- struct gomp_task *last_parent_depends_on = NULL; + size_t ndepend = (uintptr_t) depend[0]; + size_t nout = (uintptr_t) depend[1]; + size_t i; +@@ -922,32 +1499,11 @@ gomp_task_maybe_wait_for_dependencies (v + { + tsk->parent_depends_on = true; + ++num_awaited; ++ /* If dependency TSK itself has no dependencies and is ++ ready to run, move it up front so that we run it as ++ soon as possible. */ + if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) +- { +- /* If a task we need to wait for is not already +- running and is ready to be scheduled, move it +- to front, so that we run it as soon as possible. 
*/ +- if (last_parent_depends_on) +- { +- tsk->prev_child->next_child = tsk->next_child; +- tsk->next_child->prev_child = tsk->prev_child; +- tsk->prev_child = last_parent_depends_on; +- tsk->next_child = last_parent_depends_on->next_child; +- tsk->prev_child->next_child = tsk; +- tsk->next_child->prev_child = tsk; +- } +- else if (tsk != task->children) +- { +- tsk->prev_child->next_child = tsk->next_child; +- tsk->next_child->prev_child = tsk->prev_child; +- tsk->prev_child = task->children; +- tsk->next_child = task->children->next_child; +- task->children = tsk; +- tsk->prev_child->next_child = tsk; +- tsk->next_child->prev_child = tsk; +- } +- last_parent_depends_on = tsk; +- } ++ priority_queue_upgrade_task (tsk, task); + } + } + } +@@ -959,7 +1515,6 @@ gomp_task_maybe_wait_for_dependencies (v + + memset (&taskwait, 0, sizeof (taskwait)); + taskwait.n_depend = num_awaited; +- taskwait.last_parent_depends_on = last_parent_depends_on; + gomp_sem_init (&taskwait.taskwait_sem, 0); + task->taskwait = &taskwait; + +@@ -978,12 +1533,30 @@ gomp_task_maybe_wait_for_dependencies (v + gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } +- if (task->children->kind == GOMP_TASK_WAITING) ++ ++ /* Theoretically when we have multiple priorities, we should ++ choose between the highest priority item in ++ task->children_queue and team->task_queue here, so we should ++ use priority_queue_next_task(). However, since we are ++ running an undeferred task, perhaps that makes all tasks it ++ depends on undeferred, thus a priority of INF? This would ++ make it unnecessary to take anything into account here, ++ but the dependencies. ++ ++ On the other hand, if we want to use priority_queue_next_task(), ++ care should be taken to only use priority_queue_remove() ++ below if the task was actually removed from the children ++ queue. 
*/ ++ bool ignored; ++ struct gomp_task *next_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_IGNORED, NULL, &ignored); ++ ++ if (next_task->kind == GOMP_TASK_WAITING) + { +- child_task = task->children; ++ child_task = next_task; + cancelled +- = gomp_task_run_pre (child_task, task, child_task->taskgroup, +- team); ++ = gomp_task_run_pre (child_task, task, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -996,8 +1569,10 @@ gomp_task_maybe_wait_for_dependencies (v + } + } + else +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskwait.in_depend_wait = true; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) +@@ -1014,7 +1589,28 @@ gomp_task_maybe_wait_for_dependencies (v + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. 
*/ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1027,16 +1623,13 @@ gomp_task_maybe_wait_for_dependencies (v + = gomp_task_run_post_handle_depend (child_task, team); + if (child_task->parent_depends_on) + --taskwait.n_depend; +- child_task->prev_child->next_child = child_task->next_child; +- child_task->next_child->prev_child = child_task->prev_child; +- if (task->children == child_task) +- { +- if (child_task->next_child != child_task) +- task->children = child_task->next_child; +- else +- task->children = NULL; +- } +- gomp_clear_parent (child_task->children); ++ ++ priority_queue_remove (PQ_CHILDREN, &task->children_queue, ++ child_task, MEMMODEL_RELAXED); ++ child_task->pnode[PQ_CHILDREN].next = NULL; ++ child_task->pnode[PQ_CHILDREN].prev = NULL; ++ ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +@@ -1069,14 +1662,14 @@ GOMP_taskgroup_start (void) + struct gomp_taskgroup *taskgroup; + + /* If team is NULL, all tasks are executed as +- GOMP_TASK_IFFALSE tasks and thus all children tasks of ++ GOMP_TASK_UNDEFERRED tasks and thus all children tasks of + taskgroup and their descendant tasks will be finished + by the time GOMP_taskgroup_end is called. 
*/ + if (team == NULL) + return; + taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = task->taskgroup; +- taskgroup->children = NULL; ++ priority_queue_init (&taskgroup->taskgroup_queue); + taskgroup->in_taskgroup_wait = false; + taskgroup->cancelled = false; + taskgroup->num_children = 0; +@@ -1098,6 +1691,17 @@ GOMP_taskgroup_end (void) + if (team == NULL) + return; + taskgroup = task->taskgroup; ++ if (__builtin_expect (taskgroup == NULL, 0) ++ && thr->ts.level == 0) ++ { ++ /* This can happen if GOMP_taskgroup_start is called when ++ thr->ts.team == NULL, but inside of the taskgroup there ++ is #pragma omp target nowait that creates an implicit ++ team with a single thread. In this case, we want to wait ++ for all outstanding tasks in this team. */ ++ gomp_team_barrier_wait (&team->barrier); ++ return; ++ } + + /* The acquire barrier on load of taskgroup->num_children here + synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. +@@ -1108,19 +1712,25 @@ GOMP_taskgroup_end (void) + if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) + goto finish; + ++ bool unused; + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; +- if (taskgroup->children == NULL) ++ if (priority_queue_empty_p (&taskgroup->taskgroup_queue, ++ MEMMODEL_RELAXED)) + { + if (taskgroup->num_children) + { +- if (task->children == NULL) ++ if (priority_queue_empty_p (&task->children_queue, ++ MEMMODEL_RELAXED)) + goto do_wait; +- child_task = task->children; +- } +- else ++ child_task ++ = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, ++ PQ_TEAM, &team->task_queue, ++ &unused); ++ } ++ else + { + gomp_mutex_unlock (&team->task_lock); + if (to_free) +@@ -1132,12 +1742,13 @@ GOMP_taskgroup_end (void) + } + } + else +- child_task = taskgroup->children; ++ child_task ++ = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ PQ_TEAM, &team->task_queue, &unused); + if 
(child_task->kind == GOMP_TASK_WAITING) + { + cancelled +- = gomp_task_run_pre (child_task, child_task->parent, taskgroup, +- team); ++ = gomp_task_run_pre (child_task, child_task->parent, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) +@@ -1153,8 +1764,10 @@ GOMP_taskgroup_end (void) + { + child_task = NULL; + do_wait: +- /* All tasks we are waiting for are already running +- in other threads. Wait for them. */ ++ /* All tasks we are waiting for are either running in other ++ threads, or they are tasks that have not had their ++ dependencies met (so they're not even in the queue). Wait ++ for them. */ + taskgroup->in_taskgroup_wait = true; + } + gomp_mutex_unlock (&team->task_lock); +@@ -1172,7 +1785,28 @@ GOMP_taskgroup_end (void) + if (child_task) + { + thr->task = child_task; +- child_task->fn (child_task->fn_data); ++ if (__builtin_expect (child_task->fn == NULL, 0)) ++ { ++ if (gomp_target_task_fn (child_task->fn_data)) ++ { ++ thr->task = task; ++ gomp_mutex_lock (&team->task_lock); ++ child_task->kind = GOMP_TASK_ASYNC_RUNNING; ++ struct gomp_target_task *ttask ++ = (struct gomp_target_task *) child_task->fn_data; ++ /* If GOMP_PLUGIN_target_task_completion has run already ++ in between gomp_target_task_fn and the mutex lock, ++ perform the requeuing here. 
*/ ++ if (ttask->state == GOMP_TARGET_TASK_FINISHED) ++ gomp_target_task_completion (team, child_task); ++ else ++ ttask->state = GOMP_TARGET_TASK_RUNNING; ++ child_task = NULL; ++ continue; ++ } ++ } ++ else ++ child_task->fn (child_task->fn_data); + thr->task = task; + } + else +@@ -1184,7 +1818,7 @@ GOMP_taskgroup_end (void) + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); +- gomp_clear_parent (child_task->children); ++ gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; +--- libgomp/libgomp_g.h.jj 2014-05-15 10:56:31.429532978 +0200 ++++ libgomp/libgomp_g.h 2016-07-13 16:57:04.422535521 +0200 +@@ -29,6 +29,7 @@ + #define LIBGOMP_G_H 1 + + #include ++#include + + /* barrier.c */ + +@@ -50,6 +51,10 @@ extern bool GOMP_loop_static_start (long + extern bool GOMP_loop_dynamic_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_guided_start (long, long, long, long, long *, long *); + extern bool GOMP_loop_runtime_start (long, long, long, long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_start (long, long, long, long, ++ long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_start (long, long, long, long, ++ long *, long *); + + extern bool GOMP_loop_ordered_static_start (long, long, long, long, + long *, long *); +@@ -63,12 +68,23 @@ extern bool GOMP_loop_static_next (long + extern bool GOMP_loop_dynamic_next (long *, long *); + extern bool GOMP_loop_guided_next (long *, long *); + extern bool GOMP_loop_runtime_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_dynamic_next (long *, long *); ++extern bool GOMP_loop_nonmonotonic_guided_next (long *, long *); + + extern bool GOMP_loop_ordered_static_next (long *, long *); + extern bool GOMP_loop_ordered_dynamic_next (long *, long *); + extern bool GOMP_loop_ordered_guided_next (long *, long *); + extern 
bool GOMP_loop_ordered_runtime_next (long *, long *); + ++extern bool GOMP_loop_doacross_static_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_dynamic_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_guided_start (unsigned, long *, long, long *, ++ long *); ++extern bool GOMP_loop_doacross_runtime_start (unsigned, long *, long *, ++ long *); ++ + extern void GOMP_parallel_loop_static_start (void (*)(void *), void *, + unsigned, long, long, long, long); + extern void GOMP_parallel_loop_dynamic_start (void (*)(void *), void *, +@@ -89,6 +105,12 @@ extern void GOMP_parallel_loop_guided (v + extern void GOMP_parallel_loop_runtime (void (*)(void *), void *, + unsigned, long, long, long, + unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_dynamic (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); ++extern void GOMP_parallel_loop_nonmonotonic_guided (void (*)(void *), void *, ++ unsigned, long, long, ++ long, long, unsigned); + + extern void GOMP_loop_end (void); + extern void GOMP_loop_end_nowait (void); +@@ -119,6 +141,18 @@ extern bool GOMP_loop_ull_runtime_start + unsigned long long, + unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_start (bool, unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_start (bool, unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_start (bool, unsigned long long, + unsigned long long, +@@ -152,6 +186,10 @@ extern bool GOMP_loop_ull_guided_next (u + unsigned long long *); + extern bool GOMP_loop_ull_runtime_next (unsigned long long *, + unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_dynamic_next (unsigned 
long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_nonmonotonic_guided_next (unsigned long long *, ++ unsigned long long *); + + extern bool GOMP_loop_ull_ordered_static_next (unsigned long long *, + unsigned long long *); +@@ -162,10 +200,34 @@ extern bool GOMP_loop_ull_ordered_guided + extern bool GOMP_loop_ull_ordered_runtime_next (unsigned long long *, + unsigned long long *); + ++extern bool GOMP_loop_ull_doacross_static_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_dynamic_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_guided_start (unsigned, ++ unsigned long long *, ++ unsigned long long, ++ unsigned long long *, ++ unsigned long long *); ++extern bool GOMP_loop_ull_doacross_runtime_start (unsigned, ++ unsigned long long *, ++ unsigned long long *, ++ unsigned long long *); ++ + /* ordered.c */ + + extern void GOMP_ordered_start (void); + extern void GOMP_ordered_end (void); ++extern void GOMP_doacross_post (long *); ++extern void GOMP_doacross_wait (long, ...); ++extern void GOMP_doacross_ull_post (unsigned long long *); ++extern void GOMP_doacross_ull_wait (unsigned long long, ...); + + /* parallel.c */ + +@@ -178,7 +240,15 @@ extern bool GOMP_cancellation_point (int + /* task.c */ + + extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), +- long, long, bool, unsigned, void **); ++ long, long, bool, unsigned, void **, int); ++extern void GOMP_taskloop (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, unsigned, ++ unsigned long, int, long, long, long); ++extern void GOMP_taskloop_ull (void (*) (void *), void *, ++ void (*) (void *, void *), long, long, ++ unsigned, unsigned long, int, ++ unsigned long long, unsigned long long, ++ unsigned long long); + extern void GOMP_taskwait (void); + 
extern void GOMP_taskyield (void); + extern void GOMP_taskgroup_start (void); +@@ -206,11 +276,38 @@ extern void GOMP_single_copy_end (void * + + extern void GOMP_target (int, void (*) (void *), const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **, void **); + extern void GOMP_target_data (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_data_ext (int, size_t, void **, size_t *, ++ unsigned short *); + extern void GOMP_target_end_data (void); + extern void GOMP_target_update (int, const void *, + size_t, void **, size_t *, unsigned char *); ++extern void GOMP_target_update_ext (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, void **); ++extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *, ++ unsigned short *, unsigned int, ++ void **); + extern void GOMP_teams (unsigned int, unsigned int); + ++/* oacc-parallel.c */ ++ ++extern void GOACC_parallel_keyed (int, void (*) (void *), size_t, ++ void **, size_t *, unsigned short *, ...); ++extern void GOACC_parallel (int, void (*) (void *), size_t, void **, size_t *, ++ unsigned short *, int, int, int, int, int, ...); ++extern void GOACC_data_start (int, size_t, void **, size_t *, ++ unsigned short *); ++extern void GOACC_data_end (void); ++extern void GOACC_enter_exit_data (int, size_t, void **, ++ size_t *, unsigned short *, int, int, ...); ++extern void GOACC_update (int, size_t, void **, size_t *, ++ unsigned short *, int, int, ...); ++extern void GOACC_wait (int, int, ...); ++extern int GOACC_get_num_threads (void); ++extern int GOACC_get_thread_num (void); ++extern void GOACC_declare (int, size_t, void **, size_t *, unsigned short *); ++ + #endif /* LIBGOMP_G_H */ +--- libgomp/libgomp.h.jj 2014-08-01 15:59:49.145188127 +0200 ++++ libgomp/libgomp.h 2016-07-14 17:40:24.038243456 +0200 +@@ -34,12 +34,35 @@ + 
#ifndef LIBGOMP_H + #define LIBGOMP_H 1 + ++#ifndef _LIBGOMP_CHECKING_ ++/* Define to 1 to perform internal sanity checks. */ ++#define _LIBGOMP_CHECKING_ 0 ++#endif ++ + #include "config.h" + #include "gstdint.h" ++#include "libgomp-plugin.h" + + #include <pthread.h> + #include <stdbool.h> + #include <stdlib.h> ++#include <stdarg.h> ++ ++/* Needed for memset in priority_queue.c. */ ++#if _LIBGOMP_CHECKING_ ++# ifdef STRING_WITH_STRINGS ++# include <string.h> ++# include <strings.h> ++# else ++# ifdef HAVE_STRING_H ++# include <string.h> ++# else ++# ifdef HAVE_STRINGS_H ++# include <strings.h> ++# endif ++# endif ++# endif ++#endif + + #ifdef HAVE_ATTRIBUTE_VISIBILITY + # pragma GCC visibility push(hidden) +@@ -56,6 +79,44 @@ enum memmodel + MEMMODEL_SEQ_CST = 5 + }; + ++/* alloc.c */ ++ ++extern void *gomp_malloc (size_t) __attribute__((malloc)); ++extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); ++extern void *gomp_realloc (void *, size_t); ++ ++/* Avoid conflicting prototypes of alloca() in system headers by using ++ GCC's builtin alloca(). */ ++#define gomp_alloca(x) __builtin_alloca(x) ++ ++/* error.c */ ++ ++extern void gomp_vdebug (int, const char *, va_list); ++extern void gomp_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++#define gomp_vdebug(KIND, FMT, VALIST) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ ++ } while (0) ++#define gomp_debug(KIND, ...) \ ++ do { \ ++ if (__builtin_expect (gomp_debug_var, 0)) \ ++ (gomp_debug) ((KIND), __VA_ARGS__); \ ++ } while (0) ++extern void gomp_verror (const char *, va_list); ++extern void gomp_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void gomp_vfatal (const char *, va_list) ++ __attribute__ ((noreturn)); ++extern void gomp_fatal (const char *, ...)
++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++struct gomp_task; ++struct gomp_taskgroup; ++struct htab; ++ ++#include "priority_queue.h" + #include "sem.h" + #include "mutex.h" + #include "bar.h" +@@ -74,6 +135,44 @@ enum gomp_schedule_type + GFS_AUTO + }; + ++struct gomp_doacross_work_share ++{ ++ union { ++ /* chunk_size copy, as ws->chunk_size is multiplied by incr for ++ GFS_DYNAMIC. */ ++ long chunk_size; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long chunk_size_ull; ++ /* For schedule(static,0) this is the number ++ of iterations assigned to the last thread, i.e. number of ++ iterations / number of threads. */ ++ long q; ++ /* Likewise, but for ull implementation. */ ++ unsigned long long q_ull; ++ }; ++ /* Size of each array entry (padded to cache line size). */ ++ unsigned long elt_sz; ++ /* Number of dimensions in sink vectors. */ ++ unsigned int ncounts; ++ /* True if the iterations can be flattened. */ ++ bool flattened; ++ /* Actual array (of elt_sz sized units), aligned to cache line size. ++ This is indexed by team_id for GFS_STATIC and outermost iteration ++ / chunk_size for other schedules. */ ++ unsigned char *array; ++ /* These two are only used for schedule(static,0). */ ++ /* This one is number of iterations % number of threads. */ ++ long t; ++ union { ++ /* And this one is cached t * (q + 1). */ ++ long boundary; ++ /* Likewise, but for the ull implementation. */ ++ unsigned long long boundary_ull; ++ }; ++ /* Array of shift counts for each dimension if they can be flattened. */ ++ unsigned int shift_counts[]; ++}; ++ + struct gomp_work_share + { + /* This member records the SCHEDULE clause to be used for this construct. +@@ -105,13 +204,18 @@ struct gomp_work_share + }; + }; + +- /* This is a circular queue that details which threads will be allowed +- into the ordered region and in which order. 
When a thread allocates +- iterations on which it is going to work, it also registers itself at +- the end of the array. When a thread reaches the ordered region, it +- checks to see if it is the one at the head of the queue. If not, it +- blocks on its RELEASE semaphore. */ +- unsigned *ordered_team_ids; ++ union { ++ /* This is a circular queue that details which threads will be allowed ++ into the ordered region and in which order. When a thread allocates ++ iterations on which it is going to work, it also registers itself at ++ the end of the array. When a thread reaches the ordered region, it ++ checks to see if it is the one at the head of the queue. If not, it ++ blocks on its RELEASE semaphore. */ ++ unsigned *ordered_team_ids; ++ ++ /* This is a pointer to DOACROSS work share data. */ ++ struct gomp_doacross_work_share *doacross; ++ }; + + /* This is the number of threads that have registered themselves in + the circular queue ordered_team_ids. */ +@@ -230,7 +334,7 @@ struct gomp_task_icv + { + unsigned long nthreads_var; + enum gomp_schedule_type run_sched_var; +- int run_sched_modifier; ++ int run_sched_chunk_size; + int default_device_var; + unsigned int thread_limit_var; + bool dyn_var; +@@ -246,6 +350,7 @@ extern gomp_mutex_t gomp_managed_threads + #endif + extern unsigned long gomp_max_active_levels_var; + extern bool gomp_cancel_var; ++extern int gomp_max_task_priority_var; + extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; + extern unsigned long gomp_available_cpus, gomp_managed_threads; + extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; +@@ -253,25 +358,36 @@ extern char *gomp_bind_var_list; + extern unsigned long gomp_bind_var_list_len; + extern void **gomp_places_list; + extern unsigned long gomp_places_list_len; ++extern int gomp_debug_var; ++extern int goacc_device_num; ++extern char *goacc_device_type; + + enum gomp_task_kind + { ++ /* Implicit task. 
*/ + GOMP_TASK_IMPLICIT, +- GOMP_TASK_IFFALSE, ++ /* Undeferred task. */ ++ GOMP_TASK_UNDEFERRED, ++ /* Task created by GOMP_task and waiting to be run. */ + GOMP_TASK_WAITING, +- GOMP_TASK_TIED ++ /* Task currently executing or scheduled and about to execute. */ ++ GOMP_TASK_TIED, ++ /* Used for target tasks that have vars mapped and async run started, ++ but not yet completed. Once that completes, they will be readded ++ into the queues as GOMP_TASK_WAITING in order to perform the var ++ unmapping. */ ++ GOMP_TASK_ASYNC_RUNNING + }; + +-struct gomp_task; +-struct gomp_taskgroup; +-struct htab; +- + struct gomp_task_depend_entry + { ++ /* Address of dependency. */ + void *addr; + struct gomp_task_depend_entry *next; + struct gomp_task_depend_entry *prev; ++ /* Task that provides the dependency in ADDR. */ + struct gomp_task *task; ++ /* Depend entry is of type "IN". */ + bool is_in; + bool redundant; + bool redundant_out; +@@ -290,8 +406,8 @@ struct gomp_taskwait + { + bool in_taskwait; + bool in_depend_wait; ++ /* Number of tasks we are waiting for. */ + size_t n_depend; +- struct gomp_task *last_parent_depends_on; + gomp_sem_t taskwait_sem; + }; + +@@ -299,20 +415,31 @@ struct gomp_taskwait + + struct gomp_task + { ++ /* Parent of this task. */ + struct gomp_task *parent; +- struct gomp_task *children; +- struct gomp_task *next_child; +- struct gomp_task *prev_child; +- struct gomp_task *next_queue; +- struct gomp_task *prev_queue; +- struct gomp_task *next_taskgroup; +- struct gomp_task *prev_taskgroup; ++ /* Children of this task. */ ++ struct priority_queue children_queue; ++ /* Taskgroup this task belongs in. */ + struct gomp_taskgroup *taskgroup; ++ /* Tasks that depend on this task. */ + struct gomp_dependers_vec *dependers; + struct htab *depend_hash; + struct gomp_taskwait *taskwait; ++ /* Number of items in DEPEND. */ + size_t depend_count; ++ /* Number of tasks this task depends on. 
Once this counter reaches ++ 0, we have no unsatisfied dependencies, and this task can be put ++ into the various queues to be scheduled. */ + size_t num_dependees; ++ ++ /* Priority of this task. */ ++ int priority; ++ /* The priority node for this task in each of the different queues. ++ We put this here to avoid allocating space for each priority ++ node. Then we play offsetof() games to convert between pnode[] ++ entries and the gomp_task in which they reside. */ ++ struct priority_node pnode[3]; ++ + struct gomp_task_icv icv; + void (*fn) (void *); + void *fn_data; +@@ -320,20 +447,58 @@ struct gomp_task + bool in_tied_task; + bool final_task; + bool copy_ctors_done; ++ /* Set for undeferred tasks with unsatisfied dependencies which ++ block further execution of their parent until the dependencies ++ are satisfied. */ + bool parent_depends_on; ++ /* Dependencies provided and/or needed for this task. DEPEND_COUNT ++ is the number of items available. */ + struct gomp_task_depend_entry depend[]; + }; + ++/* This structure describes a single #pragma omp taskgroup. */ ++ + struct gomp_taskgroup + { + struct gomp_taskgroup *prev; +- struct gomp_task *children; ++ /* Queue of tasks that belong in this taskgroup. */ ++ struct priority_queue taskgroup_queue; + bool in_taskgroup_wait; + bool cancelled; + gomp_sem_t taskgroup_sem; + size_t num_children; + }; + ++/* Various state of OpenMP async offloading tasks. */ ++enum gomp_target_task_state ++{ ++ GOMP_TARGET_TASK_DATA, ++ GOMP_TARGET_TASK_BEFORE_MAP, ++ GOMP_TARGET_TASK_FALLBACK, ++ GOMP_TARGET_TASK_READY_TO_RUN, ++ GOMP_TARGET_TASK_RUNNING, ++ GOMP_TARGET_TASK_FINISHED ++}; ++ ++/* This structure describes a target task. 
*/ ++ ++struct gomp_target_task ++{ ++ struct gomp_device_descr *devicep; ++ void (*fn) (void *); ++ size_t mapnum; ++ size_t *sizes; ++ unsigned short *kinds; ++ unsigned int flags; ++ enum gomp_target_task_state state; ++ struct target_mem_desc *tgt; ++ struct gomp_task *task; ++ struct gomp_team *team; ++ /* Device-specific target arguments. */ ++ void **args; ++ void *hostaddrs[]; ++}; ++ + /* This structure describes a "team" of threads. These are the threads + that are spawned by a PARALLEL constructs, as well as the work sharing + constructs that the team encounters. */ +@@ -396,7 +561,8 @@ struct gomp_team + struct gomp_work_share work_shares[8]; + + gomp_mutex_t task_lock; +- struct gomp_task *task_queue; ++ /* Scheduled tasks. */ ++ struct priority_queue task_queue; + /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ + unsigned int task_count; + /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ +@@ -451,6 +617,9 @@ struct gomp_thread_pool + struct gomp_thread **threads; + unsigned threads_size; + unsigned threads_used; ++ /* The last team is used for non-nested teams to delay their destruction to ++ make sure all the threads in the team move on to the pool's barrier before ++ the team's barrier is destroyed. */ + struct gomp_team *last_team; + /* Number of threads running in this contention group. */ + unsigned long threads_busy; +@@ -519,23 +688,7 @@ extern bool gomp_affinity_same_place (vo + extern bool gomp_affinity_finalize_place_list (bool); + extern bool gomp_affinity_init_level (int, unsigned long, bool); + extern void gomp_affinity_print_place (void *); +- +-/* alloc.c */ +- +-extern void *gomp_malloc (size_t) __attribute__((malloc)); +-extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); +-extern void *gomp_realloc (void *, size_t); +- +-/* Avoid conflicting prototypes of alloca() in system headers by using +- GCC's builtin alloca(). 
*/ +-#define gomp_alloca(x) __builtin_alloca(x) +- +-/* error.c */ +- +-extern void gomp_error (const char *, ...) +- __attribute__((format (printf, 1, 2))); +-extern void gomp_fatal (const char *, ...) +- __attribute__((noreturn, format (printf, 1, 2))); ++extern void gomp_get_place_proc_ids_8 (int, int64_t *); + + /* iter.c */ + +@@ -572,6 +725,9 @@ extern void gomp_ordered_next (void); + extern void gomp_ordered_static_init (void); + extern void gomp_ordered_static_next (void); + extern void gomp_ordered_sync (void); ++extern void gomp_doacross_init (unsigned, long *, long); ++extern void gomp_doacross_ull_init (unsigned, unsigned long long *, ++ unsigned long long); + + /* parallel.c */ + +@@ -588,6 +744,12 @@ extern void gomp_init_task (struct gomp_ + struct gomp_task_icv *); + extern void gomp_end_task (void); + extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); ++extern void gomp_task_maybe_wait_for_dependencies (void **); ++extern bool gomp_create_target_task (struct gomp_device_descr *, ++ void (*) (void *), size_t, void **, ++ size_t *, unsigned short *, unsigned int, ++ void **, void **, ++ enum gomp_target_task_state); + + static void inline + gomp_finish_task (struct gomp_task *task) +@@ -606,7 +768,213 @@ extern void gomp_free_thread (void *); + + /* target.c */ + ++extern void gomp_init_targets_once (void); + extern int gomp_get_num_devices (void); ++extern bool gomp_target_task_fn (void *); ++ ++/* Splay tree definitions. */ ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ ++struct target_var_desc { ++ /* Splay key. */ ++ splay_tree_key key; ++ /* True if data should be copied from device to host at the end. */ ++ bool copy_from; ++ /* True if data always should be copied from device to host at the end. */ ++ bool always_copy_from; ++ /* Relative offset against key host_start. */ ++ uintptr_t offset; ++ /* Actual length. 
*/ ++ uintptr_t length; ++}; ++ ++struct target_mem_desc { ++ /* Reference count. */ ++ uintptr_t refcount; ++ /* All the splay nodes allocated together. */ ++ splay_tree_node array; ++ /* Start of the target region. */ ++ uintptr_t tgt_start; ++ /* End of the target region. */ ++ uintptr_t tgt_end; ++ /* Handle to free. */ ++ void *to_free; ++ /* Previous target_mem_desc. */ ++ struct target_mem_desc *prev; ++ /* Number of items in following list. */ ++ size_t list_count; ++ ++ /* Corresponding target device descriptor. */ ++ struct gomp_device_descr *device_descr; ++ ++ /* List of target items to remove (or decrease refcount) ++ at the end of region. */ ++ struct target_var_desc list[]; ++}; ++ ++/* Special value for refcount - infinity. */ ++#define REFCOUNT_INFINITY (~(uintptr_t) 0) ++/* Special value for refcount - tgt_offset contains target address of the ++ artificial pointer to "omp declare target link" object. */ ++#define REFCOUNT_LINK (~(uintptr_t) 1) ++ ++struct splay_tree_key_s { ++ /* Address of the host object. */ ++ uintptr_t host_start; ++ /* Address immediately after the host object. */ ++ uintptr_t host_end; ++ /* Descriptor of the target memory. */ ++ struct target_mem_desc *tgt; ++ /* Offset from tgt->tgt_start to the start of the target object. */ ++ uintptr_t tgt_offset; ++ /* Reference count. */ ++ uintptr_t refcount; ++ /* Pointer to the original mapping of "omp declare target link" object. */ ++ splay_tree_key link_key; ++}; ++ ++/* The comparison function. */ ++ ++static inline int ++splay_compare (splay_tree_key x, splay_tree_key y) ++{ ++ if (x->host_start == x->host_end ++ && y->host_start == y->host_end) ++ return 0; ++ if (x->host_end <= y->host_start) ++ return -1; ++ if (x->host_start >= y->host_end) ++ return 1; ++ return 0; ++} ++ ++#include "splay-tree.h" ++ ++typedef struct acc_dispatch_t ++{ ++ /* This is a linked list of data mapped using the ++ acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
++ Unlike mapped_data in the goacc_thread struct, unmapping can ++ happen out-of-order with respect to mapping. */ ++ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ ++ struct target_mem_desc *data_environ; ++ ++ /* Execute. */ ++ void (*exec_func) (void (*) (void *), size_t, void **, void **, int, ++ unsigned *, void *); ++ ++ /* Async cleanup callback registration. */ ++ void (*register_async_cleanup_func) (void *, int); ++ ++ /* Asynchronous routines. */ ++ int (*async_test_func) (int); ++ int (*async_test_all_func) (void); ++ void (*async_wait_func) (int); ++ void (*async_wait_async_func) (int, int); ++ void (*async_wait_all_func) (void); ++ void (*async_wait_all_async_func) (int); ++ void (*async_set_async_func) (int); ++ ++ /* Create/destroy TLS data. */ ++ void *(*create_thread_data_func) (int); ++ void (*destroy_thread_data_func) (void *); ++ ++ /* NVIDIA target specific routines. */ ++ struct { ++ void *(*get_current_device_func) (void); ++ void *(*get_current_context_func) (void); ++ void *(*get_stream_func) (int); ++ int (*set_stream_func) (int, void *); ++ } cuda; ++} acc_dispatch_t; ++ ++/* Various state of the accelerator device. */ ++enum gomp_device_state ++{ ++ GOMP_DEVICE_UNINITIALIZED, ++ GOMP_DEVICE_INITIALIZED, ++ GOMP_DEVICE_FINALIZED ++}; ++ ++/* This structure describes accelerator device. ++ It contains name of the corresponding libgomp plugin, function handlers for ++ interaction with the device, ID-number of the device, and information about ++ mapped memory. */ ++struct gomp_device_descr ++{ ++ /* Immutable data, which is only set during initialization, and which is not ++ guarded by the lock. */ ++ ++ /* The name of the device. */ ++ const char *name; ++ ++ /* Capabilities of device (supports OpenACC, OpenMP). */ ++ unsigned int capabilities; ++ ++ /* This is the ID number of device among devices of the same type. */ ++ int target_id; ++ ++ /* This is the TYPE of device. 
*/ ++ enum offload_target_type type; ++ ++ /* Function handlers. */ ++ const char *(*get_name_func) (void); ++ unsigned int (*get_caps_func) (void); ++ int (*get_type_func) (void); ++ int (*get_num_devices_func) (void); ++ bool (*init_device_func) (int); ++ bool (*fini_device_func) (int); ++ unsigned (*version_func) (void); ++ int (*load_image_func) (int, unsigned, const void *, struct addr_pair **); ++ bool (*unload_image_func) (int, unsigned, const void *); ++ void *(*alloc_func) (int, size_t); ++ bool (*free_func) (int, void *); ++ bool (*dev2host_func) (int, void *, const void *, size_t); ++ bool (*host2dev_func) (int, void *, const void *, size_t); ++ bool (*dev2dev_func) (int, void *, const void *, size_t); ++ bool (*can_run_func) (void *); ++ void (*run_func) (int, void *, void *, void **); ++ void (*async_run_func) (int, void *, void *, void **, void *); ++ ++ /* Splay tree containing information about mapped memory regions. */ ++ struct splay_tree_s mem_map; ++ ++ /* Mutex for the mutable data. */ ++ gomp_mutex_t lock; ++ ++ /* Current state of the device. OpenACC allows to move from INITIALIZED state ++ back to UNINITIALIZED state. OpenMP allows only to move from INITIALIZED ++ to FINALIZED state (at program shutdown). */ ++ enum gomp_device_state state; ++ ++ /* OpenACC-specific data and functions. */ ++ /* This is mutable because of its mutable data_environ and target_data ++ members. */ ++ acc_dispatch_t openacc; ++}; ++ ++/* Kind of the pragma, for which gomp_map_vars () is called. 
*/ ++enum gomp_map_vars_kind ++{ ++ GOMP_MAP_VARS_OPENACC, ++ GOMP_MAP_VARS_TARGET, ++ GOMP_MAP_VARS_DATA, ++ GOMP_MAP_VARS_ENTER_DATA ++}; ++ ++extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); ++extern void gomp_acc_remove_pointer (void *, bool, int, int); ++ ++extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, ++ size_t, void **, void **, ++ size_t *, void *, bool, ++ enum gomp_map_vars_kind); ++extern void gomp_unmap_vars (struct target_mem_desc *, bool); ++extern void gomp_init_device (struct gomp_device_descr *); ++extern void gomp_free_memmap (struct splay_tree_s *); ++extern void gomp_unload_device (struct gomp_device_descr *); + + /* work.c */ + +@@ -646,8 +1014,28 @@ typedef enum omp_proc_bind_t + omp_proc_bind_spread = 4 + } omp_proc_bind_t; + ++typedef enum omp_lock_hint_t ++{ ++ omp_lock_hint_none = 0, ++ omp_lock_hint_uncontended = 1, ++ omp_lock_hint_contended = 2, ++ omp_lock_hint_nonspeculative = 4, ++ omp_lock_hint_speculative = 8, ++} omp_lock_hint_t; ++ ++extern void omp_init_lock_with_hint (omp_lock_t *, omp_lock_hint_t) ++ __GOMP_NOTHROW; ++extern void omp_init_nest_lock_with_hint (omp_lock_t *, omp_lock_hint_t) ++ __GOMP_NOTHROW; ++ + extern int omp_get_cancellation (void) __GOMP_NOTHROW; + extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; ++extern int omp_get_num_places (void) __GOMP_NOTHROW; ++extern int omp_get_place_num_procs (int) __GOMP_NOTHROW; ++extern void omp_get_place_proc_ids (int, int *) __GOMP_NOTHROW; ++extern int omp_get_place_num (void) __GOMP_NOTHROW; ++extern int omp_get_partition_num_places (void) __GOMP_NOTHROW; ++extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW; + + extern void omp_set_default_device (int) __GOMP_NOTHROW; + extern int omp_get_default_device (void) __GOMP_NOTHROW; +@@ -656,6 +1044,24 @@ extern int omp_get_num_teams (void) __GO + extern int omp_get_team_num (void) __GOMP_NOTHROW; + + extern int omp_is_initial_device (void) 
__GOMP_NOTHROW; ++extern int omp_get_initial_device (void) __GOMP_NOTHROW; ++extern int omp_get_max_task_priority (void) __GOMP_NOTHROW; ++ ++extern void *omp_target_alloc (__SIZE_TYPE__, int) __GOMP_NOTHROW; ++extern void omp_target_free (void *, int) __GOMP_NOTHROW; ++extern int omp_target_is_present (void *, int) __GOMP_NOTHROW; ++extern int omp_target_memcpy (void *, void *, __SIZE_TYPE__, __SIZE_TYPE__, ++ __SIZE_TYPE__, int, int) __GOMP_NOTHROW; ++extern int omp_target_memcpy_rect (void *, void *, __SIZE_TYPE__, int, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, ++ const __SIZE_TYPE__ *, int, int) ++ __GOMP_NOTHROW; ++extern int omp_target_associate_ptr (void *, void *, __SIZE_TYPE__, ++ __SIZE_TYPE__, int) __GOMP_NOTHROW; ++extern int omp_target_disassociate_ptr (void *, int) __GOMP_NOTHROW; + + #if !defined (HAVE_ATTRIBUTE_VISIBILITY) \ + || !defined (HAVE_ATTRIBUTE_ALIAS) \ +@@ -728,4 +1134,34 @@ extern int gomp_test_nest_lock_25 (omp_n + # define ialias_call(fn) fn + #endif + ++/* Helper function for priority_node_to_task() and ++ task_to_priority_node(). ++ ++ Return the offset from a task to its priority_node entry. The ++ priority_node entry has a type of TYPE. */ ++ ++static inline size_t ++priority_queue_offset (enum priority_queue_type type) ++{ ++ return offsetof (struct gomp_task, pnode[(int) type]); ++} ++ ++/* Return the task associated with a priority NODE of type TYPE. */ ++ ++static inline struct gomp_task * ++priority_node_to_task (enum priority_queue_type type, ++ struct priority_node *node) ++{ ++ return (struct gomp_task *) ((char *) node - priority_queue_offset (type)); ++} ++ ++/* Return the priority node of type TYPE for a given TASK.
*/ ++ ++static inline struct priority_node * ++task_to_priority_node (enum priority_queue_type type, ++ struct gomp_task *task) ++{ ++ return (struct priority_node *) ((char *) task ++ + priority_queue_offset (type)); ++} + #endif /* LIBGOMP_H */ +--- libgomp/env.c.jj 2014-05-15 10:56:32.420522486 +0200 ++++ libgomp/env.c 2016-07-13 16:57:04.437535335 +0200 +@@ -27,6 +27,8 @@ + + #include "libgomp.h" + #include "libgomp_f.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" + #include <ctype.h> + #include <stdlib.h> + #include <stdio.h> +@@ -56,7 +58,7 @@ struct gomp_task_icv gomp_global_icv = { + .nthreads_var = 1, + .thread_limit_var = UINT_MAX, + .run_sched_var = GFS_DYNAMIC, +- .run_sched_modifier = 1, ++ .run_sched_chunk_size = 1, + .default_device_var = 0, + .dyn_var = false, + .nest_var = false, +@@ -66,6 +68,7 @@ struct gomp_task_icv gomp_global_icv = { + + unsigned long gomp_max_active_levels_var = INT_MAX; + bool gomp_cancel_var = false; ++int gomp_max_task_priority_var = 0; + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_t gomp_managed_threads_lock; + #endif +@@ -76,6 +79,9 @@ char *gomp_bind_var_list; + unsigned long gomp_bind_var_list_len; + void **gomp_places_list; + unsigned long gomp_places_list_len; ++int gomp_debug_var; ++char *goacc_device_type; ++int goacc_device_num; + + /* Parse the OMP_SCHEDULE environment variable.
*/ + +@@ -118,7 +124,7 @@ parse_schedule (void) + ++env; + if (*env == '\0') + { +- gomp_global_icv.run_sched_modifier ++ gomp_global_icv.run_sched_chunk_size + = gomp_global_icv.run_sched_var != GFS_STATIC; + return; + } +@@ -144,7 +150,7 @@ parse_schedule (void) + + if (value == 0 && gomp_global_icv.run_sched_var != GFS_STATIC) + value = 1; +- gomp_global_icv.run_sched_modifier = value; ++ gomp_global_icv.run_sched_chunk_size = value; + return; + + unknown: +@@ -1011,6 +1017,16 @@ parse_affinity (bool ignore) + return false; + } + ++static void ++parse_acc_device_type (void) ++{ ++ const char *env = getenv ("ACC_DEVICE_TYPE"); ++ ++ if (env && *env != '\0') ++ goacc_device_type = strdup (env); ++ else ++ goacc_device_type = NULL; ++} + + static void + handle_omp_display_env (unsigned long stacksize, int wait_policy) +@@ -1054,7 +1070,7 @@ handle_omp_display_env (unsigned long st + + fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); + +- fputs (" _OPENMP = '201307'\n", stderr); ++ fputs (" _OPENMP = '201511'\n", stderr); + fprintf (stderr, " OMP_DYNAMIC = '%s'\n", + gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_NESTED = '%s'\n", +@@ -1142,6 +1158,8 @@ handle_omp_display_env (unsigned long st + gomp_cancel_var ? 
"TRUE" : "FALSE"); + fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", + gomp_global_icv.default_device_var); ++ fprintf (stderr, " OMP_MAX_TASK_PRIORITY = '%d'\n", ++ gomp_max_task_priority_var); + + if (verbose) + { +@@ -1174,6 +1192,7 @@ initialize_env (void) + parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); + parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); + parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); ++ parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); + parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, + true); + if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) +@@ -1181,6 +1200,7 @@ initialize_env (void) + gomp_global_icv.thread_limit_var + = thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var; + } ++ parse_int ("GOMP_DEBUG", &gomp_debug_var, true); + #ifndef HAVE_SYNC_BUILTINS + gomp_mutex_init (&gomp_managed_threads_lock); + #endif +@@ -1271,6 +1291,15 @@ initialize_env (void) + } + + handle_omp_display_env (stacksize, wait_policy); ++ ++ /* OpenACC. 
*/ ++ ++ if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true)) ++ goacc_device_num = 0; ++ ++ parse_acc_device_type (); ++ ++ goacc_runtime_initialize (); + } + + +@@ -1312,21 +1341,21 @@ omp_get_nested (void) + } + + void +-omp_set_schedule (omp_sched_t kind, int modifier) ++omp_set_schedule (omp_sched_t kind, int chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (true); + switch (kind) + { + case omp_sched_static: +- if (modifier < 1) +- modifier = 0; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 0; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_dynamic: + case omp_sched_guided: +- if (modifier < 1) +- modifier = 1; +- icv->run_sched_modifier = modifier; ++ if (chunk_size < 1) ++ chunk_size = 1; ++ icv->run_sched_chunk_size = chunk_size; + break; + case omp_sched_auto: + break; +@@ -1337,11 +1366,11 @@ omp_set_schedule (omp_sched_t kind, int + } + + void +-omp_get_schedule (omp_sched_t *kind, int *modifier) ++omp_get_schedule (omp_sched_t *kind, int *chunk_size) + { + struct gomp_task_icv *icv = gomp_icv (false); + *kind = icv->run_sched_var; +- *modifier = icv->run_sched_modifier; ++ *chunk_size = icv->run_sched_chunk_size; + } + + int +@@ -1377,6 +1406,12 @@ omp_get_cancellation (void) + return gomp_cancel_var; + } + ++int ++omp_get_max_task_priority (void) ++{ ++ return gomp_max_task_priority_var; ++} ++ + omp_proc_bind_t + omp_get_proc_bind (void) + { +@@ -1425,6 +1460,59 @@ omp_is_initial_device (void) + return 1; + } + ++int ++omp_get_initial_device (void) ++{ ++ return GOMP_DEVICE_HOST_FALLBACK; ++} ++ ++int ++omp_get_num_places (void) ++{ ++ return gomp_places_list_len; ++} ++ ++int ++omp_get_place_num (void) ++{ ++ if (gomp_places_list == NULL) ++ return -1; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return (int) thr->place - 1; ++} ++ ++int ++omp_get_partition_num_places (void) ++{ ++ if (gomp_places_list == NULL) ++ return 0; ++ 
++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ return thr->ts.place_partition_len; ++} ++ ++void ++omp_get_partition_place_nums (int *place_nums) ++{ ++ if (gomp_places_list == NULL) ++ return; ++ ++ struct gomp_thread *thr = gomp_thread (); ++ if (thr->place == 0) ++ gomp_init_affinity (); ++ ++ unsigned int i; ++ for (i = 0; i < thr->ts.place_partition_len; i++) ++ *place_nums++ = thr->ts.place_partition_off + i; ++} ++ + ialias (omp_set_dynamic) + ialias (omp_set_nested) + ialias (omp_set_num_threads) +@@ -1444,3 +1532,9 @@ ialias (omp_get_num_devices) + ialias (omp_get_num_teams) + ialias (omp_get_team_num) + ialias (omp_is_initial_device) ++ialias (omp_get_initial_device) ++ialias (omp_get_max_task_priority) ++ialias (omp_get_num_places) ++ialias (omp_get_place_num) ++ialias (omp_get_partition_num_places) ++ialias (omp_get_partition_place_nums) +--- libgomp/openacc.h.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/openacc.h 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,131 @@ ++/* OpenACC Runtime Library User-facing Declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef _OPENACC_H ++#define _OPENACC_H 1 ++ ++/* The OpenACC standard is silent on whether or not including ++ might or must not include other header files. We chose to include ++ some. */ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#if __cplusplus >= 201103 ++# define __GOACC_NOTHROW noexcept ++#elif __cplusplus ++# define __GOACC_NOTHROW throw () ++#else /* Not C++ */ ++# define __GOACC_NOTHROW __attribute__ ((__nothrow__)) ++#endif ++ ++/* Types */ ++typedef enum acc_device_t { ++ /* Keep in sync with include/gomp-constants.h. */ ++ acc_device_none = 0, ++ acc_device_default = 1, ++ acc_device_host = 2, ++ /* acc_device_host_nonshm = 3 removed. */ ++ acc_device_not_host = 4, ++ acc_device_nvidia = 5, ++ _ACC_device_hwm, ++ /* Ensure enumeration is layout compatible with int. */ ++ _ACC_highest = __INT_MAX__, ++ _ACC_neg = -1 ++} acc_device_t; ++ ++typedef enum acc_async_t { ++ /* Keep in sync with include/gomp-constants.h. 
*/ ++ acc_async_noval = -1, ++ acc_async_sync = -2 ++} acc_async_t; ++ ++int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW; ++void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; ++acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; ++void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; ++int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; ++int acc_async_test (int) __GOACC_NOTHROW; ++int acc_async_test_all (void) __GOACC_NOTHROW; ++void acc_wait (int) __GOACC_NOTHROW; ++void acc_wait_async (int, int) __GOACC_NOTHROW; ++void acc_wait_all (void) __GOACC_NOTHROW; ++void acc_wait_all_async (int) __GOACC_NOTHROW; ++void acc_init (acc_device_t) __GOACC_NOTHROW; ++void acc_shutdown (acc_device_t) __GOACC_NOTHROW; ++#ifdef __cplusplus ++int acc_on_device (int __arg) __GOACC_NOTHROW; ++#else ++int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW; ++#endif ++void *acc_malloc (size_t) __GOACC_NOTHROW; ++void acc_free (void *) __GOACC_NOTHROW; ++/* Some of these would be more correct with const qualifiers, but ++ the standard specifies otherwise. */ ++void *acc_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW; ++void *acc_create (void *, size_t) __GOACC_NOTHROW; ++void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW; ++void acc_copyout (void *, size_t) __GOACC_NOTHROW; ++void acc_delete (void *, size_t) __GOACC_NOTHROW; ++void acc_update_device (void *, size_t) __GOACC_NOTHROW; ++void acc_update_self (void *, size_t) __GOACC_NOTHROW; ++void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_unmap_data (void *) __GOACC_NOTHROW; ++void *acc_deviceptr (void *) __GOACC_NOTHROW; ++void *acc_hostptr (void *) __GOACC_NOTHROW; ++int acc_is_present (void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; ++void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; ++ ++/* Old names. 
OpenACC does not specify whether these can or must ++ not be macros, inlines or aliases for the new names. */ ++#define acc_pcreate acc_present_or_create ++#define acc_pcopyin acc_present_or_copyin ++ ++/* CUDA-specific routines. */ ++void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; ++void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; ++void *acc_get_cuda_stream (int) __GOACC_NOTHROW; ++int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW; ++ ++#ifdef __cplusplus ++} ++ ++/* Forwarding function with correctly typed arg. */ ++ ++#pragma acc routine seq ++inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW ++{ ++ return acc_on_device ((int) __arg); ++} ++#endif ++ ++#endif /* _OPENACC_H */ +--- libgomp/config/linux/doacross.h.jj 2016-07-13 16:57:18.902355979 +0200 ++++ libgomp/config/linux/doacross.h 2016-07-13 16:57:18.902355979 +0200 +@@ -0,0 +1,57 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++/* This is a Linux specific implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++#include "wait.h" ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/config/posix/doacross.h.jj 2016-07-13 16:57:18.903355966 +0200 ++++ libgomp/config/posix/doacross.h 2016-07-13 16:57:18.903355966 +0200 +@@ -0,0 +1,62 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++/* This is a generic implementation of doacross spinning. */ ++ ++#ifndef GOMP_DOACROSS_H ++#define GOMP_DOACROSS_H 1 ++ ++#include "libgomp.h" ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile ("" : : : "memory"); ++} ++ ++static inline void doacross_spin (unsigned long *addr, unsigned long expected, ++ unsigned long cur) ++{ ++ /* FIXME: back off depending on how large expected - cur is. */ ++ do ++ { ++ cpu_relax (); ++ cur = __atomic_load_n (addr, MEMMODEL_RELAXED); ++ if (expected < cur) ++ return; ++ } ++ while (1); ++} ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif /* GOMP_DOACROSS_H */ +--- libgomp/splay-tree.c.jj 2016-07-13 16:57:18.919355768 +0200 ++++ libgomp/splay-tree.c 2016-07-13 16:57:18.919355768 +0200 +@@ -0,0 +1,238 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#include "libgomp.h" ++ ++/* Rotate the edge joining the left child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->right; ++ n->right = p; ++ p->left = tmp; ++ *pp = n; ++} ++ ++/* Rotate the edge joining the right child N with its parent P. PP is the ++ grandparents' pointer to P. */ ++ ++static inline void ++rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) ++{ ++ splay_tree_node tmp; ++ tmp = n->left; ++ n->left = p; ++ p->right = tmp; ++ *pp = n; ++} ++ ++/* Bottom up splay of KEY. */ ++ ++static void ++splay_tree_splay (splay_tree sp, splay_tree_key key) ++{ ++ if (sp->root == NULL) ++ return; ++ ++ do { ++ int cmp1, cmp2; ++ splay_tree_node n, c; ++ ++ n = sp->root; ++ cmp1 = splay_compare (key, &n->key); ++ ++ /* Found. */ ++ if (cmp1 == 0) ++ return; ++ ++ /* Left or right? If no child, then we're done. */ ++ if (cmp1 < 0) ++ c = n->left; ++ else ++ c = n->right; ++ if (!c) ++ return; ++ ++ /* Next one left or right? If found or no child, we're done ++ after one rotation. 
*/ ++ cmp2 = splay_compare (key, &c->key); ++ if (cmp2 == 0 ++ || (cmp2 < 0 && !c->left) ++ || (cmp2 > 0 && !c->right)) ++ { ++ if (cmp1 < 0) ++ rotate_left (&sp->root, n, c); ++ else ++ rotate_right (&sp->root, n, c); ++ return; ++ } ++ ++ /* Now we have the four cases of double-rotation. */ ++ if (cmp1 < 0 && cmp2 < 0) ++ { ++ rotate_left (&n->left, c, c->left); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 > 0) ++ { ++ rotate_right (&n->right, c, c->right); ++ rotate_right (&sp->root, n, n->right); ++ } ++ else if (cmp1 < 0 && cmp2 > 0) ++ { ++ rotate_right (&n->left, c, c->right); ++ rotate_left (&sp->root, n, n->left); ++ } ++ else if (cmp1 > 0 && cmp2 < 0) ++ { ++ rotate_left (&n->right, c, c->left); ++ rotate_right (&sp->root, n, n->right); ++ } ++ } while (1); ++} ++ ++/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ ++ ++attribute_hidden void ++splay_tree_insert (splay_tree sp, splay_tree_node node) ++{ ++ int comparison = 0; ++ ++ splay_tree_splay (sp, &node->key); ++ ++ if (sp->root) ++ comparison = splay_compare (&sp->root->key, &node->key); ++ ++ if (sp->root && comparison == 0) ++ gomp_fatal ("Duplicate node"); ++ else ++ { ++ /* Insert it at the root. */ ++ if (sp->root == NULL) ++ node->left = node->right = NULL; ++ else if (comparison < 0) ++ { ++ node->left = sp->root; ++ node->right = node->left->right; ++ node->left->right = NULL; ++ } ++ else ++ { ++ node->right = sp->root; ++ node->left = node->right->left; ++ node->right->left = NULL; ++ } ++ ++ sp->root = node; ++ } ++} ++ ++/* Remove node with KEY from SP. It is not an error if it did not exist. */ ++ ++attribute_hidden void ++splay_tree_remove (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ { ++ splay_tree_node left, right; ++ ++ left = sp->root->left; ++ right = sp->root->right; ++ ++ /* One of the children is now the root. 
Doesn't matter much ++ which, so long as we preserve the properties of the tree. */ ++ if (left) ++ { ++ sp->root = left; ++ ++ /* If there was a right child as well, hang it off the ++ right-most leaf of the left child. */ ++ if (right) ++ { ++ while (left->right) ++ left = left->right; ++ left->right = right; ++ } ++ } ++ else ++ sp->root = right; ++ } ++} ++ ++/* Lookup KEY in SP, returning NODE if present, and NULL ++ otherwise. */ ++ ++attribute_hidden splay_tree_key ++splay_tree_lookup (splay_tree sp, splay_tree_key key) ++{ ++ splay_tree_splay (sp, key); ++ ++ if (sp->root && splay_compare (&sp->root->key, key) == 0) ++ return &sp->root->key; ++ else ++ return NULL; ++} ++ ++/* Helper function for splay_tree_foreach. ++ ++ Run FUNC on every node in KEY. */ ++ ++static void ++splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, ++ void *data) ++{ ++ if (!node) ++ return; ++ func (&node->key, data); ++ splay_tree_foreach_internal (node->left, func, data); ++ /* Yeah, whatever. GCC can fix my tail recursion. */ ++ splay_tree_foreach_internal (node->right, func, data); ++} ++ ++/* Run FUNC on each of the nodes in SP. */ ++ ++attribute_hidden void ++splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) ++{ ++ splay_tree_foreach_internal (sp->root, func, data); ++} +--- libgomp/libgomp-plugin.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/libgomp-plugin.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Exported (non-hidden) functions exposing libgomp interface for plugins. */ ++ ++#include ++ ++#include "libgomp.h" ++#include "libgomp-plugin.h" ++ ++void * ++GOMP_PLUGIN_malloc (size_t size) ++{ ++ return gomp_malloc (size); ++} ++ ++void * ++GOMP_PLUGIN_malloc_cleared (size_t size) ++{ ++ return gomp_malloc_cleared (size); ++} ++ ++void * ++GOMP_PLUGIN_realloc (void *ptr, size_t size) ++{ ++ return gomp_realloc (ptr, size); ++} ++ ++void ++GOMP_PLUGIN_debug (int kind, const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vdebug (kind, msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_error (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_verror (msg, ap); ++ va_end (ap); ++} ++ ++void ++GOMP_PLUGIN_fatal (const char *msg, ...) ++{ ++ va_list ap; ++ ++ va_start (ap, msg); ++ gomp_vfatal (msg, ap); ++ va_end (ap); ++} +--- libgomp/libgomp-plugin.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/libgomp-plugin.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,80 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* An interface to various libgomp-internal functions for use by plugins. */ ++ ++#ifndef LIBGOMP_PLUGIN_H ++#define LIBGOMP_PLUGIN_H 1 ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Capabilities of offloading devices. */ ++#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0) ++#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1) ++#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2) ++#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3) ++ ++/* Type of offload target device. Keep in sync with include/gomp-constants.h. */ ++enum offload_target_type ++{ ++ OFFLOAD_TARGET_TYPE_HOST = 2, ++ /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ ++ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, ++ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, ++ OFFLOAD_TARGET_TYPE_HSA = 7 ++}; ++ ++/* Auxiliary struct, used for transferring pairs of addresses from plugin ++ to libgomp. */ ++struct addr_pair ++{ ++ uintptr_t start; ++ uintptr_t end; ++}; ++ ++/* Miscellaneous functions. 
*/ ++extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc)); ++extern void *GOMP_PLUGIN_realloc (void *, size_t); ++void GOMP_PLUGIN_target_task_completion (void *); ++ ++extern void GOMP_PLUGIN_debug (int, const char *, ...) ++ __attribute__ ((format (printf, 2, 3))); ++extern void GOMP_PLUGIN_error (const char *, ...) ++ __attribute__ ((format (printf, 1, 2))); ++extern void GOMP_PLUGIN_fatal (const char *, ...) ++ __attribute__ ((noreturn, format (printf, 1, 2))); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +--- libgomp/oacc-async.c.jj 2016-07-13 16:57:13.488423109 +0200 ++++ libgomp/oacc-async.c 2016-07-13 16:57:13.488423109 +0200 +@@ -0,0 +1,107 @@ ++/* OpenACC Runtime Library Definitions. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include ++#include "openacc.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++int ++acc_async_test (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_func (async); ++} ++ ++int ++acc_async_test_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ return thr->dev->openacc.async_test_all_func (); ++} ++ ++void ++acc_wait (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_func (async); ++} ++ ++void ++acc_wait_async (int async1, int async2) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_async_func (async1, async2); ++} ++ ++void ++acc_wait_all (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_func (); ++} ++ ++void ++acc_wait_all_async (int async) ++{ ++ if (async < acc_async_sync) ++ gomp_fatal ("invalid async argument: %d", async); ++ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (!thr || !thr->dev) ++ gomp_fatal ("no device active"); ++ ++ thr->dev->openacc.async_wait_all_async_func (async); ++} +--- libgomp/splay-tree.h.jj 2016-07-13 16:57:18.934355582 +0200 ++++ libgomp/splay-tree.h 2016-07-13 16:57:18.934355582 +0200 +@@ -0,0 +1,130 @@ ++/* A splay-tree datatype. ++ Copyright (C) 1998-2016 Free Software Foundation, Inc. ++ Contributed by Mark Mitchell (mark@markmitchell.com). 
++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* The splay tree code copied from include/splay-tree.h and adjusted, ++ so that all the data lives directly in splay_tree_node_s structure ++ and no extra allocations are needed. ++ ++ Files including this header should before including it add: ++typedef struct splay_tree_node_s *splay_tree_node; ++typedef struct splay_tree_s *splay_tree; ++typedef struct splay_tree_key_s *splay_tree_key; ++ define splay_tree_key_s structure, and define ++ splay_compare inline function. ++ ++ Alternatively, they can define splay_tree_prefix macro before ++ including this header and then all the above types, the ++ splay_compare function and the splay_tree_{lookup,insert_remove} ++ function will be prefixed by that prefix. If splay_tree_prefix ++ macro is defined, this header must be included twice: once where ++ you need the header file definitions, and once where you need the ++ .c implementation routines. In the latter case, you must also ++ define the macro splay_tree_c. 
See the include of splay-tree.h in ++ priority_queue.[hc] for an example. */ ++ ++/* For an easily readable description of splay-trees, see: ++ ++ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their ++ Algorithms. Harper-Collins, Inc. 1991. ++ ++ The major feature of splay trees is that all basic tree operations ++ are amortized O(log n) time for a tree with n nodes. */ ++ ++#ifdef splay_tree_prefix ++# define splay_tree_name_1(prefix, name) prefix ## _ ## name ++# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name) ++# define splay_tree_node_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node_s) ++# define splay_tree_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_s) ++# define splay_tree_key_s \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key_s) ++# define splay_tree_node \ ++ splay_tree_name (splay_tree_prefix, splay_tree_node) ++# define splay_tree \ ++ splay_tree_name (splay_tree_prefix, splay_tree) ++# define splay_tree_key \ ++ splay_tree_name (splay_tree_prefix, splay_tree_key) ++# define splay_compare \ ++ splay_tree_name (splay_tree_prefix, splay_compare) ++# define splay_tree_lookup \ ++ splay_tree_name (splay_tree_prefix, splay_tree_lookup) ++# define splay_tree_insert \ ++ splay_tree_name (splay_tree_prefix, splay_tree_insert) ++# define splay_tree_remove \ ++ splay_tree_name (splay_tree_prefix, splay_tree_remove) ++# define splay_tree_foreach \ ++ splay_tree_name (splay_tree_prefix, splay_tree_foreach) ++# define splay_tree_callback \ ++ splay_tree_name (splay_tree_prefix, splay_tree_callback) ++#endif ++ ++#ifndef splay_tree_c ++/* Header file definitions and prototypes. */ ++ ++/* The nodes in the splay tree. */ ++struct splay_tree_node_s { ++ struct splay_tree_key_s key; ++ /* The left and right children, respectively. */ ++ splay_tree_node left; ++ splay_tree_node right; ++}; ++ ++/* The splay tree. 
*/ ++struct splay_tree_s { ++ splay_tree_node root; ++}; ++ ++typedef void (*splay_tree_callback) (splay_tree_key, void *); ++ ++extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); ++extern void splay_tree_insert (splay_tree, splay_tree_node); ++extern void splay_tree_remove (splay_tree, splay_tree_key); ++extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); ++#else /* splay_tree_c */ ++# ifdef splay_tree_prefix ++# include "splay-tree.c" ++# undef splay_tree_name_1 ++# undef splay_tree_name ++# undef splay_tree_node_s ++# undef splay_tree_s ++# undef splay_tree_key_s ++# undef splay_tree_node ++# undef splay_tree ++# undef splay_tree_key ++# undef splay_compare ++# undef splay_tree_lookup ++# undef splay_tree_insert ++# undef splay_tree_remove ++# undef splay_tree_foreach ++# undef splay_tree_callback ++# undef splay_tree_c ++# endif ++#endif /* #ifndef splay_tree_c */ ++ ++#ifdef splay_tree_prefix ++# undef splay_tree_prefix ++#endif +--- libgomp/oacc-plugin.c.jj 2016-07-13 16:57:13.481423196 +0200 ++++ libgomp/oacc-plugin.c 2016-07-14 15:40:21.653151873 +0200 +@@ -0,0 +1,44 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Initialize and register OpenACC dispatch table from libgomp plugin. */ ++ ++#include "libgomp.h" ++#include "oacc-plugin.h" ++#include "oacc-int.h" ++ ++void ++GOMP_PLUGIN_async_unmap_vars (void *ptr, int async) ++{ ++} ++ ++/* Return the target-specific part of the TLS data for the current thread. */ ++ ++void * ++GOMP_PLUGIN_acc_thread (void) ++{ ++ return NULL; ++} +--- libgomp/oacc-init.c.jj 2016-07-13 16:57:04.423535509 +0200 ++++ libgomp/oacc-init.c 2016-07-14 19:06:41.679575688 +0200 +@@ -0,0 +1,640 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "openacc.h" ++#include ++#include ++#include ++#include ++#include ++ ++/* This lock is used to protect access to cached_base_dev, dispatchers and ++ the (abstract) initialisation state of attached offloading devices. */ ++ ++static gomp_mutex_t acc_device_lock; ++ ++/* A cached version of the dispatcher for the global "current" accelerator type, ++ e.g. used as the default when creating new host threads. This is the ++ device-type equivalent of goacc_device_num (which specifies which device to ++ use out of potentially several of the same type). If there are several ++ devices of a given type, this points at the first one. */ ++ ++static struct gomp_device_descr *cached_base_dev = NULL; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++__thread struct goacc_thread *goacc_tls_data; ++#else ++pthread_key_t goacc_tls_key; ++#endif ++static pthread_key_t goacc_cleanup_key; ++ ++static struct goacc_thread *goacc_threads; ++static gomp_mutex_t goacc_thread_lock; ++ ++/* An array of dispatchers for device types, indexed by the type. This array ++ only references "base" devices, and other instances of the same type are ++ found by simply indexing from each such device (which are stored linearly, ++ grouped by device in target.c:devices). */ ++static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; ++ ++attribute_hidden void ++goacc_register (struct gomp_device_descr *disp) ++{ ++ /* Only register the 0th device here. 
*/ ++ if (disp->target_id != 0) ++ return; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ assert (acc_device_type (disp->type) != acc_device_none ++ && acc_device_type (disp->type) != acc_device_default ++ && acc_device_type (disp->type) != acc_device_not_host); ++ assert (!dispatchers[disp->type]); ++ dispatchers[disp->type] = disp; ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++static const char * ++name_of_acc_device_t (enum acc_device_t type) ++{ ++ switch (type) ++ { ++ case acc_device_none: return "none"; ++ case acc_device_default: return "default"; ++ case acc_device_host: return "host"; ++ case acc_device_not_host: return "not_host"; ++ case acc_device_nvidia: return "nvidia"; ++ default: gomp_fatal ("unknown device type %u", (unsigned) type); ++ } ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR ++ is true, this function raises an error if there are no devices of type D, ++ otherwise it returns NULL in that case. */ ++ ++static struct gomp_device_descr * ++resolve_device (acc_device_t d, bool fail_is_error) ++{ ++ acc_device_t d_arg = d; ++ ++ switch (d) ++ { ++ case acc_device_default: ++ { ++ if (goacc_device_type) ++ { ++ /* Lookup the named device. */ ++ if (!strcasecmp (goacc_device_type, "host")) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", goacc_device_type); ++ } ++ else ++ return NULL; ++ } ++ ++ /* No default device specified, so start scanning for any non-host ++ device that is available. 
*/ ++ d = acc_device_not_host; ++ } ++ /* FALLTHROUGH */ ++ ++ case acc_device_not_host: ++ if (d_arg == acc_device_default) ++ { ++ d = acc_device_host; ++ goto found; ++ } ++ if (fail_is_error) ++ { ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("no device found"); ++ } ++ else ++ return NULL; ++ break; ++ ++ case acc_device_host: ++ break; ++ ++ default: ++ if (d > _ACC_device_hwm) ++ { ++ if (fail_is_error) ++ goto unsupported_device; ++ else ++ return NULL; ++ } ++ break; ++ } ++ found: ++ ++ assert (d != acc_device_none ++ && d != acc_device_default ++ && d != acc_device_not_host); ++ ++ if (dispatchers[d] == NULL && fail_is_error) ++ { ++ unsupported_device: ++ gomp_mutex_unlock (&acc_device_lock); ++ gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); ++ } ++ ++ return dispatchers[d]; ++} ++ ++/* Emit a suitable error if no device of a particular type is available, or ++ the given device number is out-of-range. */ ++static void ++acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) ++{ ++ if (ndevs == 0) ++ gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); ++ else ++ gomp_fatal ("device %u out of range", ord); ++} ++ ++/* This is called when plugins have been initialized, and serves to call ++ (indirectly) the target's device_init hook. Calling multiple times without ++ an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be ++ held before calling this function. 
*/ ++ ++static struct gomp_device_descr * ++acc_init_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int ndevs; ++ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ if (ndevs <= 0 || goacc_device_num >= ndevs) ++ acc_dev_num_out_of_range (d, goacc_device_num, ndevs); ++ ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ gomp_mutex_unlock (&acc_dev->lock); ++ gomp_fatal ("device already active"); ++ } ++ ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ return base_dev; ++} ++ ++/* ACC_DEVICE_LOCK must be held before calling this function. */ ++ ++static void ++acc_shutdown_1 (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev; ++ struct goacc_thread *walk; ++ int ndevs, i; ++ bool devices_active = false; ++ ++ /* Get the base device for this device type. */ ++ base_dev = resolve_device (d, true); ++ ++ ndevs = base_dev->get_num_devices_func (); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ /* Free target-specific TLS data and close all devices. */ ++ for (walk = goacc_threads; walk != NULL; walk = walk->next) ++ { ++ if (walk->target_tls) ++ base_dev->openacc.destroy_thread_data_func (walk->target_tls); ++ ++ walk->target_tls = NULL; ++ ++ /* Similarly, if this happens then user code has done something weird. */ ++ if (walk->saved_bound_dev) ++ { ++ gomp_mutex_unlock (&goacc_thread_lock); ++ gomp_fatal ("shutdown during host fallback"); ++ } ++ ++ if (walk->dev) ++ { ++ gomp_mutex_lock (&walk->dev->lock); ++ gomp_free_memmap (&walk->dev->mem_map); ++ gomp_mutex_unlock (&walk->dev->lock); ++ ++ walk->dev = NULL; ++ walk->base_dev = NULL; ++ } ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ /* Close all the devices of this type that have been opened. 
*/ ++ bool ret = true; ++ for (i = 0; i < ndevs; i++) ++ { ++ struct gomp_device_descr *acc_dev = &base_dev[i]; ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_INITIALIZED) ++ { ++ devices_active = true; ++ ret &= acc_dev->fini_device_func (acc_dev->target_id); ++ acc_dev->state = GOMP_DEVICE_UNINITIALIZED; ++ } ++ gomp_mutex_unlock (&acc_dev->lock); ++ } ++ ++ if (!ret) ++ gomp_fatal ("device finalization failed"); ++ ++ if (!devices_active) ++ gomp_fatal ("no device initialized"); ++} ++ ++static struct goacc_thread * ++goacc_new_thread (void) ++{ ++ struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread)); ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++ goacc_tls_data = thr; ++#else ++ pthread_setspecific (goacc_tls_key, thr); ++#endif ++ ++ pthread_setspecific (goacc_cleanup_key, thr); ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ thr->next = goacc_threads; ++ goacc_threads = thr; ++ gomp_mutex_unlock (&goacc_thread_lock); ++ ++ return thr; ++} ++ ++static void ++goacc_destroy_thread (void *data) ++{ ++ struct goacc_thread *thr = data, *walk, *prev; ++ ++ gomp_mutex_lock (&goacc_thread_lock); ++ ++ if (thr) ++ { ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ if (acc_dev && thr->target_tls) ++ { ++ acc_dev->openacc.destroy_thread_data_func (thr->target_tls); ++ thr->target_tls = NULL; ++ } ++ ++ assert (!thr->mapped_data); ++ ++ /* Remove from thread list. */ ++ for (prev = NULL, walk = goacc_threads; walk; ++ prev = walk, walk = walk->next) ++ if (walk == thr) ++ { ++ if (prev == NULL) ++ goacc_threads = walk->next; ++ else ++ prev->next = walk->next; ++ ++ free (thr); ++ ++ break; ++ } ++ ++ assert (walk); ++ } ++ ++ gomp_mutex_unlock (&goacc_thread_lock); ++} ++ ++/* Use the ORD'th device instance for the current host thread (or -1 for the ++ current global default). The device (and the runtime) must be initialised ++ before calling this function. 
*/

void
goacc_attach_host_thread_to_device (int ord)
{
  struct goacc_thread *self = goacc_thread ();
  struct gomp_device_descr *type_base = NULL;
  struct gomp_device_descr *target = NULL;
  int available;

  /* Already attached, and either no particular instance was asked for or it
     is the one in use: nothing to do.  */
  if (self != NULL && self->dev != NULL)
    {
      if (ord < 0 || self->dev->target_id == ord)
        return;
    }

  /* A negative ordinal selects the global default instance.  */
  if (ord < 0)
    ord = goacc_device_num;

  /* Pick the device type: the one this thread already carries (e.g. set by
     acc_set_device_type) wins over the process-wide default.  */
  if (self != NULL && self->base_dev != NULL)
    type_base = self->base_dev;
  else
    {
      assert (cached_base_dev);
      type_base = cached_base_dev;
    }

  /* Validate the ordinal against the number of instances of that type.  */
  available = type_base->get_num_devices_func ();
  if (available <= 0 || ord >= available)
    acc_dev_num_out_of_range (acc_device_type (type_base->type), ord,
                              available);

  /* First OpenACC use on this host thread: create its thread block.  */
  if (self == NULL)
    self = goacc_new_thread ();

  target = &type_base[ord];

  self->base_dev = type_base;
  self->dev = target;
  self->saved_bound_dev = NULL;

  self->target_tls = target->openacc.create_thread_data_func (ord);

  target->openacc.async_set_async_func (acc_async_sync);
}

/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
   init/shutdown is per-process or per-thread.  We choose per-process.
*/ ++ ++void ++acc_init (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = acc_init_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_init) ++ ++void ++acc_shutdown (acc_device_t d) ++{ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ acc_shutdown_1 (d); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++} ++ ++ialias (acc_shutdown) ++ ++int ++acc_get_num_devices (acc_device_t d) ++{ ++ int n = 0; ++ struct gomp_device_descr *acc_dev; ++ ++ if (d == acc_device_none) ++ return 0; ++ ++ gomp_mutex_lock (&acc_device_lock); ++ acc_dev = resolve_device (d, false); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (!acc_dev) ++ return 0; ++ ++ n = acc_dev->get_num_devices_func (); ++ if (n < 0) ++ n = 0; ++ ++ return n; ++} ++ ++ialias (acc_get_num_devices) ++ ++/* Set the device type for the current thread only (using the current global ++ default device number), initialising that device if necessary. Also set the ++ default device type for new threads to D. */ ++ ++void ++acc_set_device_type (acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ acc_dev = &base_dev[goacc_device_num]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ /* We're changing device type: invalidate the current thread's dev and ++ base_dev pointers. 
*/ ++ if (thr && thr->base_dev != base_dev) ++ { ++ thr->base_dev = thr->dev = NULL; ++ } ++ ++ goacc_attach_host_thread_to_device (-1); ++} ++ ++ialias (acc_set_device_type) ++ ++acc_device_t ++acc_get_device_type (void) ++{ ++ acc_device_t res = acc_device_none; ++ struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->base_dev) ++ res = acc_device_type (thr->base_dev->type); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (acc_device_default, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ res = acc_device_type (dev->type); ++ } ++ ++ assert (res != acc_device_default ++ && res != acc_device_not_host); ++ ++ return res; ++} ++ ++ialias (acc_get_device_type) ++ ++int ++acc_get_device_num (acc_device_t d) ++{ ++ const struct gomp_device_descr *dev; ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (d >= _ACC_device_hwm) ++ gomp_fatal ("unknown device type %u", (unsigned) d); ++ ++ gomp_mutex_lock (&acc_device_lock); ++ dev = resolve_device (d, true); ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ if (thr && thr->base_dev == dev && thr->dev) ++ return thr->dev->target_id; ++ ++ return goacc_device_num; ++} ++ ++ialias (acc_get_device_num) ++ ++void ++acc_set_device_num (int ord, acc_device_t d) ++{ ++ struct gomp_device_descr *base_dev, *acc_dev; ++ int num_devices; ++ ++ if (ord < 0) ++ ord = goacc_device_num; ++ ++ if ((int) d == 0) ++ /* Set whatever device is being used by the current host thread to use ++ device instance ORD. It's unclear if this is supposed to affect other ++ host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). 
*/ ++ goacc_attach_host_thread_to_device (ord); ++ else ++ { ++ gomp_mutex_lock (&acc_device_lock); ++ ++ cached_base_dev = base_dev = resolve_device (d, true); ++ ++ num_devices = base_dev->get_num_devices_func (); ++ ++ if (num_devices <= 0 || ord >= num_devices) ++ acc_dev_num_out_of_range (d, ord, num_devices); ++ ++ acc_dev = &base_dev[ord]; ++ ++ gomp_mutex_lock (&acc_dev->lock); ++ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) ++ gomp_init_device (acc_dev); ++ gomp_mutex_unlock (&acc_dev->lock); ++ ++ gomp_mutex_unlock (&acc_device_lock); ++ ++ goacc_attach_host_thread_to_device (ord); ++ } ++ ++ goacc_device_num = ord; ++} ++ ++ialias (acc_set_device_num) ++ ++int ++acc_on_device (acc_device_t dev) ++{ ++ return dev == acc_device_host || dev == acc_device_none; ++} ++ ++ialias (acc_on_device) ++ ++attribute_hidden void ++goacc_runtime_initialize (void) ++{ ++ gomp_mutex_init (&acc_device_lock); ++ ++#if !(defined HAVE_TLS || defined USE_EMUTLS) ++ pthread_key_create (&goacc_tls_key, NULL); ++#endif ++ ++ pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); ++ ++ cached_base_dev = NULL; ++ ++ goacc_threads = NULL; ++ gomp_mutex_init (&goacc_thread_lock); ++ ++ /* Initialize and register the 'host' device type. */ ++ goacc_host_init (); ++} ++ ++/* Compiler helper functions */ ++ ++attribute_hidden void ++goacc_save_and_set_bind (acc_device_t d) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ assert (!thr->saved_bound_dev); ++ ++ thr->saved_bound_dev = thr->dev; ++ thr->dev = dispatchers[d]; ++} ++ ++attribute_hidden void ++goacc_restore_bind (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ thr->dev = thr->saved_bound_dev; ++ thr->saved_bound_dev = NULL; ++} ++ ++/* This is called from any OpenACC support function that may need to implicitly ++ initialize the libgomp runtime, either globally or from a new host thread. ++ On exit "goacc_thread" will return a valid & populated thread block. 
*/ ++ ++attribute_hidden void ++goacc_lazy_initialize (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev) ++ return; ++ ++ if (!cached_base_dev) ++ acc_init (acc_device_default); ++ else ++ goacc_attach_host_thread_to_device (-1); ++} +--- libgomp/oacc-int.h.jj 2016-07-13 16:57:04.400535794 +0200 ++++ libgomp/oacc-int.h 2016-07-13 16:57:04.400535794 +0200 +@@ -0,0 +1,106 @@ ++/* OpenACC Runtime - internal declarations ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file contains data types and function declarations that are not ++ part of the official OpenACC user interface. There are declarations ++ in here that are part of the GNU OpenACC ABI, in that the compiler is ++ required to know about them and use them. 
++ ++ The convention is that the all caps prefix "GOACC" is used group items ++ that are part of the external ABI, and the lower case prefix "goacc" ++ is used group items that are completely private to the library. */ ++ ++#ifndef OACC_INT_H ++#define OACC_INT_H 1 ++ ++#include "openacc.h" ++#include "config.h" ++#include ++#include ++#include ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility push(hidden) ++#endif ++ ++static inline enum acc_device_t ++acc_device_type (enum offload_target_type type) ++{ ++ return (enum acc_device_t) type; ++} ++ ++struct goacc_thread ++{ ++ /* The base device for the current thread. */ ++ struct gomp_device_descr *base_dev; ++ ++ /* The device for the current thread. */ ++ struct gomp_device_descr *dev; ++ ++ struct gomp_device_descr *saved_bound_dev; ++ ++ /* This is a linked list of data mapped by the "acc data" pragma, following ++ strictly push/pop semantics according to lexical scope. */ ++ struct target_mem_desc *mapped_data; ++ ++ /* These structures form a list: this is the next thread in that list. */ ++ struct goacc_thread *next; ++ ++ /* Target-specific data (used by plugin). 
*/ ++ void *target_tls; ++}; ++ ++#if defined HAVE_TLS || defined USE_EMUTLS ++extern __thread struct goacc_thread *goacc_tls_data; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return goacc_tls_data; ++} ++#else ++extern pthread_key_t goacc_tls_key; ++static inline struct goacc_thread * ++goacc_thread (void) ++{ ++ return pthread_getspecific (goacc_tls_key); ++} ++#endif ++ ++void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW; ++void goacc_attach_host_thread_to_device (int); ++void goacc_runtime_initialize (void); ++void goacc_save_and_set_bind (acc_device_t); ++void goacc_restore_bind (void); ++void goacc_lazy_initialize (void); ++void goacc_host_init (void); ++ ++#ifdef HAVE_ATTRIBUTE_VISIBILITY ++# pragma GCC visibility pop ++#endif ++ ++#endif +--- libgomp/oacc-host.c.jj 2016-07-13 16:57:13.489423096 +0200 ++++ libgomp/oacc-host.c 2016-07-13 16:57:13.489423096 +0200 +@@ -0,0 +1,266 @@ ++/* OpenACC Runtime Library: acc_device_host. ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "libgomp.h" ++#include "oacc-int.h" ++#include "gomp-constants.h" ++ ++#include ++#include ++#include ++ ++static struct gomp_device_descr host_dispatch; ++ ++static const char * ++host_get_name (void) ++{ ++ return host_dispatch.name; ++} ++ ++static unsigned int ++host_get_caps (void) ++{ ++ return host_dispatch.capabilities; ++} ++ ++static int ++host_get_type (void) ++{ ++ return host_dispatch.type; ++} ++ ++static int ++host_get_num_devices (void) ++{ ++ return 1; ++} ++ ++static bool ++host_init_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_fini_device (int n __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static unsigned ++host_version (void) ++{ ++ return GOMP_VERSION; ++} ++ ++static int ++host_load_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused)), ++ struct addr_pair **r __attribute__ ((unused))) ++{ ++ return 0; ++} ++ ++static bool ++host_unload_image (int n __attribute__ ((unused)), ++ unsigned v __attribute__ ((unused)), ++ const void *t __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static void * ++host_alloc (int n __attribute__ ((unused)), size_t s) ++{ ++ return gomp_malloc (s); ++} ++ ++static bool ++host_free (int n __attribute__ ((unused)), void *p) ++{ ++ free (p); ++ return true; ++} ++ ++static bool ++host_dev2host (int n __attribute__ ((unused)), ++ void *h __attribute__ ((unused)), ++ const void *d __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ return true; ++} ++ ++static bool ++host_host2dev (int n __attribute__ ((unused)), ++ void *d __attribute__ ((unused)), ++ const void *h __attribute__ ((unused)), ++ size_t s __attribute__ ((unused))) ++{ ++ 
return true; ++} ++ ++static void ++host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, ++ void **args __attribute__((unused))) ++{ ++ void (*fn)(void *) = (void (*)(void *)) fn_ptr; ++ ++ fn (vars); ++} ++ ++static void ++host_openacc_exec (void (*fn) (void *), ++ size_t mapnum __attribute__ ((unused)), ++ void **hostaddrs, ++ void **devaddrs __attribute__ ((unused)), ++ int async __attribute__ ((unused)), ++ unsigned *dims __attribute ((unused)), ++ void *targ_mem_desc __attribute__ ((unused))) ++{ ++ fn (hostaddrs); ++} ++ ++static void ++host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)), ++ int async __attribute__ ((unused))) ++{ ++} ++ ++static int ++host_openacc_async_test (int async __attribute__ ((unused))) ++{ ++ return 1; ++} ++ ++static int ++host_openacc_async_test_all (void) ++{ ++ return 1; ++} ++ ++static void ++host_openacc_async_wait (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_async (int async1 __attribute__ ((unused)), ++ int async2 __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_wait_all (void) ++{ ++} ++ ++static void ++host_openacc_async_wait_all_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void ++host_openacc_async_set_async (int async __attribute__ ((unused))) ++{ ++} ++ ++static void * ++host_openacc_create_thread_data (int ord __attribute__ ((unused))) ++{ ++ return NULL; ++} ++ ++static void ++host_openacc_destroy_thread_data (void *tls_data __attribute__ ((unused))) ++{ ++} ++ ++static struct gomp_device_descr host_dispatch = ++ { ++ .name = "host", ++ .capabilities = (GOMP_OFFLOAD_CAP_SHARED_MEM ++ | GOMP_OFFLOAD_CAP_NATIVE_EXEC ++ | GOMP_OFFLOAD_CAP_OPENACC_200), ++ .target_id = 0, ++ .type = OFFLOAD_TARGET_TYPE_HOST, ++ ++ .get_name_func = host_get_name, ++ .get_caps_func = host_get_caps, ++ .get_type_func = host_get_type, ++ .get_num_devices_func = host_get_num_devices, ++ .init_device_func = 
host_init_device, ++ .fini_device_func = host_fini_device, ++ .version_func = host_version, ++ .load_image_func = host_load_image, ++ .unload_image_func = host_unload_image, ++ .alloc_func = host_alloc, ++ .free_func = host_free, ++ .dev2host_func = host_dev2host, ++ .host2dev_func = host_host2dev, ++ .run_func = host_run, ++ ++ .mem_map = { NULL }, ++ /* .lock initilized in goacc_host_init. */ ++ .state = GOMP_DEVICE_UNINITIALIZED, ++ ++ .openacc = { ++ .data_environ = NULL, ++ ++ .exec_func = host_openacc_exec, ++ ++ .register_async_cleanup_func = host_openacc_register_async_cleanup, ++ ++ .async_test_func = host_openacc_async_test, ++ .async_test_all_func = host_openacc_async_test_all, ++ .async_wait_func = host_openacc_async_wait, ++ .async_wait_async_func = host_openacc_async_wait_async, ++ .async_wait_all_func = host_openacc_async_wait_all, ++ .async_wait_all_async_func = host_openacc_async_wait_all_async, ++ .async_set_async_func = host_openacc_async_set_async, ++ ++ .create_thread_data_func = host_openacc_create_thread_data, ++ .destroy_thread_data_func = host_openacc_destroy_thread_data, ++ ++ .cuda = { ++ .get_current_device_func = NULL, ++ .get_current_context_func = NULL, ++ .get_stream_func = NULL, ++ .set_stream_func = NULL, ++ } ++ } ++ }; ++ ++/* Initialize and register this device type. */ ++void ++goacc_host_init (void) ++{ ++ gomp_mutex_init (&host_dispatch.lock); ++ goacc_register (&host_dispatch); ++} +--- libgomp/oacc-parallel.c.jj 2016-07-13 16:57:04.399535807 +0200 ++++ libgomp/oacc-parallel.c 2016-07-14 18:53:06.694996381 +0200 +@@ -0,0 +1,241 @@ ++/* Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). 
++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles OpenACC constructs. */ ++ ++#include "openacc.h" ++#include "libgomp.h" ++#include "libgomp_g.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#ifdef HAVE_INTTYPES_H ++# include /* For PRIu64. */ ++#endif ++#include ++#include ++#include ++ ++static void goacc_wait (int async, int num_waits, va_list *ap); ++ ++ ++/* Launch a possibly offloaded function on DEVICE. FN is the host fn ++ address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory ++ blocks to be copied to/from the device. Varadic arguments are ++ keyed optional parameters terminated with a zero. */ ++ ++void ++GOACC_parallel_keyed (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ...) 
++{ ++ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; ++ struct goacc_thread *thr; ++ struct gomp_device_descr *acc_dev; ++ ++#ifdef HAVE_INTTYPES_H ++ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", ++ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); ++#else ++ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", ++ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); ++#endif ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ acc_dev = thr->dev; ++ ++ /* Host fallback if "if" clause is false or if the current device is set to ++ the host. */ ++ if (host_fallback) ++ { ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++ return; ++ } ++ else if (acc_device_type (acc_dev->type) == acc_device_host) ++ { ++ fn (hostaddrs); ++ return; ++ } ++ ++ /* acc_device_host is the only supported device type. */ ++} ++ ++/* Legacy entry point, only provide host execution. */ ++ ++void ++GOACC_parallel (int device, void (*fn) (void *), ++ size_t mapnum, void **hostaddrs, size_t *sizes, ++ unsigned short *kinds, ++ int num_gangs, int num_workers, int vector_length, ++ int async, int num_waits, ...) ++{ ++ goacc_save_and_set_bind (acc_device_host); ++ fn (hostaddrs); ++ goacc_restore_bind (); ++} ++ ++void ++GOACC_data_start (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_data_end (void) ++{ ++ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); ++ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); ++} ++ ++void ++GOACC_enter_exit_data (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) 
++{ ++ goacc_lazy_initialize (); ++} ++ ++static void ++goacc_wait (int async, int num_waits, va_list *ap) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ struct gomp_device_descr *acc_dev = thr->dev; ++ ++ while (num_waits--) ++ { ++ int qid = va_arg (*ap, int); ++ ++ if (acc_async_test (qid)) ++ continue; ++ ++ if (async == acc_async_sync) ++ acc_wait (qid); ++ else if (qid == async) ++ ;/* If we're waiting on the same asynchronous queue as we're ++ launching on, the queue itself will order work as ++ required, so there's no need to wait explicitly. */ ++ else ++ acc_dev->openacc.async_wait_async_func (qid, async); ++ } ++} ++ ++void ++GOACC_update (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds, ++ int async, int num_waits, ...) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++GOACC_wait (int async, int num_waits, ...) ++{ ++ if (num_waits) ++ { ++ va_list ap; ++ ++ va_start (ap, num_waits); ++ goacc_wait (async, num_waits, &ap); ++ va_end (ap); ++ } ++ else if (async == acc_async_sync) ++ acc_wait_all (); ++ else if (async == acc_async_noval) ++ goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); ++} ++ ++int ++GOACC_get_num_threads (void) ++{ ++ return 1; ++} ++ ++int ++GOACC_get_thread_num (void) ++{ ++ return 0; ++} ++ ++void ++GOACC_declare (int device, size_t mapnum, ++ void **hostaddrs, size_t *sizes, unsigned short *kinds) ++{ ++ int i; ++ ++ for (i = 0; i < mapnum; i++) ++ { ++ unsigned char kind = kinds[i] & 0xff; ++ ++ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) ++ continue; ++ ++ switch (kind) ++ { ++ case GOMP_MAP_FORCE_ALLOC: ++ case GOMP_MAP_FORCE_FROM: ++ case GOMP_MAP_FORCE_TO: ++ case GOMP_MAP_POINTER: ++ case GOMP_MAP_DELETE: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_DEVICEPTR: ++ break; ++ ++ case GOMP_MAP_ALLOC: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ GOACC_enter_exit_data 
(device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_TO: ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ ++ break; ++ ++ case GOMP_MAP_FROM: ++ kinds[i] = GOMP_MAP_FORCE_FROM; ++ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i], ++ &kinds[i], 0, 0); ++ break; ++ ++ case GOMP_MAP_FORCE_PRESENT: ++ if (!acc_is_present (hostaddrs[i], sizes[i])) ++ gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], ++ (unsigned long) sizes[i]); ++ break; ++ ++ default: ++ assert (0); ++ break; ++ } ++ } ++} +--- libgomp/oacc-cuda.c.jj 2016-07-13 16:57:04.432535397 +0200 ++++ libgomp/oacc-cuda.c 2016-07-13 16:57:04.432535397 +0200 +@@ -0,0 +1,86 @@ ++/* OpenACC Runtime Library: CUDA support glue. ++ ++ Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "oacc-int.h" ++ ++void * ++acc_get_current_cuda_device (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_device_func) ++ return thr->dev->openacc.cuda.get_current_device_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_current_cuda_context (void) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_current_context_func) ++ return thr->dev->openacc.cuda.get_current_context_func (); ++ ++ return NULL; ++} ++ ++void * ++acc_get_cuda_stream (int async) ++{ ++ struct goacc_thread *thr = goacc_thread (); ++ ++ if (async < 0) ++ return NULL; ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func) ++ return thr->dev->openacc.cuda.get_stream_func (async); ++ ++ return NULL; ++} ++ ++int ++acc_set_cuda_stream (int async, void *stream) ++{ ++ struct goacc_thread *thr; ++ ++ if (async < 0 || stream == NULL) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ ++ thr = goacc_thread (); ++ ++ if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func) ++ return thr->dev->openacc.cuda.set_stream_func (async, stream); ++ ++ return -1; ++} +--- libgomp/openacc_lib.h.jj 2016-07-13 16:57:13.486423134 +0200 ++++ libgomp/openacc_lib.h 2016-07-13 16:57:13.486423134 +0200 +@@ -0,0 +1,382 @@ ++! OpenACC Runtime Library Definitions. -*- mode: fortran -*- ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! 
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! . ++ ++! NOTE: Due to the use of dimension (..), the code only works when compiled ++! with -std=f2008ts/gnu/legacy but not with other standard settings. ++! Alternatively, the user can use the module version, which permits ++! compilation with -std=f95. ++ ++ integer, parameter :: acc_device_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 ++! removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ integer, parameter :: acc_handle_kind = 4 ++ ++! Keep in sync with include/gomp-constants.h. 
++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ function acc_get_num_devices_h (d) ++ import acc_device_kind ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_set_device_type ++ subroutine acc_set_device_type_h (d) ++ import acc_device_kind ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_type ++ function acc_get_device_type_h () ++ import acc_device_kind ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ end interface ++ ++ interface acc_set_device_num ++ subroutine acc_set_device_num_h (n, d) ++ import acc_device_kind ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ end interface ++ ++ interface acc_get_device_num ++ function acc_get_device_num_h (d) ++ import acc_device_kind ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ end interface ++ ++ interface acc_async_test ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ end interface ++ ++ interface acc_async_test_all ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ end interface ++ ++ interface acc_wait ++ subroutine acc_wait_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_wait_async ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ end interface ++ ++ interface acc_wait_all ++ subroutine acc_wait_all_h () ++ end subroutine ++ end interface ++ ++ interface acc_wait_all_async ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ end interface ++ ++ interface acc_init ++ subroutine acc_init_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ 
interface acc_shutdown ++ subroutine acc_shutdown_h (devicetype) ++ import acc_device_kind ++ integer (acc_device_kind) devicetype ++ end subroutine ++ end interface ++ ++ interface acc_on_device ++ function acc_on_device_h (devicetype) ++ import acc_device_kind ++ logical acc_on_device_h ++ integer (acc_device_kind) devicetype ++ end function ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! acc_free: Only available in C/C++ ++ ++ interface acc_copyin ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_copyin ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcopyin ++ subroutine acc_pcopyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcopyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine 
acc_pcopyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_create ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_present_or_create ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_pcreate ++ subroutine acc_pcreate_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_pcreate_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_copyout ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine 
acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_delete ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_device ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ interface acc_update_self ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ end interface ++ ++ ! 
acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_hostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ ! acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ +--- libgomp/gomp-constants.h.jj 2016-07-14 16:02:47.212545826 +0200 ++++ libgomp/gomp-constants.h 2016-05-26 21:04:40.000000000 +0200 +@@ -0,0 +1,259 @@ ++/* Communication between GCC and libgomp. ++ ++ Copyright (C) 2014-2015 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation.
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef GOMP_CONSTANTS_H ++#define GOMP_CONSTANTS_H 1 ++ ++/* Memory mapping types. */ ++ ++/* One byte. */ ++#define GOMP_MAP_LAST (1 << 8) ++ ++#define GOMP_MAP_FLAG_TO (1 << 0) ++#define GOMP_MAP_FLAG_FROM (1 << 1) ++/* Special map kinds, enumerated starting here. */ ++#define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) ++#define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) ++#define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) ++#define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ ++ | GOMP_MAP_FLAG_SPECIAL_0) ++/* Flag to force a specific behavior (or else, trigger a run-time error). */ ++#define GOMP_MAP_FLAG_FORCE (1 << 7) ++ ++enum gomp_map_kind ++ { ++ /* If not already present, allocate. */ ++ GOMP_MAP_ALLOC = 0, ++ /* ..., and copy to device. */ ++ GOMP_MAP_TO = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FROM = (GOMP_MAP_ALLOC | GOMP_MAP_FLAG_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_TOFROM = (GOMP_MAP_TO | GOMP_MAP_FROM), ++ /* The following kind is an internal only map kind, used for pointer based ++ array sections. OMP_CLAUSE_SIZE for these is not the pointer size, ++ which is implicitly POINTER_SIZE_UNITS, but the bias. */ ++ GOMP_MAP_POINTER = (GOMP_MAP_FLAG_SPECIAL_0 | 0), ++ /* Also internal, behaves like GOMP_MAP_TO, but additionally any ++ GOMP_MAP_POINTER records consecutive after it which have addresses ++ falling into that range will not be ignored if GOMP_MAP_TO_PSET wasn't ++ mapped already. */ ++ GOMP_MAP_TO_PSET = (GOMP_MAP_FLAG_SPECIAL_0 | 1), ++ /* Must already be present. */ ++ GOMP_MAP_FORCE_PRESENT = (GOMP_MAP_FLAG_SPECIAL_0 | 2), ++ /* Deallocate a mapping, without copying from device. */ ++ GOMP_MAP_DELETE = (GOMP_MAP_FLAG_SPECIAL_0 | 3), ++ /* Is a device pointer.
OMP_CLAUSE_SIZE for these is unused; is implicitly ++ POINTER_SIZE_UNITS. */ ++ GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), ++ /* Do not map, copy bits for firstprivate instead. */ ++ /* OpenACC device_resident. */ ++ GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1), ++ /* OpenACC link. */ ++ GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2), ++ /* Allocate. */ ++ GOMP_MAP_FIRSTPRIVATE = (GOMP_MAP_FLAG_SPECIAL | 0), ++ /* Similarly, but store the value in the pointer rather than ++ pointed by the pointer. */ ++ GOMP_MAP_FIRSTPRIVATE_INT = (GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Pointer translate host address into device address and copy that ++ back to host. */ ++ GOMP_MAP_USE_DEVICE_PTR = (GOMP_MAP_FLAG_SPECIAL | 2), ++ /* Allocate a zero length array section. Prefer next non-zero length ++ mapping over previous non-zero length mapping over zero length mapping ++ at the address. If not already mapped, do nothing (and pointer translate ++ to NULL). */ ++ GOMP_MAP_ZERO_LEN_ARRAY_SECTION = (GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Allocate. */ ++ GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), ++ /* ..., and copy to device. */ ++ GOMP_MAP_FORCE_TO = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TO), ++ /* ..., and copy from device. */ ++ GOMP_MAP_FORCE_FROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_FROM), ++ /* ..., and copy to and from device. */ ++ GOMP_MAP_FORCE_TOFROM = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_TOFROM), ++ /* If not already present, allocate. And unconditionally copy to ++ device. */ ++ GOMP_MAP_ALWAYS_TO = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_TO), ++ /* If not already present, allocate. And unconditionally copy from ++ device. */ ++ GOMP_MAP_ALWAYS_FROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FROM), ++ /* If not already present, allocate. And unconditionally copy to and from ++ device. 
*/ ++ GOMP_MAP_ALWAYS_TOFROM = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_TOFROM), ++ /* Map a sparse struct; the address is the base of the structure, alignment ++ its required alignment, and size is the number of adjacent entries ++ that belong to the struct. The adjacent entries should be sorted by ++ increasing address, so it is easy to determine lowest needed address ++ (address of the first adjacent entry) and highest needed address ++ (address of the last adjacent entry plus its size). */ ++ GOMP_MAP_STRUCT = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 0), ++ /* On a location of a pointer/reference that is assumed to be already mapped ++ earlier, store the translated address of the preceding mapping. ++ No refcount is bumped by this, and the store is done unconditionally. */ ++ GOMP_MAP_ALWAYS_POINTER = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 1), ++ /* Forced deallocation of zero length array section. */ ++ GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION ++ = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_FLAG_SPECIAL | 3), ++ /* Decrement usage count and deallocate if zero. */ ++ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 ++ | GOMP_MAP_DELETE), ++ ++ /* Internal to GCC, not used in libgomp. */ ++ /* Do not map, but pointer assign a pointer instead. */ ++ GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1), ++ /* Do not map, but pointer assign a reference instead.
*/ ++ GOMP_MAP_FIRSTPRIVATE_REFERENCE = (GOMP_MAP_LAST | 2) ++ }; ++ ++#define GOMP_MAP_COPY_TO_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_TO)) ++ ++#define GOMP_MAP_COPY_FROM_P(X) \ ++ (!((X) & GOMP_MAP_FLAG_SPECIAL) \ ++ && ((X) & GOMP_MAP_FLAG_FROM)) ++ ++#define GOMP_MAP_POINTER_P(X) \ ++ ((X) == GOMP_MAP_POINTER) ++ ++#define GOMP_MAP_ALWAYS_TO_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_FROM_P(X) \ ++ (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM)) ++ ++#define GOMP_MAP_ALWAYS_P(X) \ ++ (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM)) ++ ++ ++/* Asynchronous behavior. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ ++ ++#define GOMP_ASYNC_NOVAL -1 ++#define GOMP_ASYNC_SYNC -2 ++ ++ ++/* Device codes. Keep in sync with ++ libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_device_t as well as ++ libgomp/libgomp-plugin.h. */ ++#define GOMP_DEVICE_NONE 0 ++#define GOMP_DEVICE_DEFAULT 1 ++#define GOMP_DEVICE_HOST 2 ++/* #define GOMP_DEVICE_HOST_NONSHM 3 removed. */ ++#define GOMP_DEVICE_NOT_HOST 4 ++#define GOMP_DEVICE_NVIDIA_PTX 5 ++#define GOMP_DEVICE_INTEL_MIC 6 ++#define GOMP_DEVICE_HSA 7 ++ ++#define GOMP_DEVICE_ICV -1 ++#define GOMP_DEVICE_HOST_FALLBACK -2 ++ ++/* GOMP_task/GOMP_taskloop* flags argument. */ ++#define GOMP_TASK_FLAG_UNTIED (1 << 0) ++#define GOMP_TASK_FLAG_FINAL (1 << 1) ++#define GOMP_TASK_FLAG_MERGEABLE (1 << 2) ++#define GOMP_TASK_FLAG_DEPEND (1 << 3) ++#define GOMP_TASK_FLAG_PRIORITY (1 << 4) ++#define GOMP_TASK_FLAG_UP (1 << 8) ++#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) ++#define GOMP_TASK_FLAG_IF (1 << 10) ++#define GOMP_TASK_FLAG_NOGROUP (1 << 11) ++ ++/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ ++#define GOMP_TARGET_FLAG_NOWAIT (1 << 0) ++#define GOMP_TARGET_FLAG_EXIT_DATA (1 << 1) ++/* Internal to libgomp. 
*/ ++#define GOMP_TARGET_FLAG_UPDATE (1U << 31) ++ ++/* Versions of libgomp and device-specific plugins. GOMP_VERSION ++ should be incremented whenever an ABI-incompatible change is introduced ++ to the plugin interface defined in libgomp/libgomp.h. */ ++#define GOMP_VERSION 1 ++#define GOMP_VERSION_NVIDIA_PTX 1 ++#define GOMP_VERSION_INTEL_MIC 0 ++#define GOMP_VERSION_HSA 0 ++ ++#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) ++#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) ++#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff) ++ ++#define GOMP_DIM_GANG 0 ++#define GOMP_DIM_WORKER 1 ++#define GOMP_DIM_VECTOR 2 ++#define GOMP_DIM_MAX 3 ++#define GOMP_DIM_MASK(X) (1u << (X)) ++ ++/* Variadic launch arguments. End of list is marked by a zero. */ ++#define GOMP_LAUNCH_DIM 1 /* Launch dimensions, op = mask */ ++#define GOMP_LAUNCH_ASYNC 2 /* Async, op = cst val if not MAX */ ++#define GOMP_LAUNCH_WAIT 3 /* Waits, op = num waits. */ ++#define GOMP_LAUNCH_CODE_SHIFT 28 ++#define GOMP_LAUNCH_DEVICE_SHIFT 16 ++#define GOMP_LAUNCH_OP_SHIFT 0 ++#define GOMP_LAUNCH_PACK(CODE,DEVICE,OP) \ ++ (((CODE) << GOMP_LAUNCH_CODE_SHIFT) \ ++ | ((DEVICE) << GOMP_LAUNCH_DEVICE_SHIFT) \ ++ | ((OP) << GOMP_LAUNCH_OP_SHIFT)) ++#define GOMP_LAUNCH_CODE(X) (((X) >> GOMP_LAUNCH_CODE_SHIFT) & 0xf) ++#define GOMP_LAUNCH_DEVICE(X) (((X) >> GOMP_LAUNCH_DEVICE_SHIFT) & 0xfff) ++#define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) ++#define GOMP_LAUNCH_OP_MAX 0xffff ++ ++/* Bitmask to apply in order to find out the intended device of a target ++ argument. */ ++#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) ++/* The target argument is significant for all devices. */ ++#define GOMP_TARGET_ARG_DEVICE_ALL 0 ++ ++/* Flag set when the subsequent element in the device-specific argument ++ values. */ ++#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) ++ ++/* Bitmask to apply to a target argument to find out the value identifier.
*/ ++#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) ++/* Target argument index of NUM_TEAMS. */ ++#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) ++/* Target argument index of THREAD_LIMIT. */ ++#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) ++ ++/* If the value is directly embedded in target argument, it should be 16 bits ++ at most and shifted by this many bits. */ ++#define GOMP_TARGET_ARG_VALUE_SHIFT 16 ++ ++/* HSA specific data structures. */ ++ ++/* Identifiers of device-specific target arguments. */ ++#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES (1 << 8) ++ ++#endif +--- libgomp/oacc-mem.c.jj 2016-07-13 16:57:04.433535385 +0200 ++++ libgomp/oacc-mem.c 2016-07-14 15:39:44.644631308 +0200 +@@ -0,0 +1,204 @@ ++/* OpenACC Runtime initialization routines ++ ++ Copyright (C) 2013-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>.
*/ ++ ++#include "openacc.h" ++#include "config.h" ++#include "libgomp.h" ++#include "gomp-constants.h" ++#include "oacc-int.h" ++#include ++#include ++#include ++ ++/* OpenACC is silent on how memory exhaustion is indicated. We return ++ NULL. */ ++ ++void * ++acc_malloc (size_t s) ++{ ++ if (!s) ++ return NULL; ++ ++ goacc_lazy_initialize (); ++ return malloc (s); ++} ++ ++/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event ++ the device address is mapped. We choose to check if it mapped, ++ and if it is, to unmap it. */ ++void ++acc_free (void *d) ++{ ++ return free (d); ++} ++ ++void ++acc_memcpy_to_device (void *d, void *h, size_t s) ++{ ++ memmove (d, h, s); ++} ++ ++void ++acc_memcpy_from_device (void *h, void *d, size_t s) ++{ ++ memmove (h, d, s); ++} ++ ++/* Return the device pointer that corresponds to host data H. Or NULL ++ if no mapping. */ ++ ++void * ++acc_deviceptr (void *h) ++{ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++/* Return the host pointer that corresponds to device data D. Or NULL ++ if no mapping. */ ++ ++void * ++acc_hostptr (void *d) ++{ ++ goacc_lazy_initialize (); ++ return d; ++} ++ ++/* Return 1 if host data [H,+S] is present on the device. 
*/ ++ ++int ++acc_is_present (void *h, size_t s) ++{ ++ if (!s || !h) ++ return 0; ++ ++ goacc_lazy_initialize (); ++ return h != NULL; ++} ++ ++/* Create a mapping for host [H,+S] -> device [D,+S] */ ++ ++void ++acc_map_data (void *h, void *d, size_t s) ++{ ++ goacc_lazy_initialize (); ++ ++ if (d != h) ++ gomp_fatal ("cannot map data on shared-memory system"); ++} ++ ++void ++acc_unmap_data (void *h) ++{ ++} ++ ++#define FLAG_PRESENT (1 << 0) ++#define FLAG_CREATE (1 << 1) ++#define FLAG_COPY (1 << 2) ++ ++static void * ++present_create_copy (unsigned f, void *h, size_t s) ++{ ++ if (!h || !s) ++ gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); ++ ++ goacc_lazy_initialize (); ++ return h; ++} ++ ++void * ++acc_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE, h, s); ++} ++ ++void * ++acc_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++void * ++acc_present_or_create (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); ++} ++ ++void * ++acc_present_or_copyin (void *h, size_t s) ++{ ++ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); ++} ++ ++#define FLAG_COPYOUT (1 << 0) ++ ++static void ++delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) ++{ ++} ++ ++void ++acc_delete (void *h , size_t s) ++{ ++ delete_copyout (0, h, s, __FUNCTION__); ++} ++ ++void ++acc_copyout (void *h, size_t s) ++{ ++ delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); ++} ++ ++static void ++update_dev_host (int is_dev, void *h, size_t s) ++{ ++ goacc_lazy_initialize (); ++} ++ ++void ++acc_update_device (void *h, size_t s) ++{ ++ update_dev_host (1, h, s); ++} ++ ++void ++acc_update_self (void *h, size_t s) ++{ ++ update_dev_host (0, h, s); ++} ++ ++void ++gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, ++ void *kinds) ++{ ++} ++ ++void ++gomp_acc_remove_pointer (void *h, bool force_copyfrom, int 
async, int mapnum) ++{ ++} +--- libgomp/oacc-plugin.h.jj 2016-07-13 16:57:13.487423121 +0200 ++++ libgomp/oacc-plugin.h 2016-07-13 16:57:13.487423121 +0200 +@@ -0,0 +1,33 @@ ++/* Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++ Contributed by Mentor Embedded. ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef OACC_PLUGIN_H ++#define OACC_PLUGIN_H 1 ++ ++extern void GOMP_PLUGIN_async_unmap_vars (void *, int); ++extern void *GOMP_PLUGIN_acc_thread (void); ++ ++#endif +--- libgomp/taskloop.c.jj 2016-07-13 16:57:18.935355570 +0200 ++++ libgomp/taskloop.c 2016-07-13 16:57:18.935355570 +0200 +@@ -0,0 +1,340 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Jakub Jelinek . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* This file handles the taskloop construct. It is included twice, once ++ for the long and once for unsigned long long variant. */ ++ ++/* Called when encountering a taskloop construct. The iterations from ++ START towards END in increments of STEP are counted (N below) and ++ divided among NUM_TASKS tasks. Each task runs FN on its own argument ++ block (DATA itself or a copy of it) whose first two TYPE slots are ++ overwritten with that task's start and end values. ++ ++ If GOMP_TASK_FLAG_GRAINSIZE is set in FLAGS, NUM_TASKS carries the ++ grainsize instead and the task count is derived from it. Otherwise ++ NUM_TASKS == 0 selects team->nthreads tasks (1 without a team) and ++ NUM_TASKS is capped at N. When TYPE_is_long is not defined, ++ GOMP_TASK_FLAG_UP in FLAGS gives the loop direction. ++ ++ If CPYFN is non-NULL it is called to fill each task's argument block ++ from DATA; ARG_SIZE and ARG_ALIGN are the size and alignment of that ++ block. PRIORITY is clamped to gomp_max_task_priority_var. ++ ++ Unless GOMP_TASK_FLAG_NOGROUP is set, the tasks are bracketed by ++ GOMP_taskgroup_start and GOMP_taskgroup_end. The tasks are run ++ immediately by the encountering thread when GOMP_TASK_FLAG_IF is ++ clear, there is no team, the current task is final, or ++ team->task_count + NUM_TASKS would exceed 64 * team->nthreads; ++ otherwise they are allocated and queued for the team. */ ++ ++void ++GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), ++ long arg_size, long arg_align, unsigned flags, ++ unsigned long num_tasks, int priority, ++ TYPE start, TYPE end, TYPE step) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_team *team = thr->ts.team; ++ ++#ifdef HAVE_BROKEN_POSIX_SEMAPHORES ++ /* If pthread_mutex_* is used for omp_*lock*, then each task must be ++ tied to one thread all the time. This means UNTIED tasks must be ++ tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN ++ might be running on different thread than FN. */ ++ if (cpyfn) ++ flags &= ~GOMP_TASK_FLAG_IF; ++ flags &= ~GOMP_TASK_FLAG_UNTIED; ++#endif ++ ++ /* If parallel or taskgroup has been cancelled, don't start new tasks.
*/ ++ if (team && gomp_team_barrier_cancelled (&team->barrier)) ++ return; ++ ++#ifdef TYPE_is_long ++ TYPE s = step; ++ if (step > 0) ++ { ++ if (start >= end) ++ return; ++ s--; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ s++; ++ } ++ UTYPE n = (end - start + s) / step; ++#else ++ UTYPE n; ++ if (flags & GOMP_TASK_FLAG_UP) ++ { ++ if (start >= end) ++ return; ++ n = (end - start + step - 1) / step; ++ } ++ else ++ { ++ if (start <= end) ++ return; ++ n = (start - end - step - 1) / -step; ++ } ++#endif ++ /* N now holds the iteration count (the distance between START and END divided by STEP, rounded up). */ ++ TYPE task_step = step; ++ unsigned long nfirst = n; /* Index of the task after which TASK_STEP is reduced by STEP; see the `i == nfirst' tests below. */ ++ if (flags & GOMP_TASK_FLAG_GRAINSIZE) ++ { ++ unsigned long grainsize = num_tasks; /* With GOMP_TASK_FLAG_GRAINSIZE the NUM_TASKS argument carries the grainsize. NOTE(review): there is no zero check before the divisions below -- presumably callers never pass 0; confirm. */ ++#ifdef TYPE_is_long ++ num_tasks = n / grainsize; ++#else ++ UTYPE ndiv = n / grainsize; ++ num_tasks = ndiv; ++ if (num_tasks != ndiv) ++ num_tasks = ~0UL; ++#endif ++ if (num_tasks <= 1) ++ { ++ num_tasks = 1; ++ task_step = end - start; ++ } ++ else if (num_tasks >= grainsize ++#ifndef TYPE_is_long ++ && num_tasks != ~0UL ++#endif ++ ) ++ { ++ UTYPE mul = num_tasks * grainsize; ++ task_step = (TYPE) grainsize * step; ++ if (mul != n) ++ { ++ task_step += step; ++ nfirst = n - mul - 1; ++ } ++ } ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ else ++ { ++ if (num_tasks == 0) ++ num_tasks = team ?
team->nthreads : 1; ++ if (num_tasks >= n) ++ num_tasks = n; ++ else ++ { ++ UTYPE div = n / num_tasks; ++ UTYPE mod = n % num_tasks; ++ task_step = (TYPE) div * step; ++ if (mod) ++ { ++ task_step += step; ++ nfirst = mod - 1; ++ } ++ } ++ } ++ ++ if (flags & GOMP_TASK_FLAG_NOGROUP) ++ { ++ if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) ++ return; ++ } ++ else ++ ialias_call (GOMP_taskgroup_start) (); ++ ++ if (priority > gomp_max_task_priority_var) ++ priority = gomp_max_task_priority_var; ++ ++ if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL ++ || (thr->task && thr->task->final_task) ++ || team->task_count + num_tasks > 64 * team->nthreads) ++ { /* Undeferred path: every task is run right here by the encountering thread. */ ++ unsigned long i; ++ if (__builtin_expect (cpyfn != NULL, 0)) ++ { ++ struct gomp_task task[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); ++ char buf[num_tasks * arg_size + arg_align - 1]; ++ char *arg = (char *) (((uintptr_t) buf + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ char *orig_arg = arg; ++ for (i = 0; i < num_tasks; i++) ++ { /* NOTE(review): after the first iteration thr->task points at task[i - 1], so the thr->task reads below see the previous array element rather than PARENT. */ ++ gomp_init_task (&task[i], parent, gomp_icv (false)); ++ task[i].priority = priority; ++ task[i].kind = GOMP_TASK_UNDEFERRED; ++ task[i].final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task[i].in_tied_task = thr->task->in_tied_task; ++ task[i].taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task[i]; ++ cpyfn (arg, data); ++ arg += arg_size; ++ } ++ arg = orig_arg; ++ for (i = 0; i < num_tasks; i++) ++ { ++ thr->task = &task[i]; ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (arg); ++ arg += arg_size; ++ if (!priority_queue_empty_p (&task[i].children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task[i].children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++
gomp_end_task (); ++ } ++ } ++ else ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task task; ++ ++ gomp_init_task (&task, thr->task, gomp_icv (false)); ++ task.priority = priority; ++ task.kind = GOMP_TASK_UNDEFERRED; ++ task.final_task = (thr->task && thr->task->final_task) ++ || (flags & GOMP_TASK_FLAG_FINAL); ++ if (thr->task) ++ { ++ task.in_tied_task = thr->task->in_tied_task; ++ task.taskgroup = thr->task->taskgroup; ++ } ++ thr->task = &task; ++ ((TYPE *)data)[0] = start; ++ start += task_step; ++ ((TYPE *)data)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ fn (data); ++ if (!priority_queue_empty_p (&task.children_queue, ++ MEMMODEL_RELAXED)) ++ { ++ gomp_mutex_lock (&team->task_lock); ++ gomp_clear_parent (&task.children_queue); ++ gomp_mutex_unlock (&team->task_lock); ++ } ++ gomp_end_task (); ++ } ++ } ++ else ++ { /* Deferred path: allocate every task first, then queue them all under team->task_lock. */ ++ struct gomp_task *tasks[num_tasks]; ++ struct gomp_task *parent = thr->task; ++ struct gomp_taskgroup *taskgroup = parent->taskgroup; /* NOTE(review): PARENT is dereferenced without a NULL test although the undeferred path tests thr->task -- presumably thr->task is never NULL once TEAM is non-NULL; confirm. */ ++ char *arg; ++ int do_wake; ++ unsigned long i; ++ ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task ++ = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); ++ tasks[i] = task; ++ arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) ++ & ~(uintptr_t) (arg_align - 1)); ++ gomp_init_task (task, parent, gomp_icv (false)); ++ task->priority = priority; ++ task->kind = GOMP_TASK_UNDEFERRED; ++ task->in_tied_task = parent->in_tied_task; ++ task->taskgroup = taskgroup; ++ thr->task = task; ++ if (cpyfn) ++ { ++ cpyfn (arg, data); ++ task->copy_ctors_done = true; ++ } ++ else ++ memcpy (arg, data, arg_size); ++ ((TYPE *)arg)[0] = start; ++ start += task_step; ++ ((TYPE *)arg)[1] = start; ++ if (i == nfirst) ++ task_step -= step; ++ thr->task = parent; ++ task->kind = GOMP_TASK_WAITING; ++ task->fn = fn; ++ task->fn_data = arg; ++ task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; ++ } ++ gomp_mutex_lock (&team->task_lock); ++ /* If parallel or taskgroup has been
cancelled, don't start new ++ tasks. NOTE(review): this bail-out is skipped whenever CPYFN is ++ non-NULL, presumably because CPYFN has already run for every ++ task by this point -- confirm. */ ++ if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) ++ || (taskgroup && taskgroup->cancelled)) ++ && cpyfn == NULL, 0)) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ for (i = 0; i < num_tasks; i++) ++ { ++ gomp_finish_task (tasks[i]); ++ free (tasks[i]); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++ return; ++ } ++ if (taskgroup) ++ taskgroup->num_children += num_tasks; ++ for (i = 0; i < num_tasks; i++) ++ { ++ struct gomp_task *task = tasks[i]; ++ priority_queue_insert (PQ_CHILDREN, &parent->children_queue, ++ task, priority, ++ PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ if (taskgroup) ++ priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, ++ task, priority, PRIORITY_INSERT_BEGIN, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, ++ PRIORITY_INSERT_END, ++ /*adjust_parent_depends_on=*/false, ++ task->parent_depends_on); ++ ++team->task_count; ++ ++team->task_queued_count; ++ } ++ gomp_team_barrier_set_task_pending (&team->barrier); /* Below, DO_WAKE becomes team->nthreads minus the running-task count (minus one more if PARENT is not in a tied task), capped at NUM_TASKS, or 0. */ ++ if (team->task_running_count + !parent->in_tied_task ++ < team->nthreads) ++ { ++ do_wake = team->nthreads - team->task_running_count ++ - !parent->in_tied_task; ++ if ((unsigned long) do_wake > num_tasks) ++ do_wake = num_tasks; ++ } ++ else ++ do_wake = 0; ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ gomp_team_barrier_wake (&team->barrier, do_wake); ++ } ++ if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) ++ ialias_call (GOMP_taskgroup_end) (); ++} +--- libgomp/priority_queue.h.jj 2016-07-13 16:57:04.438535323 +0200 ++++ libgomp/priority_queue.h 2016-07-13 16:57:04.438535323 +0200 +@@ -0,0 +1,485 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez .
++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Header file for a priority queue of GOMP tasks. */ ++ ++/* ?? Perhaps all the priority_tree_* functions are complex and rare ++ enough to go out-of-line and be moved to priority_queue.c. ?? */ ++ ++#ifndef _PRIORITY_QUEUE_H_ ++#define _PRIORITY_QUEUE_H_ ++ ++/* One task. */ ++ ++struct priority_node ++{ ++ /* Next and previous chains in a circular doubly linked list for ++ tasks within this task's priority. */ ++ struct priority_node *next, *prev; ++}; ++ ++/* All tasks within the same priority. */ ++ ++struct priority_list ++{ ++ /* Priority of the tasks in this set. */ ++ int priority; ++ ++ /* Tasks. */ ++ struct priority_node *tasks; ++ ++ /* This points to the last of the higher priority WAITING tasks. ++ Remember that for the children queue, we have: ++ ++ parent_depends_on WAITING tasks. ++ !parent_depends_on WAITING tasks. ++ TIED tasks.
++ ++ This is a pointer to the last of the parent_depends_on WAITING ++ tasks, which are, essentially, higher priority items within their ++ priority. */ ++ struct priority_node *last_parent_depends_on; ++}; ++ ++/* Another splay tree instantiation, for priority_list's. */ ++typedef struct prio_splay_tree_node_s *prio_splay_tree_node; ++typedef struct prio_splay_tree_s *prio_splay_tree; ++typedef struct prio_splay_tree_key_s *prio_splay_tree_key; ++struct prio_splay_tree_key_s { ++ /* This structure must only contain a priority_list, as we cast ++ prio_splay_tree_key to priority_list throughout. */ ++ struct priority_list l; ++}; ++#define splay_tree_prefix prio ++#include "splay-tree.h" ++ ++/* The entry point into a priority queue of tasks. ++ ++ There are two alternate implementations with which to store tasks: ++ as a balanced tree of sorts, or as a simple list of tasks. If ++ there are only priority-0 items (ROOT is NULL), we use the simple ++ list, otherwise (ROOT is non-NULL) we use the tree. */ ++ ++struct priority_queue ++{ ++ /* If t.root != NULL, this is a splay tree of priority_lists to hold ++ all tasks. This is only used if multiple priorities are in play, ++ otherwise we use the priority_list `l' below to hold all ++ (priority-0) tasks. */ ++ struct prio_splay_tree_s t; ++ ++ /* If t.root above is NULL, only priority-0 items exist, so keep them ++ in a simple list. */ ++ struct priority_list l; ++}; ++ ++enum priority_insert_type { ++ /* Insert at the beginning of a priority list. */ ++ PRIORITY_INSERT_BEGIN, ++ /* Insert at the end of a priority list. */ ++ PRIORITY_INSERT_END ++}; ++ ++/* Used to determine to which queue a given priority node belongs. ++ See pnode field of gomp_task. */ ++ ++enum priority_queue_type ++{ ++ PQ_TEAM, /* Node belongs in gomp_team's task_queue. */ ++ PQ_CHILDREN, /* Node belongs in parent's children_queue. */ ++ PQ_TASKGROUP, /* Node belongs in taskgroup->taskgroup_queue.
*/ ++ PQ_IGNORED = 999 ++}; ++ ++/* Priority queue implementation prototypes. */ ++ ++extern bool priority_queue_task_in_queue_p (enum priority_queue_type, ++ struct priority_queue *, ++ struct gomp_task *); ++extern void priority_queue_dump (enum priority_queue_type, ++ struct priority_queue *); /* NOTE(review): no definition of priority_queue_dump is visible next to the definitions of the other functions declared here -- confirm one exists. */ ++extern void priority_queue_verify (enum priority_queue_type, ++ struct priority_queue *, bool); ++extern void priority_tree_remove (enum priority_queue_type, ++ struct priority_queue *, ++ struct priority_node *); ++extern struct gomp_task *priority_tree_next_task (enum priority_queue_type, ++ struct priority_queue *, ++ enum priority_queue_type, ++ struct priority_queue *, ++ bool *); ++ ++/* Return TRUE if there is more than one priority in HEAD, i.e. if ++ HEAD->t.root is non-NULL and the tree representation is in use. This is ++ used throughout to choose between the fast path (priority 0 only ++ items) and a world with multiple priorities. */ ++ ++static inline bool ++priority_queue_multi_p (struct priority_queue *head) ++{ ++ return __builtin_expect (head->t.root != NULL, 0); ++} ++ ++/* Initialize a priority queue. */ ++ ++static inline void ++priority_queue_init (struct priority_queue *head) ++{ ++ head->t.root = NULL; ++ /* To save a few microseconds, we don't initialize head->l.priority ++ to 0 here. It is implied that priority will be 0 if head->t.root ++ == NULL. ++ ++ priority_tree_insert() will fix this when we encounter multiple ++ priorities. */ ++ head->l.tasks = NULL; ++ head->l.last_parent_depends_on = NULL; ++} ++ /* Counterpart of priority_queue_init for priority queue HEAD. Currently a no-op. */ ++static inline void ++priority_queue_free (struct priority_queue *head) ++{ ++ /* There's nothing to do, as tasks were freed as they were removed ++ in priority_queue_remove. */ ++} ++ ++/* Forward declarations.
*/ ++static inline size_t priority_queue_offset (enum priority_queue_type); ++static inline struct gomp_task *priority_node_to_task ++ (enum priority_queue_type, ++ struct priority_node *); ++static inline struct priority_node *task_to_priority_node ++ (enum priority_queue_type, ++ struct gomp_task *); ++ ++/* Return TRUE if priority queue HEAD is empty. ++ ++ MODEL is MEMMODEL_ACQUIRE if we should use an acquire atomic to ++ read from the root of the queue, otherwise MEMMODEL_RELAXED if we ++ should use a plain load. */ ++ ++static inline _Bool ++priority_queue_empty_p (struct priority_queue *head, enum memmodel model) ++{ ++ /* Note: The acquire barriers on the loads here synchronize with ++ the write of a NULL in gomp_task_run_post_remove_parent. It is ++ not necessary that we synchronize with other non-NULL writes at ++ this point, but we must ensure that all writes to memory by a ++ child thread task work function are seen before we exit from ++ GOMP_taskwait. */ ++ if (priority_queue_multi_p (head)) ++ { ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL; ++ return head->t.root == NULL; ++ } ++ if (model == MEMMODEL_ACQUIRE) ++ return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL; ++ return head->l.tasks == NULL; ++} ++ ++/* Look for a given PRIORITY in HEAD. Return its priority_list if found, ++ otherwise return NULL. This only applies to the tree variant in HEAD. There ++ is no point in searching for priorities in HEAD->L. */ ++ ++static inline struct priority_list * ++priority_queue_lookup_priority (struct priority_queue *head, int priority) ++{ ++ if (head->t.root == NULL) ++ return NULL; ++ struct prio_splay_tree_key_s k; ++ k.l.priority = priority; ++ return (struct priority_list *) ++ prio_splay_tree_lookup (&head->t, &k); ++} ++ ++/* Insert TASK, with PRIORITY, in the priority list in LIST. ++ LIST contains items of type TYPE.
++ ++ If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the ++ top of its respective priority. If POS is PRIORITY_INSERT_END, the ++ task is inserted at the end of its priority. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and ++ we must keep track of higher and lower priority WAITING tasks by ++ keeping the queue's last_parent_depends_on field accurate. This ++ only applies to the children queue, and the caller must ensure LIST ++ is a children queue in this case. ++ ++ If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON must be ++ set to the task's parent_depends_on field. If ++ ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant. ++ ++ PRIORITY itself is not used by this function, and no value is ++ returned. */ ++ ++static inline void ++priority_list_insert (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ struct priority_node *node = task_to_priority_node (type, task); ++ if (list->tasks) ++ { ++ /* If we are keeping track of higher/lower priority items, ++ but this is a lower priority WAITING task ++ (!parent_depends_on), put it right after the last ++ parent_depends_on task. See the comment in ++ priority_queue_upgrade_task for a visual on how tasks ++ should be organized. */ ++ if (adjust_parent_depends_on ++ && pos == PRIORITY_INSERT_BEGIN ++ && list->last_parent_depends_on ++ && !task_is_parent_depends_on) ++ { ++ struct priority_node *last_parent_depends_on ++ = list->last_parent_depends_on; ++ node->next = last_parent_depends_on->next; ++ node->prev = last_parent_depends_on; ++ } ++ /* Otherwise, put it at the top/bottom of the queue.
*/ ++ else ++ { /* Link NODE just before the current head, i.e. at the tail of the circular list; for PRIORITY_INSERT_BEGIN the head then moves to NODE. */ ++ node->next = list->tasks; ++ node->prev = list->tasks->prev; ++ if (pos == PRIORITY_INSERT_BEGIN) ++ list->tasks = node; ++ } ++ node->next->prev = node; ++ node->prev->next = node; ++ } ++ else ++ { ++ node->next = node; ++ node->prev = node; ++ list->tasks = node; ++ } ++ if (adjust_parent_depends_on ++ && list->last_parent_depends_on == NULL ++ && task_is_parent_depends_on) ++ list->last_parent_depends_on = node; ++} ++ ++/* Tree version of priority_list_insert: insert TASK into the ++ priority_list for PRIORITY inside HEAD's splay tree, creating the ++ tree node for that priority if it does not exist yet. */ ++ ++static inline void ++priority_tree_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++ if (__builtin_expect (head->t.root == NULL, 0)) ++ { ++ /* The first time around, transfer any priority 0 items to the ++ tree. */ ++ if (head->l.tasks != NULL) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = 0; ++ k->key.l.tasks = head->l.tasks; ++ k->key.l.last_parent_depends_on = head->l.last_parent_depends_on; ++ prio_splay_tree_insert (&head->t, k); ++ head->l.tasks = NULL; ++ } ++ } ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++ if (!list) ++ { ++ prio_splay_tree_node k = gomp_malloc (sizeof (*k)); ++ k->left = NULL; ++ k->right = NULL; ++ k->key.l.priority = priority; ++ k->key.l.tasks = NULL; ++ k->key.l.last_parent_depends_on = NULL; ++ prio_splay_tree_insert (&head->t, k); ++ list = &k->key.l; ++ } ++ priority_list_insert (type, list, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* Generic version of priority_*_insert. The tree is used when HEAD ++ already holds multiple priorities or PRIORITY is greater than 0; ++ otherwise TASK goes on the plain list HEAD->l.
*/ ++ ++static inline void ++priority_queue_insert (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ int priority, ++ enum priority_insert_type pos, ++ bool adjust_parent_depends_on, ++ bool task_is_parent_depends_on) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to insert existing task %p", task); ++#endif ++ if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0)) ++ priority_tree_insert (type, head, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++ else ++ priority_list_insert (type, &head->l, task, priority, pos, ++ adjust_parent_depends_on, ++ task_is_parent_depends_on); ++} ++ ++/* If multiple priorities are in play, return the highest priority ++ task from within Q1 and Q2, while giving preference to tasks from ++ Q1. If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. ++ ++ If multiple priorities are not in play (only 0 priorities are ++ available), the next task is chosen exclusively from Q1. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ We assume Q1 has at least one item. */ ++ ++static inline struct gomp_task * ++priority_queue_next_task (enum priority_queue_type t1, ++ struct priority_queue *q1, ++ enum priority_queue_type t2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (priority_queue_empty_p (q1, MEMMODEL_RELAXED)) ++ gomp_fatal ("priority_queue_next_task: Q1 is empty"); ++#endif ++ if (priority_queue_multi_p (q1)) ++ { ++ struct gomp_task *t ++ = priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p); ++ /* If T is NULL, there are no WAITING tasks in Q1.
In which ++ case, return any old (non-waiting) task which will cause the ++ caller to do the right thing when checking T->KIND == ++ GOMP_TASK_WAITING. */ ++ if (!t) ++ { ++#if _LIBGOMP_CHECKING_ ++ if (*q1_chosen_p == false) ++ gomp_fatal ("priority_queue_next_task inconsistency"); ++#endif ++ return priority_node_to_task (t1, q1->t.root->key.l.tasks); ++ } ++ return t; ++ } ++ else ++ { ++ *q1_chosen_p = true; ++ return priority_node_to_task (t1, q1->l.tasks); ++ } ++} ++ ++/* Remove NODE from LIST. ++ ++ If we are removing the one and only item in the list, and MODEL is ++ MEMMODEL_RELEASE, use an atomic release to clear the list. ++ ++ If the list becomes empty after the remove, return TRUE. ++ ++ NOTE(review): LIST->last_parent_depends_on is not adjusted here, ++ presumably callers keep it accurate -- confirm. */ ++ ++static inline bool ++priority_list_remove (struct priority_list *list, ++ struct priority_node *node, ++ enum memmodel model) ++{ ++ bool empty = false; ++ node->prev->next = node->next; ++ node->next->prev = node->prev; ++ if (list->tasks == node) ++ { ++ if (node->next != node) ++ list->tasks = node->next; ++ else ++ { ++ /* We access task->children in GOMP_taskwait outside of ++ the task lock mutex region, so need a release barrier ++ here to ensure memory written by child_task->fn above ++ is flushed before the NULL is written. */ ++ if (model == MEMMODEL_RELEASE) ++ __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE); ++ else ++ list->tasks = NULL; ++ empty = true; ++ goto remove_out; /* NOTE(review): redundant, control would fall through to remove_out anyway. */ ++ } ++ } ++remove_out: ++#if _LIBGOMP_CHECKING_ ++ memset (node, 0xaf, sizeof (*node)); ++#endif ++ return empty; ++} ++ ++/* This is the generic version of priority_list_remove. ++ ++ Remove TASK from priority queue HEAD. HEAD contains tasks of type TYPE. ++ ++ If we are removing the one and only item in the priority queue and ++ MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue. ++ ++ If the queue becomes empty after the remove, return TRUE.
*/ ++ ++static inline bool ++priority_queue_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task, ++ enum memmodel model) ++{ ++#if _LIBGOMP_CHECKING_ ++ if (!priority_queue_task_in_queue_p (type, head, task)) ++ gomp_fatal ("Attempt to remove missing task %p", task); ++#endif ++ if (priority_queue_multi_p (head)) ++ { ++ priority_tree_remove (type, head, task_to_priority_node (type, task)); ++ if (head->t.root == NULL) ++ { ++ if (model == MEMMODEL_RELEASE) ++ /* Errr, we store NULL twice, the alternative would be to ++ use an atomic release directly in the splay tree ++ routines. Worth it? */ ++ __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE); ++ return true; ++ } ++ return false; ++ } ++ else ++ return priority_list_remove (&head->l, ++ task_to_priority_node (type, task), model); ++} ++ ++#endif /* _PRIORITY_QUEUE_H_ */ +--- libgomp/priority_queue.c.jj 2016-07-13 16:57:04.435535360 +0200 ++++ libgomp/priority_queue.c 2016-07-13 16:57:04.435535360 +0200 +@@ -0,0 +1,300 @@ ++/* Copyright (C) 2015-2016 Free Software Foundation, Inc. ++ Contributed by Aldy Hernandez . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation.
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Priority queue implementation of GOMP tasks. */ ++ ++#include "libgomp.h" ++ ++#if _LIBGOMP_CHECKING_ ++#include /* NOTE(review): the header name is missing from this include line; restore it from the upstream libgomp sources. */ ++ ++/* Sanity check to verify whether a TASK is in LIST. Return TRUE if ++ found, FALSE otherwise. LIST must not be empty: LIST->tasks is ++ dereferenced without a NULL check. ++ ++ TYPE is the type of priority queue this task resides in. */ ++ ++static inline bool ++priority_queue_task_in_list_p (enum priority_queue_type type, ++ struct priority_list *list, ++ struct gomp_task *task) ++{ ++ struct priority_node *p = list->tasks; ++ do ++ { ++ if (priority_node_to_task (type, p) == task) ++ return true; ++ p = p->next; ++ } ++ while (p != list->tasks); ++ return false; ++} ++ ++/* Tree version of priority_queue_task_in_list_p. Only the list for ++ TASK->priority is searched. */ ++ ++static inline bool ++priority_queue_task_in_tree_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, task->priority); ++ if (!list) ++ return false; ++ return priority_queue_task_in_list_p (type, list, task); ++} ++ ++/* Generic version of priority_queue_task_in_list_p that works for ++ trees or lists. */ ++ ++bool ++priority_queue_task_in_queue_p (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct gomp_task *task) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return false; ++ if (priority_queue_multi_p (head)) ++ return priority_queue_task_in_tree_p (type, head, task); ++ else ++ return priority_queue_task_in_list_p (type, &head->l, task); ++} ++ ++/* Sanity check LIST to make sure the tasks therein are in the right ++ order. LIST is a priority list of type TYPE. ++ ++ The expected order is that GOMP_TASK_WAITING tasks come before ++ GOMP_TASK_TIED/GOMP_TASK_ASYNC_RUNNING ones.
++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue. */ ++ ++static void ++priority_list_verify (enum priority_queue_type type, ++ struct priority_list *list, bool check_deps) ++{ ++ bool seen_tied = false; ++ bool seen_plain_waiting = false; ++ struct priority_node *p = list->tasks; ++ while (1) ++ { ++ struct gomp_task *t = priority_node_to_task (type, p); ++ if (seen_tied && t->kind == GOMP_TASK_WAITING) ++ gomp_fatal ("priority_queue_verify: WAITING task after TIED"); ++ if (t->kind >= GOMP_TASK_TIED) ++ seen_tied = true; ++ else if (check_deps && t->kind == GOMP_TASK_WAITING) ++ { ++ if (t->parent_depends_on) ++ { ++ if (seen_plain_waiting) ++ gomp_fatal ("priority_queue_verify: " ++ "parent_depends_on after !parent_depends_on"); ++ } ++ else ++ seen_plain_waiting = true; ++ } ++ p = p->next; ++ if (p == list->tasks) ++ break; ++ } ++} ++ ++/* Data handed to priority_tree_verify_callback: the queue type and the ++ CHECK_DEPS flag to forward to priority_list_verify. */ ++struct cbtype ++{ ++ enum priority_queue_type type; ++ bool check_deps; ++}; ++ ++/* Verify every task in the priority list held in KEY. DATA points to ++ a struct cbtype. ++ ++ Callback for prio_splay_tree_foreach. */ ++ ++static void ++priority_tree_verify_callback (prio_splay_tree_key key, void *data) ++{ ++ struct cbtype *cb = (struct cbtype *) data; ++ priority_list_verify (cb->type, &key->l, cb->check_deps); ++} ++ ++/* Generic version of priority_list_verify. ++ ++ Sanity check HEAD to make sure the tasks therein are in the right ++ order. The priority_queue holds tasks of type TYPE. ++ ++ If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING ++ tasks come before !parent_depends_on WAITING tasks. This is only ++ applicable to the children queue, and the caller is expected to ++ ensure that we are verifying the children queue.
*/ ++ ++void ++priority_queue_verify (enum priority_queue_type type, ++ struct priority_queue *head, bool check_deps) ++{ ++ if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) ++ return; ++ if (priority_queue_multi_p (head)) ++ { ++ struct cbtype cb = { type, check_deps }; ++ prio_splay_tree_foreach (&head->t, ++ priority_tree_verify_callback, &cb); ++ } ++ else ++ priority_list_verify (type, &head->l, check_deps); ++} ++#endif /* _LIBGOMP_CHECKING_ */ ++ ++/* Remove NODE from priority queue HEAD, wherever it may be inside the ++ tree. HEAD contains tasks of type TYPE. */ ++ ++void ++priority_tree_remove (enum priority_queue_type type, ++ struct priority_queue *head, ++ struct priority_node *node) ++{ ++ /* ?? The only reason this function is not inlined is that we ++ need to find the priority within gomp_task (which has not been ++ completely defined in the header file). If the lack of inlining ++ is a concern, we could pass the priority number as a ++ parameter, or we could move this to libgomp.h. */ ++ int priority = priority_node_to_task (type, node)->priority; ++ ++ /* ?? We could avoid this lookup by keeping a pointer to the key in ++ the priority_node. */ ++ struct priority_list *list ++ = priority_queue_lookup_priority (head, priority); ++#if _LIBGOMP_CHECKING_ ++ if (!list) ++ gomp_fatal ("Unable to find priority %d", priority); ++#endif ++ /* If NODE was the last in its priority, clean up the priority: ++ unlink the priority's tree node and free it. NOTE(review): ++ free (list) presumably relies on the key (and thus LIST) sitting ++ at the very start of the allocated splay tree node -- confirm ++ against splay-tree.h. */ ++ if (priority_list_remove (list, node, MEMMODEL_RELAXED)) ++ { ++ prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list); ++ list->tasks = NULL; ++#if _LIBGOMP_CHECKING_ ++ memset (list, 0xaf, sizeof (*list)); ++#endif ++ free (list); ++ } ++} ++ ++/* Return the highest priority WAITING task in a splay tree NODE. If ++ there are no WAITING tasks available, return NULL. ++ ++ NODE is a priority list containing tasks of type TYPE. ++ ++ The right most node in a tree contains the highest priority. ++ Recurse down to find such a node.
If the task at that max node is ++ not WAITING, bubble back up and look at the remaining tasks ++ in-order. Only the first task of each priority list is examined. */ ++ ++static struct gomp_task * ++priority_tree_next_task_1 (enum priority_queue_type type, ++ prio_splay_tree_node node) ++{ ++ again: ++ if (!node) ++ return NULL; ++ struct gomp_task *ret = priority_tree_next_task_1 (type, node->right); ++ if (ret) ++ return ret; ++ ret = priority_node_to_task (type, node->key.l.tasks); ++ if (ret->kind == GOMP_TASK_WAITING) ++ return ret; ++ node = node->left; /* The left subtree is walked by looping back to `again' instead of recursing. */ ++ goto again; ++} ++ ++/* Return the highest priority WAITING task from within Q1 and Q2, ++ while giving preference to tasks from Q1. Q1 is a queue containing ++ items of type TYPE1. Q2 is a queue containing items of type TYPE2. ++ ++ Since we are mostly interested in Q1, if there are no WAITING tasks ++ in Q1, we don't bother checking Q2, and just return NULL. ++ ++ As a special case, Q2 can be NULL, in which case, we just choose ++ the highest priority WAITING task in Q1. This is an optimization ++ to speed up looking through only one queue. ++ ++ If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to ++ TRUE, otherwise it is set to FALSE. */ ++ ++struct gomp_task * ++priority_tree_next_task (enum priority_queue_type type1, ++ struct priority_queue *q1, ++ enum priority_queue_type type2, ++ struct priority_queue *q2, ++ bool *q1_chosen_p) ++{ ++ struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root); ++ if (!t1 ++ /* Special optimization when only searching through one queue. */ ++ || !q2) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root); ++ if (!t2 || t1->priority > t2->priority) ++ { ++ *q1_chosen_p = true; ++ return t1; ++ } ++ if (t2->priority > t1->priority) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ /* If we get here, the priorities are the same, so we must look at ++ parent_depends_on to make our decision.
*/ ++#if _LIBGOMP_CHECKING_ ++ if (t1 != t2) ++ gomp_fatal ("priority_tree_next_task: t1 != t2"); ++#endif ++ if (t2->parent_depends_on && !t1->parent_depends_on) ++ { ++ *q1_chosen_p = false; ++ return t2; ++ } ++ *q1_chosen_p = true; ++ return t1; ++} ++ ++/* Priority splay tree comparison function: orders keys by ascending ++ l.priority and returns -1, 0 or 1. */ ++static inline int ++prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y) ++{ ++ if (x->l.priority == y->l.priority) ++ return 0; ++ return x->l.priority < y->l.priority ? -1 : 1; ++} ++ ++/* Define another splay tree instantiation, for priority_list's. */ ++#define splay_tree_prefix prio ++#define splay_tree_c ++#include "splay-tree.h" +--- libgomp/openacc.f90.jj 2016-07-13 16:57:04.434535373 +0200 ++++ libgomp/openacc.f90 2016-07-14 19:01:54.901230875 +0200 +@@ -0,0 +1,911 @@ ++! OpenACC Runtime Library Definitions. ++ ++! Copyright (C) 2014-2016 Free Software Foundation, Inc. ++ ++! Contributed by Tobias Burnus ++! and Mentor Embedded. ++ ++! This file is part of the GNU Offloading and Multi Processing Library ++! (libgomp). ++ ++! Libgomp is free software; you can redistribute it and/or modify it ++! under the terms of the GNU General Public License as published by ++! the Free Software Foundation; either version 3, or (at your option) ++! any later version. ++ ++! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++! FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++! more details. ++ ++! Under Section 7 of GPL version 3, you are granted additional ++! permissions described in the GCC Runtime Library Exception, version ++! 3.1, as published by the Free Software Foundation. ++ ++! You should have received a copy of the GNU General Public License and ++! a copy of the GCC Runtime Library Exception along with this program; ++! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++! .
++ ++module openacc_kinds ++ use iso_fortran_env, only: int32 ++ implicit none ++ ++ private :: int32 ++ public :: acc_device_kind ++ ++ integer, parameter :: acc_device_kind = int32 ++ ++ public :: acc_device_none, acc_device_default, acc_device_host ++ public :: acc_device_not_host, acc_device_nvidia ++ ++ ! Keep in sync with include/gomp-constants.h. ++ integer (acc_device_kind), parameter :: acc_device_none = 0 ++ integer (acc_device_kind), parameter :: acc_device_default = 1 ++ integer (acc_device_kind), parameter :: acc_device_host = 2 ++ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. ++ integer (acc_device_kind), parameter :: acc_device_not_host = 4 ++ integer (acc_device_kind), parameter :: acc_device_nvidia = 5 ++ ++ public :: acc_handle_kind ++ ++ integer, parameter :: acc_handle_kind = int32 ++ ++ public :: acc_async_noval, acc_async_sync ++ ++ ! Keep in sync with include/gomp-constants.h. ++ integer (acc_handle_kind), parameter :: acc_async_noval = -1 ++ integer (acc_handle_kind), parameter :: acc_async_sync = -2 ++ ++end module ++ ++module openacc_internal ++ use openacc_kinds ++ implicit none ++ ++ interface ++ function acc_get_num_devices_h (d) ++ import ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ end function ++ ++ subroutine acc_set_device_type_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_type_h () ++ import ++ integer (acc_device_kind) acc_get_device_type_h ++ end function ++ ++ subroutine acc_set_device_num_h (n, d) ++ import ++ integer n ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_get_device_num_h (d) ++ import ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ end function ++ ++ function acc_async_test_h (a) ++ logical acc_async_test_h ++ integer a ++ end function ++ ++ function acc_async_test_all_h () ++ logical acc_async_test_all_h ++ end function ++ ++ subroutine acc_wait_h (a) ++ integer a ++ end 
subroutine ++ ++ subroutine acc_wait_async_h (a1, a2) ++ integer a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_h () ++ end subroutine ++ ++ subroutine acc_wait_all_async_h (a) ++ integer a ++ end subroutine ++ ++ subroutine acc_init_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ subroutine acc_shutdown_h (d) ++ import ++ integer (acc_device_kind) d ++ end subroutine ++ ++ function acc_on_device_h (d) ++ import ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ end function ++ ++ subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine 
acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_present_or_create_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_copyout_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_delete_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_device_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end subroutine ++ ++ subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end subroutine ++ ++ subroutine acc_update_self_array_h (a) ++ type (*), dimension (..), contiguous :: a ++ end subroutine ++ ++ function 
acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ end function ++ ++ function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ end function ++ ++ function acc_is_present_array_h (a) ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ end function ++ end interface ++ ++ interface ++ function acc_get_num_devices_l (d) & ++ bind (C, name = "acc_get_num_devices") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_num_devices_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_set_device_type_l (d) & ++ bind (C, name = "acc_set_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_get_device_type_l () & ++ bind (C, name = "acc_get_device_type") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_type_l ++ end function ++ ++ subroutine acc_set_device_num_l (n, d) & ++ bind (C, name = "acc_set_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: n, d ++ end subroutine ++ ++ function acc_get_device_num_l (d) & ++ bind (C, name = "acc_get_device_num") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_get_device_num_l ++ integer (c_int), value :: d ++ end function ++ ++ function acc_async_test_l (a) & ++ bind (C, name = "acc_async_test") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_l ++ integer (c_int), value :: a ++ end function ++ ++ function acc_async_test_all_l () & ++ bind (C, name = "acc_async_test_all") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_async_test_all_l ++ end function ++ ++ subroutine acc_wait_l (a) & ++ bind (C, name = "acc_wait") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ 
subroutine acc_wait_async_l (a1, a2) & ++ bind (C, name = "acc_wait_async") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a1, a2 ++ end subroutine ++ ++ subroutine acc_wait_all_l () & ++ bind (C, name = "acc_wait_all") ++ use iso_c_binding, only: c_int ++ end subroutine ++ ++ subroutine acc_wait_all_async_l (a) & ++ bind (C, name = "acc_wait_all_async") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: a ++ end subroutine ++ ++ subroutine acc_init_l (d) & ++ bind (C, name = "acc_init") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ subroutine acc_shutdown_l (d) & ++ bind (C, name = "acc_shutdown") ++ use iso_c_binding, only: c_int ++ integer (c_int), value :: d ++ end subroutine ++ ++ function acc_on_device_l (d) & ++ bind (C, name = "acc_on_device") ++ use iso_c_binding, only: c_int ++ integer (c_int) :: acc_on_device_l ++ integer (c_int), value :: d ++ end function ++ ++ subroutine acc_copyin_l (a, len) & ++ bind (C, name = "acc_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_copyin_l (a, len) & ++ bind (C, name = "acc_present_or_copyin") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_create_l (a, len) & ++ bind (C, name = "acc_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_present_or_create_l (a, len) & ++ bind (C, name = "acc_present_or_create") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_copyout_l (a, len) & ++ bind (C, name = "acc_copyout") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine 
acc_delete_l (a, len) & ++ bind (C, name = "acc_delete") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_device_l (a, len) & ++ bind (C, name = "acc_update_device") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ subroutine acc_update_self_l (a, len) & ++ bind (C, name = "acc_update_self") ++ use iso_c_binding, only: c_size_t ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end subroutine ++ ++ function acc_is_present_l (a, len) & ++ bind (C, name = "acc_is_present") ++ use iso_c_binding, only: c_int32_t, c_size_t ++ integer (c_int32_t) :: acc_is_present_l ++ type (*), dimension (*) :: a ++ integer (c_size_t), value :: len ++ end function ++ end interface ++end module ++ ++module openacc ++ use openacc_kinds ++ use openacc_internal ++ implicit none ++ ++ public :: openacc_version ++ ++ public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type ++ public :: acc_set_device_num, acc_get_device_num, acc_async_test ++ public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all ++ public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device ++ public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create ++ public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete ++ public :: acc_update_device, acc_update_self, acc_is_present ++ ++ integer, parameter :: openacc_version = 201306 ++ ++ interface acc_get_num_devices ++ procedure :: acc_get_num_devices_h ++ end interface ++ ++ interface acc_set_device_type ++ procedure :: acc_set_device_type_h ++ end interface ++ ++ interface acc_get_device_type ++ procedure :: acc_get_device_type_h ++ end interface ++ ++ interface acc_set_device_num ++ procedure :: acc_set_device_num_h ++ end interface ++ ++ interface acc_get_device_num ++ procedure :: acc_get_device_num_h ++ end interface 
++ ++ interface acc_async_test ++ procedure :: acc_async_test_h ++ end interface ++ ++ interface acc_async_test_all ++ procedure :: acc_async_test_all_h ++ end interface ++ ++ interface acc_wait ++ procedure :: acc_wait_h ++ end interface ++ ++ interface acc_wait_async ++ procedure :: acc_wait_async_h ++ end interface ++ ++ interface acc_wait_all ++ procedure :: acc_wait_all_h ++ end interface ++ ++ interface acc_wait_all_async ++ procedure :: acc_wait_all_async_h ++ end interface ++ ++ interface acc_init ++ procedure :: acc_init_h ++ end interface ++ ++ interface acc_shutdown ++ procedure :: acc_shutdown_h ++ end interface ++ ++ interface acc_on_device ++ procedure :: acc_on_device_h ++ end interface ++ ++ ! acc_malloc: Only available in C/C++ ++ ! acc_free: Only available in C/C++ ++ ++ ! As vendor extension, the following code supports both 32bit and 64bit ++ ! arguments for "size"; the OpenACC standard only permits default-kind ++ ! integers, which are of kind 4 (i.e. 32 bits). ++ ! Additionally, the two-argument version also takes arrays as argument. ++ ! and the one argument version also scalars. Note that the code assumes ++ ! that the arrays are contiguous. 
++ ++ interface acc_copyin ++ procedure :: acc_copyin_32_h ++ procedure :: acc_copyin_64_h ++ procedure :: acc_copyin_array_h ++ end interface ++ ++ interface acc_present_or_copyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_pcopyin ++ procedure :: acc_present_or_copyin_32_h ++ procedure :: acc_present_or_copyin_64_h ++ procedure :: acc_present_or_copyin_array_h ++ end interface ++ ++ interface acc_create ++ procedure :: acc_create_32_h ++ procedure :: acc_create_64_h ++ procedure :: acc_create_array_h ++ end interface ++ ++ interface acc_present_or_create ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_pcreate ++ procedure :: acc_present_or_create_32_h ++ procedure :: acc_present_or_create_64_h ++ procedure :: acc_present_or_create_array_h ++ end interface ++ ++ interface acc_copyout ++ procedure :: acc_copyout_32_h ++ procedure :: acc_copyout_64_h ++ procedure :: acc_copyout_array_h ++ end interface ++ ++ interface acc_delete ++ procedure :: acc_delete_32_h ++ procedure :: acc_delete_64_h ++ procedure :: acc_delete_array_h ++ end interface ++ ++ interface acc_update_device ++ procedure :: acc_update_device_32_h ++ procedure :: acc_update_device_64_h ++ procedure :: acc_update_device_array_h ++ end interface ++ ++ interface acc_update_self ++ procedure :: acc_update_self_32_h ++ procedure :: acc_update_self_64_h ++ procedure :: acc_update_self_array_h ++ end interface ++ ++ ! acc_map_data: Only available in C/C++ ++ ! acc_unmap_data: Only available in C/C++ ++ ! acc_deviceptr: Only available in C/C++ ++ ! acc_hostptr: Only available in C/C++ ++ ++ interface acc_is_present ++ procedure :: acc_is_present_32_h ++ procedure :: acc_is_present_64_h ++ procedure :: acc_is_present_array_h ++ end interface ++ ++ ! 
acc_memcpy_to_device: Only available in C/C++ ++ ! acc_memcpy_from_device: Only available in C/C++ ++ ++end module ++ ++function acc_get_num_devices_h (d) ++ use openacc_internal, only: acc_get_num_devices_l ++ use openacc_kinds ++ integer acc_get_num_devices_h ++ integer (acc_device_kind) d ++ acc_get_num_devices_h = acc_get_num_devices_l (d) ++end function ++ ++subroutine acc_set_device_type_h (d) ++ use openacc_internal, only: acc_set_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_set_device_type_l (d) ++end subroutine ++ ++function acc_get_device_type_h () ++ use openacc_internal, only: acc_get_device_type_l ++ use openacc_kinds ++ integer (acc_device_kind) acc_get_device_type_h ++ acc_get_device_type_h = acc_get_device_type_l () ++end function ++ ++subroutine acc_set_device_num_h (n, d) ++ use openacc_internal, only: acc_set_device_num_l ++ use openacc_kinds ++ integer n ++ integer (acc_device_kind) d ++ call acc_set_device_num_l (n, d) ++end subroutine ++ ++function acc_get_device_num_h (d) ++ use openacc_internal, only: acc_get_device_num_l ++ use openacc_kinds ++ integer acc_get_device_num_h ++ integer (acc_device_kind) d ++ acc_get_device_num_h = acc_get_device_num_l (d) ++end function ++ ++function acc_async_test_h (a) ++ use openacc_internal, only: acc_async_test_l ++ logical acc_async_test_h ++ integer a ++ if (acc_async_test_l (a) .eq. 1) then ++ acc_async_test_h = .TRUE. ++ else ++ acc_async_test_h = .FALSE. ++ end if ++end function ++ ++function acc_async_test_all_h () ++ use openacc_internal, only: acc_async_test_all_l ++ logical acc_async_test_all_h ++ if (acc_async_test_all_l () .eq. 1) then ++ acc_async_test_all_h = .TRUE. ++ else ++ acc_async_test_all_h = .FALSE. 
++ end if ++end function ++ ++subroutine acc_wait_h (a) ++ use openacc_internal, only: acc_wait_l ++ integer a ++ call acc_wait_l (a) ++end subroutine ++ ++subroutine acc_wait_async_h (a1, a2) ++ use openacc_internal, only: acc_wait_async_l ++ integer a1, a2 ++ call acc_wait_async_l (a1, a2) ++end subroutine ++ ++subroutine acc_wait_all_h () ++ use openacc_internal, only: acc_wait_all_l ++ call acc_wait_all_l () ++end subroutine ++ ++subroutine acc_wait_all_async_h (a) ++ use openacc_internal, only: acc_wait_all_async_l ++ integer a ++ call acc_wait_all_async_l (a) ++end subroutine ++ ++subroutine acc_init_h (d) ++ use openacc_internal, only: acc_init_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_init_l (d) ++end subroutine ++ ++subroutine acc_shutdown_h (d) ++ use openacc_internal, only: acc_shutdown_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ call acc_shutdown_l (d) ++end subroutine ++ ++function acc_on_device_h (d) ++ use openacc_internal, only: acc_on_device_l ++ use openacc_kinds ++ integer (acc_device_kind) d ++ logical acc_on_device_h ++ if (acc_on_device_l (d) .eq. 1) then ++ acc_on_device_h = .TRUE. ++ else ++ acc_on_device_h = .FALSE. 
++ end if ++end function ++ ++subroutine acc_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyin_array_h (a) ++ use openacc_internal, only: acc_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_copyin_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_copyin_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_copyin_array_h (a) ++ use openacc_internal, only: acc_present_or_copyin_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_copyin_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call 
acc_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_create_array_h (a) ++ use openacc_internal, only: acc_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_present_or_create_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_present_or_create_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_present_or_create_array_h (a) ++ use openacc_internal, only: acc_present_or_create_l ++ type (*), dimension (..), contiguous :: a ++ call acc_present_or_create_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_copyout_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_copyout_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_copyout_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_copyout_array_h (a) ++ use openacc_internal, only: acc_copyout_l ++ type (*), dimension (..), contiguous :: a ++ call acc_copyout_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_delete_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_delete_l (a, int (len, kind = 
c_size_t)) ++end subroutine ++ ++subroutine acc_delete_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_delete_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_delete_array_h (a) ++ use openacc_internal, only: acc_delete_l ++ type (*), dimension (..), contiguous :: a ++ call acc_delete_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_device_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_device_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_device_array_h (a) ++ use openacc_internal, only: acc_update_device_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_device_l (a, sizeof (a)) ++end subroutine ++ ++subroutine acc_update_self_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ call acc_update_self_l (a, int (len, kind = c_size_t)) ++end subroutine ++ ++subroutine acc_update_self_array_h (a) ++ use openacc_internal, only: acc_update_self_l ++ type (*), dimension (..), contiguous :: a ++ call acc_update_self_l (a, sizeof (a)) ++end subroutine ++ 
++function acc_is_present_32_h (a, len) ++ use iso_c_binding, only: c_int32_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_32_h ++ type (*), dimension (*) :: a ++ integer (c_int32_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_32_h = .TRUE. ++ else ++ acc_is_present_32_h = .FALSE. ++ end if ++end function ++ ++function acc_is_present_64_h (a, len) ++ use iso_c_binding, only: c_int64_t, c_size_t ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_64_h ++ type (*), dimension (*) :: a ++ integer (c_int64_t) len ++ if (acc_is_present_l (a, int (len, kind = c_size_t)) .eq. 1) then ++ acc_is_present_64_h = .TRUE. ++ else ++ acc_is_present_64_h = .FALSE. ++ end if ++end function ++ ++function acc_is_present_array_h (a) ++ use openacc_internal, only: acc_is_present_l ++ logical acc_is_present_array_h ++ type (*), dimension (..), contiguous :: a ++ acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 ++end function diff --git a/SOURCES/gcc48-pr52714.patch b/SOURCES/gcc48-pr52714.patch new file mode 100644 index 0000000..2ea553d --- /dev/null +++ b/SOURCES/gcc48-pr52714.patch @@ -0,0 +1,76 @@ +2014-02-27 Jeff Law + + PR rtl-optimization/52714 + * combine.c (try_combine): When splitting an unrecognized PARALLEL + into two independent simple sets, if I3 is a jump, ensure the + pattern we place into I3 is a (set (pc) ...) + + * gcc.c-torture/compile/pr52714.c: New test. + +2016-06-15 Jakub Jelinek + + * gcc.c-torture/compile/20160615-1.c: New test. + +--- gcc/combine.c (revision 208203) ++++ gcc/combine.c (revision 208204) +@@ -3706,6 +3706,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx + #ifdef HAVE_cc0 + && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 0)) + #endif ++ /* If I3 is a jump, ensure that set0 is a jump so that ++ we do not create invalid RTL. 
*/ ++ && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 0)) == pc_rtx) + ) + { + newi2pat = XVECEXP (newpat, 0, 1); +@@ -3716,6 +3719,9 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx + #ifdef HAVE_cc0 + && !reg_referenced_p (cc0_rtx, XVECEXP (newpat, 0, 1)) + #endif ++ /* If I3 is a jump, ensure that set1 is a jump so that ++ we do not create invalid RTL. */ ++ && (!JUMP_P (i3) || SET_DEST (XVECEXP (newpat, 0, 1)) == pc_rtx) + ) + { + newi2pat = XVECEXP (newpat, 0, 0); +--- gcc/testsuite/gcc.c-torture/compile/pr52714.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/compile/pr52714.c (revision 208204) +@@ -0,0 +1,25 @@ ++ ++int __re_compile_fastmap(unsigned char *p) ++{ ++ unsigned char **stack; ++ unsigned size; ++ unsigned avail; ++ ++ stack = __builtin_alloca(5 * sizeof(unsigned char*)); ++ if (stack == 0) ++ return -2; ++ size = 5; ++ avail = 0; ++ ++ for (;;) { ++ switch (*p++) { ++ case 0: ++ if (avail == size) ++ return -2; ++ stack[avail++] = p; ++ } ++ } ++ ++ return 0; ++} ++ +--- gcc/testsuite/gcc.c-torture/compile/20160615-1.c.jj 2016-06-15 11:17:54.690689056 +0200 ++++ gcc/testsuite/gcc.c-torture/compile/20160615-1.c 2016-06-15 11:17:48.811765657 +0200 +@@ -0,0 +1,10 @@ ++int a; ++void bar (int, unsigned, unsigned); ++ ++void ++foo (unsigned x) ++{ ++ unsigned b = a ? x : 0; ++ if (x || b) ++ bar (0, x, b); ++} diff --git a/SOURCES/gcc48-pr53477.patch b/SOURCES/gcc48-pr53477.patch new file mode 100644 index 0000000..70d5d56 --- /dev/null +++ b/SOURCES/gcc48-pr53477.patch @@ -0,0 +1,131 @@ +2013-08-20 Phil Muldoon + + PR libstdc++/53477 + http://sourceware.org/bugzilla/show_bug.cgi?id=15195 + + * python/libstdcxx/v6/printers.py (Printer.__call__): If a value + is a reference, fetch referenced value. + (RxPrinter.invoke): Ditto. + * testsuite/libstdc++-prettyprinters/cxx11.cc (main): Add -O0 + flag. Add referenced value tests. 
+ +--- libstdc++-v3/python/libstdcxx/v6/printers.py (revision 201887) ++++ libstdc++-v3/python/libstdcxx/v6/printers.py (revision 201888) +@@ -786,6 +786,11 @@ class RxPrinter(object): + def invoke(self, value): + if not self.enabled: + return None ++ ++ if value.type.code == gdb.TYPE_CODE_REF: ++ if hasattr(gdb.Value,"referenced_value"): ++ value = value.referenced_value() ++ + return self.function(self.name, value) + + # A pretty-printer that conforms to the "PrettyPrinter" protocol from +@@ -841,6 +846,11 @@ class Printer(object): + return None + + basename = match.group(1) ++ ++ if val.type.code == gdb.TYPE_CODE_REF: ++ if hasattr(gdb.Value,"referenced_value"): ++ val = val.referenced_value() ++ + if basename in self.lookup: + return self.lookup[basename].invoke(val) + +--- libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc (revision 201887) ++++ libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx11.cc (revision 201888) +@@ -1,5 +1,5 @@ + // { dg-do run } +-// { dg-options "-std=gnu++11 -g" } ++// { dg-options "-std=gnu++11 -g -O0" } + + // Copyright (C) 2011-2013 Free Software Foundation, Inc. 
+ // +@@ -24,6 +24,8 @@ + #include + #include + ++typedef std::tuple ExTuple; ++ + template + void + placeholder(const T &s) +@@ -62,43 +64,75 @@ main() + std::forward_list efl; + // { dg-final { note-test efl "empty std::forward_list" } } + ++ std::forward_list &refl = efl; ++// { dg-final { note-test refl "empty std::forward_list" } } ++ + std::forward_list fl; + fl.push_front(2); + fl.push_front(1); + // { dg-final { note-test fl {std::forward_list = {[0] = 1, [1] = 2}} } } + ++ std::forward_list &rfl = fl; ++// { dg-final { note-test rfl {std::forward_list = {[0] = 1, [1] = 2}} } } ++ + std::unordered_map eum; + // { dg-final { note-test eum "std::unordered_map with 0 elements" } } ++ std::unordered_map &reum = eum; ++// { dg-final { note-test reum "std::unordered_map with 0 elements" } } ++ + std::unordered_multimap eumm; + // { dg-final { note-test eumm "std::unordered_multimap with 0 elements" } } ++ std::unordered_multimap &reumm = eumm; ++// { dg-final { note-test reumm "std::unordered_multimap with 0 elements" } } ++ + std::unordered_set eus; + // { dg-final { note-test eus "std::unordered_set with 0 elements" } } ++ std::unordered_set &reus = eus; ++// { dg-final { note-test reus "std::unordered_set with 0 elements" } } ++ + std::unordered_multiset eums; + // { dg-final { note-test eums "std::unordered_multiset with 0 elements" } } ++ std::unordered_multiset &reums = eums; ++// { dg-final { note-test reums "std::unordered_multiset with 0 elements" } } + + std::unordered_map uom; + uom[5] = "three"; + uom[3] = "seven"; + // { dg-final { note-test uom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } } + ++ std::unordered_map &ruom = uom; ++// { dg-final { note-test ruom {std::unordered_map with 2 elements = {[3] = "seven", [5] = "three"}} } } ++ + std::unordered_multimap uomm; + uomm.insert(std::pair (5, "three")); + uomm.insert(std::pair (5, "seven")); + // { dg-final { note-test uomm {std::unordered_multimap with 2 elements = {[5] 
= "seven", [5] = "three"}} } } ++ std::unordered_multimap &ruomm = uomm; ++// { dg-final { note-test ruomm {std::unordered_multimap with 2 elements = {[5] = "seven", [5] = "three"}} } } + + std::unordered_set uos; + uos.insert(5); + // { dg-final { note-test uos {std::unordered_set with 1 elements = {[0] = 5}} } } ++ std::unordered_set &ruos = uos; ++// { dg-final { note-test ruos {std::unordered_set with 1 elements = {[0] = 5}} } } + + std::unordered_multiset uoms; + uoms.insert(5); + // { dg-final { note-test uoms {std::unordered_multiset with 1 elements = {[0] = 5}} } } ++ std::unordered_multiset &ruoms = uoms; ++// { dg-final { note-test ruoms {std::unordered_multiset with 1 elements = {[0] = 5}} } } + + std::unique_ptr uptr (new datum); + uptr->s = "hi bob"; + uptr->i = 23; + // { dg-final { regexp-test uptr {std::unique_ptr.datum. containing 0x.*} } } ++ std::unique_ptr &ruptr = uptr; ++// { dg-final { regexp-test ruptr {std::unique_ptr.datum. containing 0x.*} } } + ++ ExTuple tpl(6,7); ++// { dg-final { note-test tpl {std::tuple containing = {[1] = 6, [2] = 7}} } } ++ ExTuple &rtpl = tpl; ++// { dg-final { note-test rtpl {std::tuple containing = {[1] = 6, [2] = 7}} } } + placeholder(""); // Mark SPOT + use(efl); + use(fl); diff --git a/SOURCES/gcc48-pr63293.patch b/SOURCES/gcc48-pr63293.patch new file mode 100644 index 0000000..4b11a8c --- /dev/null +++ b/SOURCES/gcc48-pr63293.patch @@ -0,0 +1,60 @@ +2014-11-04 Jiong Wang + Wilco Dijkstra + + PR target/63293 + * config/aarch64/aarch64.c (aarch64_expand_epiloue): Add barriers before + stack adjustment. + +--- gcc/config/aarch64/aarch64.c (revision 217090) ++++ gcc/config/aarch64/aarch64.c (revision 217091) +@@ -1989,6 +1989,9 @@ aarch64_expand_epilogue (bool for_sibcal + rtx insn; + rtx cfa_reg; + rtx cfi_ops = NULL; ++ /* We need to add memory barrier to prevent read from deallocated stack. 
*/ ++ bool need_barrier_p = (get_frame_size () != 0 ++ || cfun->machine->saved_varargs_size); + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; +@@ -2030,6 +2033,9 @@ aarch64_expand_epilogue (bool for_sibcal + if (frame_pointer_needed + && (crtl->outgoing_args_size || cfun->calls_alloca)) + { ++ if (cfun->calls_alloca) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); +@@ -2048,6 +2054,9 @@ aarch64_expand_epilogue (bool for_sibcal + /* Restore the frame pointer and lr if the frame pointer is needed. */ + if (offset > 0) + { ++ if (need_barrier_p && (!frame_pointer_needed || !fp_offset)) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; +@@ -2067,6 +2076,10 @@ aarch64_expand_epilogue (bool for_sibcal + + UNITS_PER_WORD)); + emit_insn (gen_load_pairdi (reg_fp, mem_fp, reg_lr, mem_lr)); + ++ if (need_barrier_p) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, ++ stack_pointer_rtx)); ++ + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + } +@@ -2128,6 +2141,9 @@ aarch64_expand_epilogue (bool for_sibcal + + if (frame_size > -1) + { ++ if (need_barrier_p) ++ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); ++ + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); diff --git a/SOURCES/gcc48-pr65142.patch b/SOURCES/gcc48-pr65142.patch new file mode 100644 index 0000000..367ec16 --- /dev/null +++ b/SOURCES/gcc48-pr65142.patch @@ -0,0 +1,23 @@ +2016-06-01 Jakub Jelinek + + Backported from mainline + 2015-10-02 Jonathan Wakely + + PR libstdc++/65142 + * src/c++11/random.cc (random_device::_M_getval()): Check read result. 
+ +--- libstdc++-v3/src/c++11/random.cc (revision 228423) ++++ libstdc++-v3/src/c++11/random.cc (revision 228424) +@@ -126,8 +126,10 @@ namespace std _GLIBCXX_VISIBILITY(defaul + #endif + + result_type __ret; +- std::fread(reinterpret_cast(&__ret), sizeof(result_type), +- 1, _M_file); ++ const size_t e = std::fread(reinterpret_cast(&__ret), ++ sizeof(result_type), 1, _M_file); ++ if (e != 1) ++ std::__throw_runtime_error(__N("random_device could not be read")); + return __ret; + } + diff --git a/SOURCES/gcc48-pr67281.patch b/SOURCES/gcc48-pr67281.patch new file mode 100644 index 0000000..9637e08 --- /dev/null +++ b/SOURCES/gcc48-pr67281.patch @@ -0,0 +1,348 @@ +2015-10-14 Peter Bergner + Torvald Riegel + + PR target/67281 + * config/rs6000/htm.md (UNSPEC_HTM_FENCE): New. + (tabort, tabortc, tabortci, tbegin, tcheck, tend, + trechkpt, treclaim, tsr, ttest): Rename define_insns from this... + (*tabort, *tabortc, *tabortci, *tbegin, *tcheck, *tend, + *trechkpt, *treclaim, *tsr, *ttest): ...to this. Add memory barrier. + (tabort, tabortc, tabortci, tbegin, tcheck, tend, + trechkpt, treclaim, tsr, ttest): New define_expands. + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define + __TM_FENCE__ for htm. + * doc/extend.texi: Update documentation for htm builtins. + +2015-08-03 Peter Bergner + + * config/rs6000/htm.md (tabort.): Restrict the source operand to + using a base register. + + * gcc.target/powerpc/htm-tabort-no-r0.c: New test. + +--- gcc/doc/extend.texi (revision 228826) ++++ gcc/doc/extend.texi (revision 228827) +@@ -16092,6 +16092,28 @@ unsigned int __builtin_tresume (void) + unsigned int __builtin_tsuspend (void) + @end smallexample + ++Note that the semantics of the above HTM builtins are required to mimic ++the locking semantics used for critical sections. 
Builtins that are used ++to create a new transaction or restart a suspended transaction must have ++lock acquisition like semantics while those builtins that end or suspend a ++transaction must have lock release like semantics. Specifically, this must ++mimic lock semantics as specified by C++11, for example: Lock acquisition is ++as-if an execution of __atomic_exchange_n(&globallock,1,__ATOMIC_ACQUIRE) ++that returns 0, and lock release is as-if an execution of ++__atomic_store(&globallock,0,__ATOMIC_RELEASE), with globallock being an ++implicit implementation-defined lock used for all transactions. The HTM ++instructions associated with the builtins inherently provide the ++correct acquisition and release hardware barriers required. However, ++the compiler must also be prohibited from moving loads and stores across ++the builtins in a way that would violate their semantics. This has been ++accomplished by adding memory barriers to the associated HTM instructions ++(which is a conservative approach to provide acquire and release semantics). ++Earlier versions of the compiler did not treat the HTM instructions as ++memory barriers. A @code{__TM_FENCE__} macro has been added, which can ++be used to determine whether the current compiler treats HTM instructions ++as memory barriers or not. This allows the user to explicitly add memory ++barriers to their code when using an older version of the compiler. ++ + The following set of built-in functions are available to gain access + to the HTM specific special purpose registers.
+ +--- gcc/config/rs6000/htm.md (revision 226531) ++++ gcc/config/rs6000/htm.md (revision 228827) +@@ -27,6 +27,14 @@ (define_constants + ]) + + ;; ++;; UNSPEC usage ++;; ++ ++(define_c_enum "unspec" ++ [UNSPEC_HTM_FENCE ++ ]) ++ ++;; + ;; UNSPEC_VOLATILE usage + ;; + +@@ -45,96 +53,223 @@ (define_c_enum "unspecv" + UNSPECV_HTM_MTSPR + ]) + ++(define_expand "tabort" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] ++ UNSPECV_HTM_TABORT)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) + +-(define_insn "tabort" ++(define_insn "*tabort" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TABORT))] ++ (unspec_volatile:CC [(match_operand:SI 0 "base_reg_operand" "b")] ++ UNSPECV_HTM_TABORT)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabort. 
%0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tabortc" ++(define_expand "tabortc" ++ [(parallel ++ [(set (match_operand:CC 3 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") ++ (match_operand:GPR 1 "gpc_reg_operand" "r") ++ (match_operand:GPR 2 "gpc_reg_operand" "r")] ++ UNSPECV_HTM_TABORTXC)) ++ (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[4]) = 1; ++}) ++ ++(define_insn "*tabortc" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TABORTXC))] ++ UNSPECV_HTM_TABORTXC)) ++ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortc. %0,%1,%2" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tabortci" ++(define_expand "tabortci" ++ [(parallel ++ [(set (match_operand:CC 3 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") ++ (match_operand:GPR 1 "gpc_reg_operand" "r") ++ (match_operand 2 "s5bit_cint_operand" "n")] ++ UNSPECV_HTM_TABORTXCI)) ++ (set (match_dup 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[4] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[4]) = 1; ++}) ++ ++(define_insn "*tabortci" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand 2 "s5bit_cint_operand" "n")] +- UNSPECV_HTM_TABORTXCI))] ++ UNSPECV_HTM_TABORTXCI)) ++ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortci. 
%0,%1,%2" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tbegin" ++(define_expand "tbegin" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TBEGIN)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*tbegin" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TBEGIN))] ++ UNSPECV_HTM_TBEGIN)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tbegin. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tcheck" ++(define_expand "tcheck" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=y") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*tcheck" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TCHECK))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tcheck %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tend" ++(define_expand "tend" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TEND)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ 
++(define_insn "*tend" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TEND))] ++ UNSPECV_HTM_TEND)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tend. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "trechkpt" ++(define_expand "trechkpt" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*trechkpt" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TRECHKPT))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "trechkpt." + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "treclaim" ++(define_expand "treclaim" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] ++ UNSPECV_HTM_TRECLAIM)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*treclaim" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] +- UNSPECV_HTM_TRECLAIM))] ++ UNSPECV_HTM_TRECLAIM)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "treclaim. 
%0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "tsr" ++(define_expand "tsr" ++ [(parallel ++ [(set (match_operand:CC 1 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] ++ UNSPECV_HTM_TSR)) ++ (set (match_dup 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[2] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[2]) = 1; ++}) ++ ++(define_insn "*tsr" + [(set (match_operand:CC 1 "cc_reg_operand" "=x") + (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] +- UNSPECV_HTM_TSR))] ++ UNSPECV_HTM_TSR)) ++ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tsr. %0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + +-(define_insn "ttest" ++(define_expand "ttest" ++ [(parallel ++ [(set (match_operand:CC 0 "cc_reg_operand" "=x") ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) ++ (set (match_dup 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))])] ++ "TARGET_HTM" ++{ ++ operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[1]) = 1; ++}) ++ ++(define_insn "*ttest" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") +- (unspec_volatile:CC [(const_int 0)] +- UNSPECV_HTM_TTEST))] ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TTEST)) ++ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] + "TARGET_HTM" + "tabortwci. 
0,1,0" + [(set_attr "type" "htm") +--- gcc/config/rs6000/rs6000-c.c (revision 228826) ++++ gcc/config/rs6000/rs6000-c.c (revision 228827) +@@ -372,7 +372,11 @@ rs6000_target_modify_macros (bool define + if ((flags & OPTION_MASK_VSX) != 0) + rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_HTM) != 0) +- rs6000_define_or_undefine_macro (define_p, "__HTM__"); ++ { ++ rs6000_define_or_undefine_macro (define_p, "__HTM__"); ++ /* Tell the user that our HTM insn patterns act as memory barriers. */ ++ rs6000_define_or_undefine_macro (define_p, "__TM_FENCE__"); ++ } + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_QUAD_MEMORY) != 0) +--- gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c (revision 0) ++++ gcc/testsuite/gcc.target/powerpc/htm-tabort-no-r0.c (revision 226532) +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target { powerpc*-*-* } } } */ ++/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ ++/* { dg-require-effective-target powerpc_htm_ok } */ ++/* { dg-options "-O2 -mhtm -ffixed-r3 -ffixed-r4 -ffixed-r5 -ffixed-r6 -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12" } */ ++ ++/* { dg-final { scan-assembler-not "tabort\\.\[ \t\]0" } } */ ++ ++int ++foo (void) ++{ ++ return __builtin_tabort (10); ++} diff --git a/SOURCES/gcc48-pr68680.patch b/SOURCES/gcc48-pr68680.patch new file mode 100644 index 0000000..59f6ffe --- /dev/null +++ b/SOURCES/gcc48-pr68680.patch @@ -0,0 +1,46 @@ +2015-12-04 Jakub Jelinek + + PR tree-optimization/68680 + * calls.c (special_function_p): Return ECF_MAY_BE_ALLOCA for + BUILT_IN_ALLOCA{,_WITH_ALIGN}. + + * gcc.target/i386/pr68680.c: New test. 
+ +--- gcc/calls.c (revision 231278) ++++ gcc/calls.c (revision 231279) +@@ -564,6 +564,17 @@ special_function_p (const_tree fndecl, i + flags |= ECF_NORETURN; + } + ++ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) ++ switch (DECL_FUNCTION_CODE (fndecl)) ++ { ++ case BUILT_IN_ALLOCA: ++ case BUILT_IN_ALLOCA_WITH_ALIGN: ++ flags |= ECF_MAY_BE_ALLOCA; ++ break; ++ default: ++ break; ++ } ++ + return flags; + } + +--- gcc/testsuite/gcc.target/i386/pr68680.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr68680.c (revision 231279) +@@ -0,0 +1,15 @@ ++/* PR tree-optimization/68680 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fstack-protector-strong" } */ ++ ++int foo (char *); ++ ++int ++bar (unsigned long x) ++{ ++ char a[x]; ++ return foo (a); ++} ++ ++/* Verify that this function is stack protected. */ ++/* { dg-final { scan-assembler "stack_chk_fail" } } */ diff --git a/SOURCES/gcc48-rh1180633.patch b/SOURCES/gcc48-rh1180633.patch new file mode 100644 index 0000000..f3898f9 --- /dev/null +++ b/SOURCES/gcc48-rh1180633.patch @@ -0,0 +1,338 @@ +2016-01-22 Torvald Riegel + + * beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline + boundary. + (htm_fastpath): Remove. + (gtm_thread::begin_transaction): Fix HTM fastpath. + (_ITM_commitTransaction): Adapt. + (_ITM_commitTransactionEH): Adapt. + * libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member + and accessors. + * libitm/config/posix/rwlock.h (gtm_rwlock): Likewise. + * libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt. + * libitm/libitm_i.h (htm_fastpath): Remove declaration. + * libitm/method-serial.cc (htm_mg): Adapt. + (gtm_thread::serialirr_mode): Adapt. + * libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt. 
+ +--- libitm/beginend.cc ++++ libitm/beginend.cc +@@ -32,7 +32,11 @@ using namespace GTM; + extern __thread gtm_thread_tls _gtm_thr_tls; + #endif + +-gtm_rwlock GTM::gtm_thread::serial_lock; ++// Put this at the start of a cacheline so that serial_lock's writers and ++// htm_fastpath fields are on the same cacheline, so that HW transactions ++// only have to pay one cacheline capacity to monitor both. ++gtm_rwlock GTM::gtm_thread::serial_lock ++ __attribute__((aligned(HW_CACHELINE_SIZE))); + gtm_thread *GTM::gtm_thread::list_of_threads = 0; + unsigned GTM::gtm_thread::number_of_threads = 0; + +@@ -54,9 +58,6 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER; + static pthread_key_t thr_release_key; + static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT; + +-// See gtm_thread::begin_transaction. +-uint32_t GTM::htm_fastpath = 0; +- + /* Allocate a transaction structure. */ + void * + GTM::gtm_thread::operator new (size_t s) +@@ -174,9 +175,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // lock's writer flag and thus abort if another thread is or becomes a + // serial transaction. Therefore, if the fastpath is enabled, then a + // transaction is not executing as a HW transaction iff the serial lock is +- // write-locked. This allows us to use htm_fastpath and the serial lock's +- // writer flag to reliable determine whether the current thread runs a HW +- // transaction, and thus we do not need to maintain this information in ++ // write-locked. Also, HW transactions monitor the fastpath control ++ // variable, so that they will only execute if dispatch_htm is still the ++ // current method group. This allows us to use htm_fastpath and the serial ++ // lock's writers flag to reliable determine whether the current thread runs ++ // a HW transaction, and thus we do not need to maintain this information in + // per-thread state. 
+ // If an uninstrumented code path is not available, we can still run + // instrumented code from a HW transaction because the HTM fastpath kicks +@@ -187,9 +190,14 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // indeed in serial mode, and HW transactions should never need serial mode + // for any internal changes (e.g., they never abort visibly to the STM code + // and thus do not trigger the standard retry handling). +- if (likely(htm_fastpath && (prop & pr_hasNoAbort))) ++ if (likely(serial_lock.get_htm_fastpath() && (prop & pr_hasNoAbort))) + { +- for (uint32_t t = htm_fastpath; t; t--) ++ // Note that the snapshot of htm_fastpath that we take here could be ++ // outdated, and a different method group than dispatch_htm may have ++ // been chosen in the meantime. Therefore, take care not to touch ++ // anything besides the serial lock, which is independent of method ++ // groups. ++ for (uint32_t t = serial_lock.get_htm_fastpath(); t; t--) + { + uint32_t ret = htm_begin(); + if (htm_begin_success(ret)) +@@ -197,9 +205,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // We are executing a transaction now. + // Monitor the writer flag in the serial-mode lock, and abort + // if there is an active or waiting serial-mode transaction. ++ // Also checks that htm_fastpath is still nonzero and thus ++ // HW transactions are allowed to run. + // Note that this can also happen due to an enclosing + // serial-mode transaction; we handle this case below. +- if (unlikely(serial_lock.is_write_locked())) ++ if (unlikely(serial_lock.htm_fastpath_disabled())) + htm_abort(); + else + // We do not need to set a_saveLiveVariables because of HTM. +@@ -210,9 +220,12 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb) + // retrying the transaction will be successful. + if (!htm_abort_should_retry(ret)) + break; ++ // Check whether the HTM fastpath has been disabled.
++ if (!serial_lock.get_htm_fastpath()) ++ break; + // Wait until any concurrent serial-mode transactions have finished. + // This is an empty critical section, but won't be elided. +- if (serial_lock.is_write_locked()) ++ if (serial_lock.htm_fastpath_disabled()) + { + tx = gtm_thr(); + if (unlikely(tx == NULL)) +@@ -618,7 +631,7 @@ _ITM_commitTransaction(void) + // a serial-mode transaction. If we are, then there will be no other + // concurrent serial-mode transaction. + // See gtm_thread::begin_transaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + { + htm_commit(); + return; +@@ -634,7 +647,7 @@ _ITM_commitTransactionEH(void *exc_ptr) + { + #if defined(USE_HTM_FASTPATH) + // See _ITM_commitTransaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + { + htm_commit(); + return; +--- libitm/config/linux/rwlock.h ++++ libitm/config/linux/rwlock.h +@@ -39,16 +39,29 @@ struct gtm_thread; + // + // In this implementation, writers are given highest priority access but + // read-to-write upgrades do not have a higher priority than writers. ++// ++// Do not change the layout of this class; it must remain a POD type with ++// standard layout, and the writers field must be first (i.e., so the ++// assembler code can assume that its address is equal to the address of the ++// respective instance of the class), and htm_fastpath must be second. + + class gtm_rwlock + { +- // TODO Put futexes on different cachelines? + std::atomic writers; // Writers' futex. ++ // We put the HTM fastpath control variable here so that HTM fastpath ++ // transactions can check efficiently whether they are allowed to run. 
++ // This must be accessed atomically because threads can load this value ++ // when they are neither a registered reader nor writer (i.e., when they ++ // attempt to execute the HTM fastpath). ++ std::atomic htm_fastpath; ++ // TODO Put these futexes on different cachelines? (writers and htm_fastpath ++ // should remain on the same cacheline. + std::atomic writer_readers;// A confirmed writer waits here for readers. + std::atomic readers; // Readers wait here for writers (iff true). + + public: +- gtm_rwlock() : writers(0), writer_readers(0), readers(0) {}; ++ gtm_rwlock() : writers(0), htm_fastpath(0), writer_readers(0), readers(0) ++ { } + + void read_lock (gtm_thread *tx); + void read_unlock (gtm_thread *tx); +@@ -59,12 +72,28 @@ class gtm_rwlock + bool write_upgrade (gtm_thread *tx); + void write_upgrade_finish (gtm_thread *tx); + +- // Returns true iff there is a concurrent active or waiting writer. +- // This is primarily useful for simple HyTM approaches, and the value being +- // checked is loaded with memory_order_relaxed. +- bool is_write_locked() ++ // Returns true iff there is a concurrent active or waiting writer, or ++ // htm_fastpath is zero. This is primarily useful for simple HyTM ++ // approaches, and the values being checked are loaded with ++ // memory_order_relaxed. ++ bool htm_fastpath_disabled () ++ { ++ return writers.load (memory_order_relaxed) != 0 ++ || htm_fastpath.load (memory_order_relaxed) == 0; ++ } ++ ++ // This does not need to return an exact value, hence relaxed MO is ++ // sufficient. ++ uint32_t get_htm_fastpath () ++ { ++ return htm_fastpath.load (memory_order_relaxed); ++ } ++ // This must only be called while having acquired the write lock, and other ++ // threads do not need to load an exact value; hence relaxed MO is ++ // sufficient. 
++ void set_htm_fastpath (uint32_t val) + { +- return writers.load (memory_order_relaxed) != 0; ++ htm_fastpath.store (val, memory_order_relaxed); + } + + protected: +--- libitm/config/posix/rwlock.h ++++ libitm/config/posix/rwlock.h +@@ -44,19 +44,32 @@ struct gtm_thread; + // + // In this implementation, writers are given highest priority access but + // read-to-write upgrades do not have a higher priority than writers. ++// ++// Do not change the layout of this class; it must remain a POD type with ++// standard layout, and the summary field must be first (i.e., so the ++// assembler code can assume that its address is equal to the address of the ++// respective instance of the class), and htm_fastpath must be second. + + class gtm_rwlock + { +- pthread_mutex_t mutex; // Held if manipulating any field. +- pthread_cond_t c_readers; // Readers wait here +- pthread_cond_t c_writers; // Writers wait here for writers +- pthread_cond_t c_confirmed_writers; // Writers wait here for readers +- + static const unsigned a_writer = 1; // An active writer. + static const unsigned w_writer = 2; // The w_writers field != 0 + static const unsigned w_reader = 4; // The w_readers field != 0 + + std::atomic summary; // Bitmask of the above. ++ ++ // We put the HTM fastpath control variable here so that HTM fastpath ++ // transactions can check efficiently whether they are allowed to run. ++ // This must be accessed atomically because threads can load this value ++ // when they are neither a registered reader nor writer (i.e., when they ++ // attempt to execute the HTM fastpath). ++ std::atomic htm_fastpath; ++ ++ pthread_mutex_t mutex; // Held if manipulating any field. 
++ pthread_cond_t c_readers; // Readers wait here ++ pthread_cond_t c_writers; // Writers wait here for writers ++ pthread_cond_t c_confirmed_writers; // Writers wait here for readers ++ + unsigned int a_readers; // Nr active readers as observed by a writer + unsigned int w_readers; // Nr waiting readers + unsigned int w_writers; // Nr waiting writers +@@ -74,12 +87,28 @@ class gtm_rwlock + bool write_upgrade (gtm_thread *tx); + void write_upgrade_finish (gtm_thread *tx); + +- // Returns true iff there is a concurrent active or waiting writer. +- // This is primarily useful for simple HyTM approaches, and the value being +- // checked is loaded with memory_order_relaxed. +- bool is_write_locked() ++ // Returns true iff there is a concurrent active or waiting writer, or ++ // htm_fastpath is zero. This is primarily useful for simple HyTM ++ // approaches, and the values being checked are loaded with ++ // memory_order_relaxed. ++ bool htm_fastpath_disabled () ++ { ++ return (summary.load (memory_order_relaxed) & (a_writer | w_writer)) ++ || htm_fastpath.load (memory_order_relaxed) == 0; ++ } ++ ++ // This does not need to return an exact value, hence relaxed MO is ++ // sufficient. ++ uint32_t get_htm_fastpath () ++ { ++ return htm_fastpath.load (memory_order_relaxed); ++ } ++ // This must only be called while having acquired the write lock, and other ++ // threads do not need to load an exact value; hence relaxed MO is ++ // sufficient. ++ void set_htm_fastpath (uint32_t val) + { +- return summary.load (memory_order_relaxed) & (a_writer | w_writer); ++ htm_fastpath.store (val, memory_order_relaxed); + } + + protected: +--- libitm/config/posix/rwlock.cc ++++ libitm/config/posix/rwlock.cc +@@ -30,11 +30,12 @@ namespace GTM HIDDEN { + // ??? Move this back to the header file when constexpr is implemented. 
+ + gtm_rwlock::gtm_rwlock() +- : mutex (PTHREAD_MUTEX_INITIALIZER), ++ : summary (0), ++ htm_fastpath (0), ++ mutex (PTHREAD_MUTEX_INITIALIZER), + c_readers (PTHREAD_COND_INITIALIZER), + c_writers (PTHREAD_COND_INITIALIZER), + c_confirmed_writers (PTHREAD_COND_INITIALIZER), +- summary (0), + a_readers (0), + w_readers (0), + w_writers (0) +--- libitm/libitm_i.h ++++ libitm/libitm_i.h +@@ -336,10 +336,6 @@ extern abi_dispatch *dispatch_htm(); + + extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask); + +-// Control variable for the HTM fastpath that uses serial mode as fallback. +-// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction. +-extern uint32_t htm_fastpath; +- + } // namespace GTM + + #endif // LIBITM_I_H +--- libitm/method-serial.cc ++++ libitm/method-serial.cc +@@ -222,13 +222,13 @@ struct htm_mg : public method_group + // Enable the HTM fastpath if the HW is available. The fastpath is + // initially disabled. + #ifdef USE_HTM_FASTPATH +- htm_fastpath = htm_init(); ++ gtm_thread::serial_lock.set_htm_fastpath(htm_init()); + #endif + } + virtual void fini() + { + // Disable the HTM fastpath. +- htm_fastpath = 0; ++ gtm_thread::serial_lock.set_htm_fastpath(0); + } + }; + +@@ -288,7 +288,7 @@ GTM::gtm_thread::serialirr_mode () + #if defined(USE_HTM_FASTPATH) + // HTM fastpath. If we are executing a HW transaction, don't go serial but + // continue. See gtm_thread::begin_transaction. +- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked())) ++ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled())) + return; + #endif + +--- libitm/query.cc ++++ libitm/query.cc +@@ -49,7 +49,7 @@ _ITM_inTransaction (void) + // a transaction and thus we can't deduce this by looking at just the serial + // lock. This function isn't used in practice currently, so the easiest + // way to handle it is to just abort. 
+- if (htm_fastpath && htm_transaction_active()) ++ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) + htm_abort(); + #endif + struct gtm_thread *tx = gtm_thr(); +@@ -69,7 +69,7 @@ _ITM_getTransactionId (void) + { + #if defined(USE_HTM_FASTPATH) + // See ITM_inTransaction. +- if (htm_fastpath && htm_transaction_active()) ++ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active()) + htm_abort(); + #endif + struct gtm_thread *tx = gtm_thr(); diff --git a/SOURCES/gcc48-rh1278872.patch b/SOURCES/gcc48-rh1278872.patch new file mode 100644 index 0000000..ce82c3d --- /dev/null +++ b/SOURCES/gcc48-rh1278872.patch @@ -0,0 +1,78 @@ +2013-07-25 Sterling Augustine + + * dwarf2out.c (size_of_pubnames): Move code to... + (include_pubname_in_output): ...here. New. + (output_pubnames): Call include_pubname_in_output. Move assertion. + +--- gcc/dwarf2out.c (revision 201254) ++++ gcc/dwarf2out.c (revision 201255) +@@ -7806,6 +7806,30 @@ unmark_all_dies (dw_die_ref die) + unmark_all_dies (AT_ref (a)); + } + ++/* Calculate if the entry should appear in the final output file. It may be ++ from a pruned type. */ ++ ++static bool ++include_pubname_in_output (vec *table, pubname_entry *p) ++{ ++ if (table == pubname_table) ++ { ++ /* Enumerator names are part of the pubname table, but the parent ++ DW_TAG_enumeration_type die may have been pruned. Don't output ++ them if that is the case. */ ++ if (p->die->die_tag == DW_TAG_enumerator && !p->die->die_mark) ++ return false; ++ ++ /* Everything else in the pubname table is included. */ ++ return true; ++ } ++ ++ /* The pubtypes table shouldn't include types that have been ++ pruned. */ ++ return (p->die->die_offset != 0 ++ || !flag_eliminate_unused_debug_types); ++} ++ + /* Return the size of the .debug_pubnames or .debug_pubtypes table + generated for the compilation unit.
*/ + +@@ -7818,9 +7842,7 @@ size_of_pubnames (vecdie->die_offset != 0 +- || !flag_eliminate_unused_debug_types) ++ if (include_pubname_in_output (names, p)) + size += strlen (p->name) + DWARF_OFFSET_SIZE + 1; + + size += DWARF_OFFSET_SIZE; +@@ -8999,22 +9021,14 @@ output_pubnames (vecdie->die_tag == DW_TAG_enumerator && !pub->die->die_mark) +- continue; +- +- /* We shouldn't see pubnames for DIEs outside of the main CU. */ +- if (names == pubname_table) +- gcc_assert (pub->die->die_mark); +- +- if (names != pubtype_table +- || pub->die->die_offset != 0 +- || !flag_eliminate_unused_debug_types) ++ if (include_pubname_in_output (names, pub)) + { + dw_offset die_offset = pub->die->die_offset; + ++ /* We shouldn't see pubnames for DIEs outside of the main CU. */ ++ if (names == pubname_table) ++ gcc_assert (pub->die->die_mark); ++ + /* If we're putting types in their own .debug_types sections, + the .debug_pubtypes table will still point to the compile + unit (not the type unit), so we want to use the offset of diff --git a/SOURCES/gcc48-rh1296211.patch b/SOURCES/gcc48-rh1296211.patch new file mode 100644 index 0000000..f1b084a --- /dev/null +++ b/SOURCES/gcc48-rh1296211.patch @@ -0,0 +1,14 @@ +2015-09-02 Alan Modra + + * config/rs6000/sysv4.h (LINK_SPEC): Delete link_target. + +--- gcc/config/rs6000/sysv4.h (revision 227396) ++++ gcc/config/rs6000/sysv4.h (revision 227397) +@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEF + %{R*} \ + %(link_shlib) \ + %{!T*: %(link_start) } \ +-%(link_target) \ + %(link_os)" + + /* Shared libraries are not default. */ diff --git a/SOURCES/gcc48-rh1304449.patch b/SOURCES/gcc48-rh1304449.patch new file mode 100644 index 0000000..213ff0c --- /dev/null +++ b/SOURCES/gcc48-rh1304449.patch @@ -0,0 +1,496 @@ +2015-12-24 Kirill Yukhin + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_PKU_SET): New. + (OPTION_MASK_ISA_PKU_UNSET): Ditto. + (ix86_handle_option): Handle OPT_mpku. 
+ * config.gcc: Add pkuintrin.h to i[34567]86-*-* and x86_64-*-* + targets. + * config/i386/cpuid.h (host_detect_local_cpu): Detect PKU feature. + * config/i386/i386-c.c (ix86_target_macros_internal): Handle PKU ISA + flag. + * config/i386/i386.c (ix86_target_string): Add "-mpku" to + ix86_target_opts. + (ix86_option_override_internal): Define PTA_PKU, mention new key + in skylake-avx512. Handle new ISA bits. + (ix86_valid_target_attribute_inner_p): Add "pku". + (enum ix86_builtins): Add IX86_BUILTIN_RDPKRU and IX86_BUILTIN_WRPKRU. + (builtin_description bdesc_special_args[]): Add new built-ins. + * config/i386/i386.h (define TARGET_PKU): New. + (define TARGET_PKU_P): Ditto. + * config/i386/i386.md (define_c_enum "unspecv"): Add UNSPEC_PKU. + (define_expand "rdpkru"): New. + (define_insn "*rdpkru"): Ditto. + (define_expand "wrpkru"): Ditto. + (define_insn "*wrpkru"): Ditto. + * config/i386/i386.opt (mpku): Ditto. + * config/i386/pkuintrin.h: New file. + * config/i386/x86intrin.h: Include pkuintrin.h + * doc/extend.texi: Describe new built-ins. + * doc/invoke.texi: Describe new switches. + + * g++.dg/other/i386-2.C: Add -mpku. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/rdpku-1.c: New test. + * gcc.target/i386/sse-12.c: Add -mpku. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-33.c: Ditto. + * gcc.target/i386/wrpku-1.c: New test. 
+ +--- gcc/config.gcc (revision 231943) ++++ gcc/config.gcc (revision 231945) +@@ -368,7 +368,7 @@ i[34567]86-*-*) + lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h + avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h + xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h +- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" ++ fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h" + ;; + x86_64-*-*) + cpu_type=i386 +@@ -383,7 +383,7 @@ x86_64-*-*) + lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h + avx2intrin.h fmaintrin.h f16cintrin.h rtmintrin.h + xtestintrin.h rdseedintrin.h prfchwintrin.h adxintrin.h +- fxsrintrin.h xsaveintrin.h xsaveoptintrin.h" ++ fxsrintrin.h xsaveintrin.h xsaveoptintrin.h pkuintrin.h" + need_64bit_hwint=yes + ;; + ia64-*-*) +--- gcc/common/config/i386/i386-common.c (revision 231943) ++++ gcc/common/config/i386/i386-common.c (revision 231945) +@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. + #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND + #define OPTION_MASK_ISA_F16C_SET \ + (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET) ++#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU + + /* Define a set of ISAs which aren't available when a given ISA is + disabled. MMX and SSE ISAs are handled separately. */ +@@ -164,6 +165,7 @@ along with GCC; see the file COPYING3. + #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE + #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND + #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C ++#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU + + /* Implement TARGET_HANDLE_OPTION. 
*/ + +@@ -659,6 +661,19 @@ ix86_handle_option (struct gcc_options * + } + return true; + ++ case OPT_mpku: ++ if (value) ++ { ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU_SET; ++ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_SET; ++ } ++ else ++ { ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PKU_UNSET; ++ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_UNSET; ++ } ++ return true; ++ + /* Comes from final.c -- no real reason to change it. */ + #define MAX_CODE_ALIGN 16 + +--- gcc/config/i386/i386.h (revision 231943) ++++ gcc/config/i386/i386.h (revision 231945) +@@ -80,6 +80,7 @@ see the files COPYING3 and COPYING.RUNTI + #define TARGET_FXSR TARGET_ISA_FXSR + #define TARGET_XSAVE TARGET_ISA_XSAVE + #define TARGET_XSAVEOPT TARGET_ISA_XSAVEOPT ++#define TARGET_PKU TARGET_ISA_PKU + + #define TARGET_LP64 TARGET_ABI_64 + #define TARGET_X32 TARGET_ABI_X32 +--- gcc/config/i386/i386.md (revision 231943) ++++ gcc/config/i386/i386.md (revision 231945) +@@ -224,6 +224,9 @@ (define_c_enum "unspecv" [ + UNSPECV_XTEST + + UNSPECV_NLGR ++ ++ ;; For RDPKRU and WRPKRU support ++ UNSPECV_PKU + ]) + + ;; Constants to represent rounding modes in the ROUND instruction +@@ -18289,6 +18292,48 @@ (define_insn "xtest_1" + [(set_attr "type" "other") + (set_attr "length" "3")]) + ++;; RDPKRU and WRPKRU ++ ++(define_expand "rdpkru" ++ [(parallel ++ [(set (match_operand:SI 0 "register_operand") ++ (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU)) ++ (set (match_dup 2) (const_int 0))])] ++ "TARGET_PKU" ++{ ++ operands[1] = force_reg (SImode, const0_rtx); ++ operands[2] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*rdpkru" ++ [(set (match_operand:SI 0 "register_operand" "=a") ++ (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")] ++ UNSPECV_PKU)) ++ (set (match_operand:SI 1 "register_operand" "=d") ++ (const_int 0))] ++ "TARGET_PKU" ++ "rdpkru" ++ [(set_attr "type" "other")]) ++ ++(define_expand "wrpkru" ++ [(unspec_volatile:SI ++ [(match_operand:SI 0 
"register_operand") ++ (match_dup 1) (match_dup 2)] UNSPECV_PKU)] ++ "TARGET_PKU" ++{ ++ operands[1] = force_reg (SImode, const0_rtx); ++ operands[2] = force_reg (SImode, const0_rtx); ++}) ++ ++(define_insn "*wrpkru" ++ [(unspec_volatile:SI ++ [(match_operand:SI 0 "register_operand" "a") ++ (match_operand:SI 1 "register_operand" "d") ++ (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)] ++ "TARGET_PKU" ++ "wrpkru" ++ [(set_attr "type" "other")]) ++ + (include "mmx.md") + (include "sse.md") + (include "sync.md") +--- gcc/config/i386/pkuintrin.h (revision 0) ++++ gcc/config/i386/pkuintrin.h (revision 231945) +@@ -0,0 +1,45 @@ ++/* Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#if !defined _X86INTRIN_H_INCLUDED ++# error "Never use directly; include instead." 
++#endif ++ ++#ifndef _PKUINTRIN_H_INCLUDED ++#define _PKUINTRIN_H_INCLUDED ++ ++extern __inline unsigned int ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_rdpkru_u32(void) ++{ ++ return __builtin_ia32_rdpkru (); ++} ++ ++extern __inline void ++__attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_wrpkru(unsigned int key) ++{ ++ return __builtin_ia32_wrpkru (key); ++} ++ ++#endif /* _PKUINTRIN_H_INCLUDED */ +--- gcc/config/i386/cpuid.h (revision 231943) ++++ gcc/config/i386/cpuid.h (revision 231945) +@@ -74,6 +74,10 @@ + #define bit_RDSEED (1 << 18) + #define bit_ADX (1 << 19) + ++/* %ecx */ ++#define bit_PKU (1 << 3) ++#define bit_OSPKE (1 << 4) ++ + /* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */ + #define bit_XSAVEOPT (1 << 0) + +--- gcc/config/i386/x86intrin.h (revision 231943) ++++ gcc/config/i386/x86intrin.h (revision 231945) +@@ -119,4 +119,8 @@ + + #include + ++#ifdef __PKU__ ++#include ++#endif ++ + #endif /* _X86INTRIN_H_INCLUDED */ +--- gcc/config/i386/i386-c.c (revision 231943) ++++ gcc/config/i386/i386-c.c (revision 231945) +@@ -348,6 +348,8 @@ ix86_target_macros_internal (HOST_WIDE_I + def_or_undef (parse_in, "__XSAVE__"); + if (isa_flag & OPTION_MASK_ISA_XSAVEOPT) + def_or_undef (parse_in, "__XSAVEOPT__"); ++ if (isa_flag & OPTION_MASK_ISA_PKU) ++ def_or_undef (parse_in, "__PKU__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) + def_or_undef (parse_in, "__SSE_MATH__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) +--- gcc/config/i386/i386.opt (revision 231943) ++++ gcc/config/i386/i386.opt (revision 231945) +@@ -626,3 +626,7 @@ Split 32-byte AVX unaligned store + mrtm + Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save + Support RTM built-in functions and code generation ++ ++mpku ++Target Report Mask(ISA_PKU) Var(ix86_isa_flags) Save ++Support PKU built-in functions and code generation +--- gcc/config/i386/driver-i386.c (revision 231943) ++++ 
gcc/config/i386/driver-i386.c (revision 231945) +@@ -408,6 +408,7 @@ const char *host_detect_local_cpu (int a + unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; + unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; + unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; ++ unsigned int has_pku = 0; + + bool arch; + +@@ -479,6 +480,8 @@ const char *host_detect_local_cpu (int a + has_fsgsbase = ebx & bit_FSGSBASE; + has_rdseed = ebx & bit_RDSEED; + has_adx = ebx & bit_ADX; ++ ++ has_pku = ecx & bit_OSPKE; + } + + if (max_level >= 13) +@@ -855,12 +858,13 @@ const char *host_detect_local_cpu (int a + const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; + const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; + const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; ++ const char *pku = has_pku ? " -mpku" : " -mno-pku"; + + options = concat (options, cx16, sahf, movbe, ase, pclmul, + popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, + tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, + hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, +- fxsr, xsave, xsaveopt, NULL); ++ fxsr, xsave, xsaveopt, pku, NULL); + } + + done: +--- gcc/config/i386/i386.c (revision 231943) ++++ gcc/config/i386/i386.c (revision 231945) +@@ -2632,6 +2632,7 @@ ix86_target_string (HOST_WIDE_INT isa, i + { "-mrtm", OPTION_MASK_ISA_RTM }, + { "-mxsave", OPTION_MASK_ISA_XSAVE }, + { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, ++ { "-mpku", OPTION_MASK_ISA_PKU }, + }; + + /* Flag options. 
*/ +@@ -2905,6 +2906,7 @@ ix86_option_override_internal (bool main + #define PTA_FXSR (HOST_WIDE_INT_1 << 37) + #define PTA_XSAVE (HOST_WIDE_INT_1 << 38) + #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) ++#define PTA_PKU (HOST_WIDE_INT_1 << 60) + + /* if this reaches 64, need to widen struct pta flags below */ + +@@ -3429,6 +3431,9 @@ ix86_option_override_internal (bool main + if (processor_alias_table[i].flags & PTA_XSAVEOPT + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) + ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; ++ if (processor_alias_table[i].flags & PTA_PKU ++ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) ++ ix86_isa_flags |= OPTION_MASK_ISA_PKU; + if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) + x86_prefetch_sse = true; + +@@ -4220,6 +4225,7 @@ ix86_valid_target_attribute_inner_p (tre + IX86_ATTR_ISA ("fxsr", OPT_mfxsr), + IX86_ATTR_ISA ("xsave", OPT_mxsave), + IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), ++ IX86_ATTR_ISA ("pku", OPT_mpku), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), +@@ -27042,6 +27048,10 @@ enum ix86_builtins + IX86_BUILTIN_CPU_IS, + IX86_BUILTIN_CPU_SUPPORTS, + ++ /* PKU instructions. */ ++ IX86_BUILTIN_RDPKRU, ++ IX86_BUILTIN_WRPKRU, ++ + IX86_BUILTIN_MAX + }; + +@@ -27357,6 +27367,10 @@ static const struct builtin_description + { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID }, ++ ++ /* RDPKRU and WRPKRU. 
*/ ++ { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, ++ { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }, + }; + + /* Builtins with variable number of arguments. */ +--- gcc/doc/extend.texi (revision 231943) ++++ gcc/doc/extend.texi (revision 231945) +@@ -10996,6 +10996,13 @@ void __builtin_ia32_xabort (status) + int __builtin_ia32_xtest () + @end smallexample + ++The following built-in functions are available when @option{-mpku} is used. ++They generate reads and writes to PKRU. ++@smallexample ++void __builtin_ia32_wrpkru (unsigned int) ++unsigned int __builtin_ia32_rdpkru () ++@end smallexample ++ + @node X86 transactional memory intrinsics + @subsection X86 transaction memory intrinsics + +--- gcc/doc/invoke.texi (revision 231943) ++++ gcc/doc/invoke.texi (revision 231945) +@@ -645,7 +645,7 @@ Objective-C and Objective-C++ Dialects}. + -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol + -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol + -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol +--mbmi2 -mrtm -mlwp -mthreads @gol ++-mbmi2 -mrtm -mlwp -mpku -mthreads @gol + -mno-align-stringops -minline-all-stringops @gol + -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol + -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol +@@ -14326,6 +14326,8 @@ preferred alignment to @option{-mpreferr + @itemx -mlzcnt + @itemx -mno-lzcnt + @itemx -mrtm ++@itemx -mpku ++@itemx -mno-pku + @itemx -mtbm + @itemx -mno-tbm + @opindex mmmx +@@ -14336,7 +14338,7 @@ preferred alignment to @option{-mpreferr + @opindex mno-3dnow + These switches enable or disable the use of instructions in the MMX, SSE, + SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, +-FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM or 3DNow!@: ++FMA, SSE4A, FMA4, 
XOP, LWP, ABM, BMI, BMI2, LZCNT, RTM, PKU or 3DNow!@: + extended instruction sets. + These extensions are also available as built-in functions: see + @ref{X86 Built-in Functions}, for details of the functions enabled and +--- gcc/testsuite/gcc.target/i386/sse-12.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-12.c (revision 231945) +@@ -3,7 +3,7 @@ + popcntintrin.h and mm_malloc.h are usable + with -O -std=c89 -pedantic-errors. */ + /* { dg-do compile } */ +-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + #include + +--- gcc/testsuite/gcc.target/i386/sse-13.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-13.c (revision 231945) +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + #include + +--- gcc/testsuite/gcc.target/i386/sse-22.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-22.c (revision 231945) +@@ -268,7 +268,7 @@ test_2 (_mm_clmulepi64_si128, __m128i, _ + + /* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). 
*/ + #ifdef DIFFERENT_PRAGMAS +-#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt") ++#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt,pku") + #endif + #include + /* xopintrin.h */ +--- gcc/testsuite/gcc.target/i386/sse-23.c (revision 231943) ++++ gcc/testsuite/gcc.target/i386/sse-23.c (revision 231945) +@@ -183,7 +183,7 @@ + /* rtmintrin.h */ + #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1) + +-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt") ++#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,pku") + #include + #include + #include +--- gcc/testsuite/gcc.target/i386/rdpku-1.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/rdpku-1.c (revision 231945) +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mpku -O2" } */ ++/* { dg-final { scan-assembler "rdpkru\n" } } */ ++ ++#include ++ ++unsigned extern ++rdpku_test (void) ++{ ++ return _rdpkru_u32 (); ++} +--- gcc/testsuite/gcc.target/i386/wrpku-1.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/wrpku-1.c (revision 231945) +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mpku -O2" } */ ++/* { dg-final { scan-assembler "wrpkru\n" } } */ ++ ++#include ++ ++void extern ++wrpku_test (unsigned int key) ++{ ++ _wrpkru (key); ++} +--- gcc/testsuite/g++.dg/other/i386-2.C (revision 231943) ++++ gcc/testsuite/g++.dg/other/i386-2.C (revision 231945) +@@ -1,9 +1,9 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -pedantic-errors 
-march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +- popcntintrin.h, fmaintrin.h and mm_malloc.h.h are usable with ++ popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h.h are usable with + -O -pedantic-errors. */ + + #include +--- gcc/testsuite/g++.dg/other/i386-3.C (revision 231943) ++++ gcc/testsuite/g++.dg/other/i386-3.C (revision 231945) +@@ -1,9 +1,9 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt" } */ ++/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mpku" } */ + + /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, + xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, +- popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with ++ popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with + -O -fkeep-inline-functions. */ + + #include diff --git a/SOURCES/gcc48-rh1312436.patch b/SOURCES/gcc48-rh1312436.patch new file mode 100644 index 0000000..6bf9313 --- /dev/null +++ b/SOURCES/gcc48-rh1312436.patch @@ -0,0 +1,76 @@ +2015-12-02 Pierre-Marie de Rodat + + * dwarf2out.c (dwar2out_var_location): Enhance pattern matching to get + the SYMBOL_REF they embed. + (gen_subprogram_die): Handle such calls. 
+ +--- gcc/dwarf2out.c (revision 231184) ++++ gcc/dwarf2out.c (revision 231185) +@@ -18051,18 +18051,23 @@ gen_subprogram_die (tree decl, dw_die_re + } + if (mode == VOIDmode || mode == BLKmode) + continue; +- if (XEXP (XEXP (arg, 0), 0) == pc_rtx) ++ /* Get dynamic information about call target only if we ++ have no static information: we cannot generate both ++ DW_AT_abstract_origin and DW_AT_GNU_call_site_target ++ attributes. */ ++ if (ca_loc->symbol_ref == NULL_RTX) + { +- gcc_assert (ca_loc->symbol_ref == NULL_RTX); +- tloc = XEXP (XEXP (arg, 0), 1); +- continue; +- } +- else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER +- && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx) +- { +- gcc_assert (ca_loc->symbol_ref == NULL_RTX); +- tlocc = XEXP (XEXP (arg, 0), 1); +- continue; ++ if (XEXP (XEXP (arg, 0), 0) == pc_rtx) ++ { ++ tloc = XEXP (XEXP (arg, 0), 1); ++ continue; ++ } ++ else if (GET_CODE (XEXP (XEXP (arg, 0), 0)) == CLOBBER ++ && XEXP (XEXP (XEXP (arg, 0), 0), 0) == pc_rtx) ++ { ++ tlocc = XEXP (XEXP (arg, 0), 1); ++ continue; ++ } + } + reg = NULL; + if (REG_P (XEXP (XEXP (arg, 0), 0))) +@@ -20842,15 +20847,27 @@ dwarf2out_var_location (rtx loc_note) + if (!CALL_P (prev)) + prev = XVECEXP (PATTERN (prev), 0, 0); + ca_loc->tail_call_p = SIBLING_CALL_P (prev); ++ ++ /* Look for a SYMBOL_REF in the "prev" instruction. */ + x = get_call_rtx_from (PATTERN (prev)); + if (x) + { +- x = XEXP (XEXP (x, 0), 0); +- if (GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_DECL (x) +- && TREE_CODE (SYMBOL_REF_DECL (x)) == FUNCTION_DECL) +- ca_loc->symbol_ref = x; ++ /* Try to get the call symbol, if any. */ ++ if (MEM_P (XEXP (x, 0))) ++ x = XEXP (x, 0); ++ /* First, look for a memory access to a symbol_ref. */ ++ if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF ++ && SYMBOL_REF_DECL (XEXP (x, 0)) ++ && TREE_CODE (SYMBOL_REF_DECL (XEXP (x, 0))) == FUNCTION_DECL) ++ ca_loc->symbol_ref = XEXP (x, 0); ++ /* Otherwise, look at a compile-time known user-level function ++ declaration. 
*/ ++ else if (MEM_P (x) ++ && MEM_EXPR (x) ++ && TREE_CODE (MEM_EXPR (x)) == FUNCTION_DECL) ++ ca_loc->symbol_ref = XEXP (DECL_RTL (MEM_EXPR (x)), 0); + } ++ + ca_loc->block = insn_scope (prev); + if (call_arg_locations) + call_arg_loc_last->next = ca_loc; diff --git a/SOURCES/gcc48-rh1312850.patch b/SOURCES/gcc48-rh1312850.patch new file mode 100644 index 0000000..841627f --- /dev/null +++ b/SOURCES/gcc48-rh1312850.patch @@ -0,0 +1,24 @@ +2016-05-26 Martin Sebor + Jakub Jelinek + + * asan/asan_rtl.cc (InitializeHighMemEnd): Backport part of upstream + r221457 fix and typo fix from r206158. + +--- libsanitizer/asan/asan_rtl.cc.jj 2013-03-04 12:44:18.000000000 +0100 ++++ libsanitizer/asan/asan_rtl.cc 2016-05-26 09:57:10.761973999 +0200 +@@ -308,11 +308,13 @@ static void InitializeHighMemEnd() { + # if defined(__powerpc64__) + // FIXME: + // On PowerPC64 we have two different address space layouts: 44- and 46-bit. +- // We somehow need to figure our which one we are using now and choose ++ // We somehow need to figure out which one we are using now and choose + // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL. + // Note that with 'ulimit -s unlimited' the stack is moved away from the top + // of the address space, so simply checking the stack address is not enough. +- kHighMemEnd = (1ULL << 44) - 1; // 0x00000fffffffffffUL ++ // This should (does) work for both PowerPC64 Endian modes. ++ kHighMemEnd = ++ (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; + # else + kHighMemEnd = (1ULL << 47) - 1; // 0x00007fffffffffffUL; + # endif diff --git a/SOURCES/gcc48-rh1344807.patch b/SOURCES/gcc48-rh1344807.patch new file mode 100644 index 0000000..130f558 --- /dev/null +++ b/SOURCES/gcc48-rh1344807.patch @@ -0,0 +1,37 @@ +2016-06-14 Jason Merrill + + * call.c (add_function_candidate): Be more careful about + ref-to-ptr conversion. + +2016-06-15 Jakub Jelinek + + * g++.dg/cpp0x/ref-qual17.C: New test. 
+ +--- gcc/cp/call.c.jj 2014-08-06 10:45:03.260163142 +0200 ++++ gcc/cp/call.c 2016-06-15 11:15:06.663878423 +0200 +@@ -1975,7 +1975,9 @@ add_function_candidate (struct z_candida + bool rv = FUNCTION_RVALUE_QUALIFIED (TREE_TYPE (fn)); + parmtype = cp_build_reference_type (parmtype, rv); + if (TREE_CODE (arg) == CONVERT_EXPR +- && TYPE_PTR_P (TREE_TYPE (arg))) ++ && TYPE_PTR_P (TREE_TYPE (arg)) ++ && (TREE_CODE (TREE_TYPE (TREE_OPERAND (arg, 0))) ++ == REFERENCE_TYPE)) + /* Strip conversion from reference to pointer. */ + arg = TREE_OPERAND (arg, 0); + arg = build_fold_indirect_ref (arg); +--- gcc/testsuite/g++.dg/cpp0x/ref-qual17.C.jj 2016-06-15 11:12:57.692558903 +0200 ++++ gcc/testsuite/g++.dg/cpp0x/ref-qual17.C 2016-06-15 11:07:02.000000000 +0200 +@@ -0,0 +1,12 @@ ++// { dg-do compile { target c++11 } } ++ ++struct A ++{ ++ void foo () &; ++}; ++ ++void ++bar (__UINTPTR_TYPE__ a) ++{ ++ reinterpret_cast(a)->foo (); ++} diff --git a/SOURCES/gcc48-s390-z13.patch b/SOURCES/gcc48-s390-z13.patch new file mode 100644 index 0000000..05a3d5f --- /dev/null +++ b/SOURCES/gcc48-s390-z13.patch @@ -0,0 +1,16938 @@ +Backport of trunk revisions: r214898, r221047, r223367, r223368, r223369, r223393, r223395, r223396, r223397, r223398, r223399, r223400, r223403, r224227, r224867, r224868, r224869, r224870, r224871, r224872, r224873, r224874, r226671, r226672, r227058, r227635, r227636, r227637, r227780, r231153, r231154, r231155, r231156, r231157, r231158, r231159, r231809, r232972, r232973, r233548, r233549, r233550, r233552, r233553, r233554, r233555, r233556, r233623, r236067 + +2016-05-10 Andreas Krebbel + + * config/s390/s390.md ("*vec_cmpdf_cconly") + ("*fixuns_truncdfdi2_z13") + ("*fixuns_trunc2_z196") + ("*fix_truncdfdi2_bfp_z13", "*floatunsdidf2_z13") + ("*extendsfdf2_z13"): Replace TARGET_Z13 with TARGET_VX. + +2016-02-23 Andreas Krebbel + + * gcc.target/s390/md/movstr-2.c: Move and rename to ... + * gcc.target/s390/vector/stpcpy-1.c: ... this one. 
+ +2016-02-19 Andreas Krebbel + + * config/s390/vector.md: Add missing commutative operand markers + to the patterns which qualify for one. + * config/s390/vx-builtins.md: Likewise. + +2016-02-19 Andreas Krebbel + + * config/s390/vector.md (VI, VI_QHS): Add single element vector + types to mode iterators. + (vec_double): ... and mode attribute. + * config/s390/vx-builtins.md (non_vec_int): Likewise. + +2016-02-19 Andreas Krebbel + + * config/s390/vector.md ("add3", "sub3"): + Change the predicate of op2 from nonimmediate to general and let + reload fix it if necessary. + + * gcc.target/s390/vector/int128-1.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/vecintrin.h (vec_sub_u128): Define missing macro. + +2016-02-19 Andreas Krebbel + + * config/s390/s390.c (s390_expand_vcond): Use the compare operand + mode. + + * gcc.target/s390/vector/vec-vcond-1.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/s390-protos.h: Add s390_expand_vec_movstr prototype. + * config/s390/s390.c (s390_expand_vec_movstr): New function. + * config/s390/s390.md ("movstr"): Call + s390_expand_vec_movstr. + + * gcc.target/s390/md/movstr-2.c: New test. + +2016-02-19 Andreas Krebbel + + * config/s390/s390.md: Add missing output modifier for operand 1 + to print it as address properly. + +2016-02-19 Andreas Krebbel + + * config/s390/2827.md: Rename ooo_* insn attributes to zEC12_*. + * config/s390/2964.md: New file. + * config/s390/s390.c (s390_get_sched_attrmask): Use the right set + of insn grouping attributes depending on the CPU level. + (s390_get_unit_mask): New function. + (s390_sched_score): Remove the OOO from the scheduling macros. + Add loop to calculate a score for the instruction mix. + (s390_sched_reorder): Likewise plus improve debug output. + (s390_sched_variable_issue): Rename macros as above. Calculate + the unit distances after actually scheduling an insn. Improve + debug output. + (s390_sched_init): Clear last_scheduled_unit_distance array. 
+ * config/s390/s390.md: Include 2964.md. + +2016-01-29 Dominik Vogt + + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Format + declaration name with %qs and print it in both error messages. + Also fix indentation. + +2016-01-29 Dominik Vogt + + PR other/69006 + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Remove + trailing blank line from error message. + +2015-12-18 Robin Dapp + + * config/s390/predicates.md: Change and rename + constm1_operand to all_ones_operand + * config/s390/s390.c (s390_expand_vcond): Use all_ones_operand + * config/s390/vector.md: Likewise + +2015-12-02 Andreas Krebbel + + * config/s390/predicates.md (const_mask_operand): New predicate. + * config/s390/s390-builtins.def: Set a smaller bitmask for a few builtins. + * config/s390/vector.md: Change predicate from immediate_operand + to either const_int_operand or const_mask_operand. Add special + insn conditions on patterns which have to exclude certain values. + * config/s390/vx-builtins.md: Likewise. + +2015-12-02 Andreas Krebbel + + * config/s390/vector.md ("*vec_set"): Change shift count + mode from DI to SI. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-builtin-types.def: New builtin types added. + * config/s390/s390-builtins.def: Add s390_vec_splat_* definitions. + * config/s390/s390.c (s390_expand_builtin): Always truncate + constants to the mode in the pattern. + * config/s390/vecintrin.h: Let the vec_splat_* macros point to the + respective builtin __builtin_s390_vec_splat_*. + + * gcc.target/s390/zvector/vec-splat-2.c: New test. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Sort builtin types. + +2015-12-02 Andreas Krebbel + + * config/s390/s390-c.c (s390_get_vstring_flags): Invert the + condition for the RT flag. + +2015-12-02 Andreas Krebbel + + * config/s390/constraints.md ("jKK"): New constraint. + * config/s390/s390.c (tm-constrs.h): Include for + satisfies_constraint_*. 
+ (s390_legitimate_constant_p): Allow jKK constants. Use + satisfies_constraint_* also for the others. + (legitimate_reload_vector_constant_p): Likewise. + (print_operand): Allow h output modifier on vectors. + * config/s390/vector.md ("mov"): Add vrepi. + + * gcc.target/s390/vector/vec-vrepi-1.c: New test. + +2015-12-02 Andreas Krebbel + + * config/s390/vector.md ("*vec_splats"): Fix constraint + letter I->K. + + * gcc.target/s390/zvector/vec-splat-1.c: New test. + +2015-09-15 Andreas Krebbel + + * config/s390/s390.c (s390_const_operand_ok): Add missing + brackets. + +2015-09-10 Andreas Krebbel + + * config/s390/s390.c (s390_contiguous_bitmask_vector_p): Reject if + the vector element is bigger than 64 bit. + + * gcc.target/s390/vector/vec-genbytemask-1.c: Add check for V1TI + initialization with a byte mask. No change expected here. + * gcc.target/s390/vector/vec-genmask-1.c: Fix whitespace. + * gcc.target/s390/vector/vec-genmask-2.c: Add check for V1TI + initialization with contiguous bitmask. Literal pool is expected + to be used here. + +2015-09-10 Andreas Krebbel + + * config/s390/vx-builtins.md ("vec_vmal", "vec_vmah") + ("vec_vmalh"): Change mode iterator from VI_HW to VI_HW_QHS. + +2015-09-10 Andreas Krebbel + + * config/s390/s390.c: Add V1TImode to constant pool modes. + +2015-08-21 Dominik Vogt + + * config/s390/s390-builtins.def: Fix value range of vec_load_bndry. + + * gcc.target/s390/zvector/vec-load_bndry-1.c: New test. + +2015-08-06 Andreas Krebbel + + * config/s390/s390.c (s390_expand_tbegin): Expand either + tbegin_1_z13 or tbegin_1 depending on VX flag. + * config/s390/s390.md ("tbegin_1_z13"): New expander. + + * gcc.target/s390/htm-builtins-z13-1.c: New test. + +2015-08-06 Andreas Krebbel + + * config/s390/s390.opt: Clarify description for -mzvector + * doc/invoke.texi: Add documentation for -mhtm, -mvx, and + -mzvector.
+ +2015-06-24 Andreas Krebbel + + * config/s390/vx-builtins.md + ("vec_scatter_element_") + ("vec_scatter_element_SI"): Replace gf mode + attribute with bhfgq. + +2015-06-24 Andreas Krebbel + + * config/s390/s390-builtins.def: Fix vpopct instruction comments. + +2015-06-24 Andreas Krebbel + + * config/s390/s390-builtin-types.def: Add flag to indicate the + options under which the function type is needed. + * config/s390/s390-builtins.def: Add flag to indicate the options + under which the builtin is enabled. + * config/s390/s390-builtins.h: Add flags parameter to macro + definitions. + (bflags_for_builtin): New function. + (flags_for_builtin): Renamed to ... + (opflags_for_builtin): ... this. + * config/s390/s390-c.c (s390_resolve_overloaded_builtin): Rename + flags_for_builtin to bflags_for_builtin and + flags_overloaded_builtin_var to opflags_overloaded_builtin_var. + * config/s390/s390.c: Add initialization of bflags_builtin and + opflags_builtin arrays. + Remove code for flags_builtin. + (s390_init_builtins): Only create builtin function types if one of + their flags is active. + Only create builtins if all of their flags are active. + (s390_expand_builtin): Rename flags_for_builtin to + opflags_for_builtin. + +2015-06-24 Andreas Krebbel + + * config/s390/vecintrin.h: Remove internal builtins. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_secondary_reload): Fix check for + GENERAL_REGS register class. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_support_vector_misalignment): Call + default implementation for !TARGET_VX. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_legitimate_constant_p): Add + TARGET_VX check. + +2015-06-24 Andreas Krebbel + + * config/s390/s390.c (s390_vector_abi): New variable definition. + (s390_check_type_for_vector_abi): New function. + (TARGET_ASM_FILE_END): New macro definition. + (s390_asm_file_end): New function. + (s390_function_arg): Call s390_check_type_for_vector_abi. 
+ (s390_gimplify_va_arg): Likewise. + * configure: Regenerate. + * configure.ac: Check for .gnu_attribute Binutils feature. + + * gcc.target/s390/vector/vec-abi-1.c: Add gnu attribute check. + * gcc.target/s390/vector/vec-abi-attr-1.c: New test. + * gcc.target/s390/vector/vec-abi-attr-2.c: New test. + * gcc.target/s390/vector/vec-abi-attr-3.c: New test. + * gcc.target/s390/vector/vec-abi-attr-4.c: New test. + * gcc.target/s390/vector/vec-abi-attr-5.c: New test. + * gcc.target/s390/vector/vec-abi-attr-6.c: New test. + +2015-06-08 Jakub Jelinek + + * genattrtab.c (insn_alternatives): Change type from int * + to uint64_t *. + (check_attr_test): Shift ((uint64_t) 1) instead of 1 up. + (get_attr_value): Change type of num_alt to uint64_t. + (compute_alternative_mask): Change return type from + int to uint64_t, shift ((uint64_t) 1) instead of 1 up. + (make_alternative_compare, mk_attr_alt): Change argument type + from int to uint64_t. + (simplify_test_exp): Change type of i from int to uint64_t. + Shift ((uint64_t) 1) instead of 1 up. + (main): Adjust oballocvec first argument from int to uint64_t. + Shift ((uint64_t) 1) instead of 1 up. + +2015-05-19 Andreas Krebbel + + * lib/target-supports.exp: Vector do not always have natural + alignment on s390*. + +2015-05-19 Andreas Krebbel + + * gcc.dg/tree-ssa/gen-vect-11b.c: Disable vector instructions on + s390*. + * gcc.dg/tree-ssa/gen-vect-11c.c: Likewise. + +2015-05-19 Andreas Krebbel + + * gcc.target/s390/zvector/vec-dbl-math-compile-1.c: New test. + * gcc.target/s390/zvector/vec-genbytemask-1.c: New test. + * gcc.target/s390/zvector/vec-genmask-1.c: New test. + * gcc.target/s390/zvector/vec-lcbb-1.c: New test. + * gcc.target/s390/zvector/vec-overloading-1.c: New test. + * gcc.target/s390/zvector/vec-overloading-2.c: New test. + * gcc.target/s390/zvector/vec-overloading-3.c: New test. + * gcc.target/s390/zvector/vec-overloading-4.c: New test. + * gcc.target/s390/zvector/vec-test-mask-1.c: New test. 
+ * gcc.target/s390/zvector/vec-elem-1.c: New test. + +2015-05-19 Andreas Krebbel + + * config.gcc: Add vecintrin.h to extra_headers. Add s390-c.o to + c_target_objs and cxx_target_objs. Add t-s390 to tmake_file. + * config/s390/s390-builtin-types.def: New file. + * config/s390/s390-builtins.def: New file. + * config/s390/s390-builtins.h: New file. + * config/s390/s390-c.c: New file. + * config/s390/s390-modes.def: Add modes CCVEQANY, CCVH, + CCVHANY, CCVHU, CCVHUANY, CCVFHANY, CCVFHEANY. + * config/s390/s390-protos.h (s390_expand_vec_compare_cc) + (s390_cpu_cpp_builtins, s390_register_target_pragmas): Add + prototypes. + * config/s390/s390.c (s390-builtins.h, s390-builtins.def): + Include. + (flags_builtin, flags_overloaded_builtin_var, s390_builtin_types) + (s390_builtin_fn_types, s390_builtin_decls, code_for_builtin): New + variable definitions. + (s390_const_operand_ok): New function. + (s390_expand_builtin): Rewrite. + (s390_init_builtins): New function. + (s390_handle_vectorbool_attribute): New function. + (s390_attribute_table): Add s390_vector_bool attribute. + (s390_match_ccmode_set): Handle new cc modes CCVH, CCVHU. + (s390_branch_condition_mask): Generate masks for new modes. + (s390_expand_vec_compare_cc): New function. + (s390_mangle_type): Add mangling for vector bool types. + (enum s390_builtin): Remove. + (s390_atomic_assign_expand_fenv): Rename constants for sfpc and + efpc builtins. + * config/s390/s390.h (TARGET_CPU_CPP_BUILTINS): Call + s390_cpu_cpp_builtins. + (REGISTER_TARGET_PRAGMAS): New macro. + * config/s390/s390.md: Define more UNSPEC_VEC_* constants. + (insn_cmp mode attribute): Add new CC modes. + (s390_sfpc, s390_efpc): Rename patterns to sfpc and efpc. + (lcbb): New pattern definition. + * config/s390/s390intrin.h: Include vecintrin.h. + * config/s390/t-s390: New file. + * config/s390/vecintrin.h: New file. + * config/s390/vector.md: Include vx-builtins.md. + * config/s390/vx-builtins.md: New file.S/390 zvector builtin + support. 
+ +2015-05-19 Andreas Krebbel + + * config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and + CCVFHE. + * config/s390/s390.c (s390_match_ccmode_set): Handle new modes. + (s390_select_ccmode): Likewise. + (s390_canonicalize_comparison): Swap operands if necessary. + (s390_expand_vec_compare_scalar): Expand DFmode compare using + single element vector instructions. + (s390_emit_compare): Call s390_expand_vec_compare_scalar. + (s390_branch_condition_mask): Generate CC masks for the new modes. + * config/s390/s390.md (v0, vf, vd): New mode attributes. + (VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes. + (*vec_cmpdf_cconly, *fixuns_truncdfdi2_z13) + (*fix_trunc2_bfp, *floatunsdidf2_z13) + (*floatuns2, *extendsfdf2_z13) + (*extend2): New insn definition. + (fix_trunc2_bfp, floatuns2) + (extend2): Turn into expander. + (floatdi2, truncdfsf2, add3, sub3, mul3) + (div3, *neg2, *abs2, *negabs2) + (sqrt2): Add vector instruction. + + * gcc.target/s390/vector/vec-scalar-cmp-1.c: New test. + +2015-05-19 Andreas Krebbel + + * gcc.target/s390/s390.exp + (check_effective_target_vector): New check. + * gcc.target/s390/vector/vec-abi-1.c: New test. + * gcc.target/s390/vector/vec-abi-2.c: New test. + * gcc.target/s390/vector/vec-abi-3.c: New test. + * gcc.target/s390/vector/vec-abi-4.c: New test. + * gcc.target/s390/vector/vec-abi-align-1.c: New test. + * gcc.target/s390/vector/vec-abi-single-1.c: New test. + * gcc.target/s390/vector/vec-abi-single-2.c: New test. + * gcc.target/s390/vector/vec-abi-struct-1.c: New test. + * gcc.target/s390/vector/vec-abi-vararg-1.c: New test. + * gcc.target/s390/vector/vec-abi-vararg-2.c: New test. + * gcc.target/s390/vector/vec-clobber-1.c: New test. + * gcc.target/s390/vector/vec-cmp-1.c: New test. + * gcc.target/s390/vector/vec-cmp-2.c: New test. + * gcc.target/s390/vector/vec-dbl-math-compile-1.c: New test. + * gcc.target/s390/vector/vec-genbytemask-1.c: New test. + * gcc.target/s390/vector/vec-genbytemask-2.c: New test.
+ * gcc.target/s390/vector/vec-genmask-1.c: New test. + * gcc.target/s390/vector/vec-genmask-2.c: New test. + * gcc.target/s390/vector/vec-init-1.c: New test. + * gcc.target/s390/vector/vec-int-math-compile-1.c: New test. + * gcc.target/s390/vector/vec-shift-1.c: New test. + * gcc.target/s390/vector/vec-sub-1.c: New test. + +2015-05-19 Andreas Krebbel + + * config/s390/constraints.md (j00, jm1, jxx, jyy, v): New + constraints. + * config/s390/predicates.md (const0_operand, constm1_operand) + (constable_operand): Accept vector operands. + * config/s390/s390-modes.def: Add supported vector modes. + * config/s390/s390-protos.h (s390_cannot_change_mode_class) + (s390_function_arg_vector, s390_contiguous_bitmask_vector_p) + (s390_bytemask_vector_p, s390_expand_vec_strlen) + (s390_expand_vec_compare, s390_expand_vcond) + (s390_expand_vec_init): Add prototypes. + * config/s390/s390.c (VEC_ARG_NUM_REG): New macro. + (s390_vector_mode_supported_p): New function. + (s390_contiguous_bitmask_p): Mask out the irrelevant bits. + (s390_contiguous_bitmask_vector_p): New function. + (s390_bytemask_vector_p): New function. + (s390_split_ok_p): Vector regs don't work either. + (regclass_map): Add VEC_REGS. + (s390_legitimate_constant_p): Handle vector constants. + (s390_cannot_force_const_mem): Handle CONST_VECTOR. + (legitimate_reload_vector_constant_p): New function. + (s390_preferred_reload_class): Handle CONST_VECTOR. + (s390_reload_symref_address): Likewise. + (s390_secondary_reload): Vector memory instructions only support + short displacements. Rename reload*_nonoffmem* to reload*_la*. + (s390_emit_ccraw_jump): New function. + (s390_expand_vec_strlen): New function. + (s390_expand_vec_compare): New function. + (s390_expand_vcond): New function. + (s390_expand_vec_init): New function. + (s390_dwarf_frame_reg_mode): New function. + (print_operand): Handle addresses with 'O' and 'R' constraints. + (NR_C_MODES, constant_modes): Add vector modes. 
+ (s390_output_pool_entry): Handle vector constants. + (s390_hard_regno_mode_ok): Handle vector registers. + (s390_class_max_nregs): Likewise. + (s390_cannot_change_mode_class): New function. + (s390_invalid_arg_for_unprototyped_fn): New function. + (s390_function_arg_vector): New function. + (s390_function_arg_float): Remove size variable. + (s390_pass_by_reference): Handle vector arguments. + (s390_function_arg_advance): Likewise. + (s390_function_arg): Likewise. + (s390_return_in_memory): Vector values are returned in a VR if + possible. + (s390_function_and_libcall_value): Handle vector arguments. + (s390_gimplify_va_arg): Likewise. + (s390_call_saved_register_used): Consider the arguments named. + (s390_conditional_register_usage): Disable v16-v31 for non-vec + targets. + (s390_preferred_simd_mode): New function. + (s390_support_vector_misalignment): New function. + (s390_vector_alignment): New function. + (TARGET_STRICT_ARGUMENT_NAMING, TARGET_DWARF_FRAME_REG_MODE) + (TARGET_VECTOR_MODE_SUPPORTED_P) + (TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN) + (TARGET_VECTORIZE_PREFERRED_SIMD_MODE) + (TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT) + (TARGET_VECTOR_ALIGNMENT): Define target macro. + * config/s390/s390.h (FUNCTION_ARG_PADDING): Define macro. + (FIRST_PSEUDO_REGISTER): Increase value. + (VECTOR_NOFP_REGNO_P, VECTOR_REGNO_P, VECTOR_NOFP_REG_P) + (VECTOR_REG_P): Define macros. + (FIXED_REGISTERS, CALL_USED_REGISTERS) + (CALL_REALLY_USED_REGISTERS, REG_ALLOC_ORDER) + (HARD_REGNO_CALL_PART_CLOBBERED, REG_CLASS_NAMES) + (FUNCTION_ARG_REGNO_P, FUNCTION_VALUE_REGNO_P, REGISTER_NAMES): + Add vector registers. + (CANNOT_CHANGE_MODE_CLASS): Call C function. + (enum reg_class): Add VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS. + (SECONDARY_MEMORY_NEEDED): Allow SF<->SI mode moves without + memory. + (DBX_REGISTER_NUMBER, FIRST_VEC_ARG_REGNO, LAST_VEC_ARG_REGNO) + (SHORT_DISP_IN_RANGE, VECTOR_STORE_FLAG_VALUE): Define macro. 
+ * config/s390/s390.md (UNSPEC_VEC_*): New constants. + (VR*_REGNUM): New constants. + (ALL): New mode iterator. + (INTALL): Remove mode iterator. + Include vector.md. + (movti): Implement TImode moves for VRs. + Disable TImode splitter for VR targets. + Implement splitting TImode GPR<->VR moves. + (reload*_tomem_z10, reload*_toreg_z10): Replace INTALL with ALL. + (reload_nonoffmem_in, reload_nonoffmem_out): Rename to + reload_la_in, reload_la_out. + (*movdi_64, *movsi_zarch, *movhi, *movqi, *mov_64dfp) + (*mov_64, *mov_31): Add vector instructions. + (TD/TF mode splitter): Enable for GPRs only (formerly !FP). + (mov SF SD): Prefer lder, lde for loading. + Add lrl and strl instructions. + Add vector instructions. + (strlen): Rename old strlen to strlen_srst. + Call s390_expand_vec_strlen on z13. + (*cc_to_int): Change predicate to nonimmediate_operand. + (addti3): Rename to *addti3. New expander. + (subti3): Rename to *subti3. New expander. + * config/s390/vector.md: New file. + +2015-05-19 Andreas Krebbel + + * common/config/s390/s390-common.c (processor_flags_table): Add + z13. + * config.gcc: Add z13. + * config/s390/s390-opts.h (enum processor_type): Add + PROCESSOR_2964_Z13. + * config/s390/s390.c (s390_adjust_priority): Check for + PROCESSOR_2964_Z13. + (s390_reorg): Likewise. + (s390_sched_reorder): Likewise. + (s390_sched_variable_issue): Likewise. + (s390_loop_unroll_adjust): Likewise. + (s390_option_override): Likewise. Default to -mvx when available. + * config/s390/s390.h (enum processor_flags): Add PF_Z13 and PF_VX. + (TARGET_CPU_Z13, TARGET_CPU_VX, TARGET_Z13, TARGET_VX) + (TARGET_VX_ABI): Define + macros. + (TARGET_DEFAULT): Add MASK_OPT_VX. + * config/s390/s390.md ("cpu" attribute): Add z13. + ("cpu_facility" attribute): Add vec. + * config/s390/s390.opt (processor_type): Add z13. + (mvx): New options. + * doc/invoke.texi: Add z13 option for -march.
+ +2015-05-19 Andreas Krebbel + + * optabs.c (expand_vec_perm): Don't re-use SEL as target operand. + +2015-05-19 Andreas Krebbel + + * config/s390/s390.c (s390_secondary_reload): Fix check for + load/store relative. + +2015-05-19 Andreas Krebbel + + * recog.h: Increase MAX_RECOG_ALTERNATIVES. Change type of + alternative_mask to uint64_t. + +2015-02-27 Andreas Krebbel + + * config/s390/s390.c (enum s390_builtin): + Add S390_BUILTIN_S390_SFPC and S390_BUILTIN_S390_EFPC. + (code_for_builtin): Add CODE_FOR_s390_sfpc and CODE_FOR_s390_efpc. + (s390_init_builtins): Generate new builtin functions. + * config/s390/s390.md (UNSPECV_SFPC, UNSPECV_EFPC): New constants. + (s390_sfpc, s390_efpc): New pattern definitions. + +2014-09-03 Matthew Fortune + + * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook. + * targhooks.c (default_dwarf_frame_reg_mode): New function. + * targhooks.h (default_dwarf_frame_reg_mode): New prototype. + * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document. + * doc/tm.texi: Regenerate. + * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode + selection logic to default_dwarf_frame_reg_mode. 
+ +--- gcc/common/config/s390/s390-common.c 2013-08-14 13:55:13.000000000 +0200 ++++ gcc/common/config/s390/s390-common.c 2016-05-11 15:53:24.000000000 +0200 +@@ -42,7 +42,10 @@ EXPORTED_CONST int processor_flags_table + /* z196 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT + | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196, + /* zEC12 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT ++ | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX, ++ /* z13 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT + | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX ++ | PF_Z13 | PF_VX + }; + + /* Change optimizations to be performed, depending on the +--- gcc/config/s390/2827.md 2015-06-18 17:09:04.000000000 +0200 ++++ gcc/config/s390/2827.md 2016-05-11 18:03:45.000000000 +0200 +@@ -18,20 +18,19 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . + +- +-(define_attr "ooo_cracked" "" ++(define_attr "zEC12_cracked" "" + (cond [(eq_attr "mnemonic" "cgdbr,clfxtr,cdgtr,celfbr,cxgtr,clfebr,clc,lngfr,cs,cfxbr,xc,clfdbr,basr,ex,cxlgtr,clfdtr,srdl,lpgfr,cdlgbr,cgxtr,cxlftr,nc,cxftr,cdfbr,clfxbr,cdftr,clgxbr,cgdtr,cxlgbr,mvc,clgdtr,cegbr,cfebr,cdlftr,sldl,cdlgtr,csg,chhsi,clgebr,cxgbr,cxfbr,cdlfbr,cgebr,lzxr,oc,cdgbr,brasl,cgxbr,cxlfbr,clgxtr,exrl,cfdbr,celgbr,clgdbr,lxr,cpsdr,lcgfr,bras,srda,cefbr") (const_int 1)] + (const_int 0))) + +-(define_attr "ooo_expanded" "" ++(define_attr "zEC12_expanded" "" + (cond [(eq_attr "mnemonic" "dlr,dsgr,d,dsgf,stam,dsgfr,dlgr,dsg,cds,dr,stm,mvc,dl,cdsg,stmy,dlg,stmg,lam") (const_int 1)] + (const_int 0))) + +-(define_attr "ooo_endgroup" "" ++(define_attr "zEC12_endgroup" "" + (cond [(eq_attr "mnemonic" "ipm") (const_int 1)] + (const_int 0))) + +-(define_attr "ooo_groupalone" "" ++(define_attr "zEC12_groupalone" "" + (cond [(eq_attr "mnemonic" 
"lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush") (const_int 1)] + (const_int 0))) + +--- gcc/config/s390/2964.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/2964.md 2016-05-11 18:03:45.000000000 +0200 +@@ -0,0 +1,232 @@ ++;; Scheduling description for z13. ++;; Copyright (C) 2016 Free Software Foundation, Inc. ++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it under ++;; the terms of the GNU General Public License as published by the Free ++;; Software Foundation; either version 3, or (at your option) any later ++;; version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++;; for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . 
++ ++ ++; generator options: vector_ecycs=12 cracked_ecycs=6 scale_ecycs=5 ++ ++(define_attr "z13_cracked" "" ++ (cond [(eq_attr "mnemonic" "celgbr,vscef,vsceg,exrl,clfebr,cefbr,chhsi,\ ++vgef,vgeg,cdlftr,lcgfr,cfdbr,cgdbr,lzxr,cfxbr,rnsbg,cgdtr,cegbr,rxsbg,ex,\ ++cgxtr,clfxtr,cdlgtr,brasl,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,vsteh,\ ++clfdtr,cdfbr,lngfr,clgebr,stpq,cs,lpgfr,cdlgbr,lpq,cdgtr,d,cgxbr,cdftr,\ ++rosbg,clgdbr,cdgbr,bras,tbegin,clfdbr,cdlfbr,cgebr,clfxbr,lxr,csy,csg,clgdtr,\ ++clgxtr") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_expanded" "" ++ (cond [(eq_attr "mnemonic" "cxlftr,cdsg,cdsy,stam,lam,dsgf,lmg,cxlgtr,\ ++dl,cxftr,sldl,dsg,cxlfbr,cxgtr,stmg,stmy,stm,lm,cds,lmy,cxfbr,cxlgbr,srda,\ ++srdl,cxgbr,dlg") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_groupalone" "" ++ (cond [(eq_attr "mnemonic" "mvc,dxbr,lxebr,axtr,cxtr,alcr,lxdb,lxeb,mxtr,\ ++mfy,cxbr,dsgr,lcxbr,slb,mr,dr,alc,slbr,maebr,mlgr,dsgfr,sxtr,tdcxt,tabort,\ ++msebr,lxdtr,ltxtr,slbg,ml,mxbr,maeb,oc,dxtr,msdb,sqxbr,mseb,xc,m,clc,mlg,\ ++mlr,fixbra,alcgr,nc,sfpc,dlgr,fixbr,slbgr,fixtr,lpxbr,axbr,lxdbr,ltxbr,\ ++tcxb,dlr,lnxbr,sxbr,flogr,alcg,tend,madb,bcr_flush") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_endgroup" "" ++ (cond [(eq_attr "mnemonic" "ipm") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_lsu" "" ++ (cond [(eq_attr "mnemonic" "vlbb,mvc,llgc,llc,llhrl,vl,llghrl,vlrepf,\ ++vlrepg,vlreph,lde,ldy,tabort,l,llh,ld,lg,ly,vlrepb,vllezb,vllezf,vllezg,\ ++vllezh,oc,xc,clc,lrl,ear,nc,lgrl,sfpc,llgf,llgfrl,llgh,llgt,lcbb,vll,sar") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_fxu" "" ++ (cond [(eq_attr "mnemonic" "s,lcgr,x,nop,oiy,ppa,ng,msy,sgrk,vstl,aghik,\ ++msgf,ipm,mvi,stocg,rll,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,vst,ark,\ ++xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,laa,lder,sgf,lan,llilf,\ ++llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,nopr,mfy,ngrk,lbr,\ 
++br,dsgr,stdy,ork,ldgr,lcr,cg,ch,lgfrl,cl,stoc,cr,agfr,stgrl,cy,alfi,xg,\ ++cgfi,xi,clfhsi,cgfr,xr,slb,mghi,clfi,slg,clhhsi,agfi,clfit,sly,mr,ldr,nihf,\ ++nihh,algfi,dr,nihl,algf,algfr,algr,clgf,clgr,clgt,aghi,alc,alg,locg,alr,\ ++locr,cghi,aly,alghsik,slbr,clgfrl,mhy,cit,nr,ny,xiy,mlgr,sthy,cly,dsgfr,\ ++rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,lgr,slrk,clrt,icy,laog,og,agr,\ ++mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,nill,lcdfr,mviy,tmhh,tmhl,sthrl,\ ++ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,vlgvb,llgcr,tmh,tml,clmy,slr,cfi,\ ++stc,std,ste,stg,sth,locgr,slbg,sty,tmlh,la,lb,mvghi,lh,risbgn,lrvg,lr,asi,\ ++lt,ahik,lrvr,cgf,cgh,cgr,clhrl,lzdr,tmll,mh,ml,vlvgb,ms,lrv,vlvgf,xgrk,\ ++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oi,cgfrl,\ ++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\ ++algrk,alsi,srak,slgf,a,c,slgr,m,o,algsi,icmh,srag,iilf,ogrk,clg,icmy,\ ++cli,clm,clr,clt,slgrk,mlg,lao,mlr,risbg,mvhhi,lat,etnd,lax,iihf,sra,alcgr,\ ++msgr,clghsi,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,lgfi,dlgr,lgfr,\ ++slgfi,llcr,slbgr,chrl,lgdr,pfpo,lang,basr,sllg,sllk,lghi,lghr,vlgvf,vlgvg,\ ++vlgvh,vlr,chsi,lngr,cghrl,srl,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,\ ++llihl,dlr,msgfi,msgfr,msg,flogr,xy,msr,clgfi,clgfr,ogr,popcnt,alcg,lndfr,\ ++larl,sll,tmy,msfi,ic,lpdfr,tend,lnr") (const_int 1)] ++ (const_int 0))) ++ ++(define_attr "z13_unit_vfu" "" ++ (cond [(eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\ ++vgbm,verimb,vone,verimf,verimg,verimh,dxbr,verllvb,lpebr,verllvf,verllvg,\ ++verllvh,vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,\ ++vlcf,vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,\ ++vsh,vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,\ ++vmrhg,vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,\ ++vmloh,lxdb,ldeb,mdtr,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,mxtr,vchlf,vchlg,\ 
++vchlh,vfcedbs,vfcedb,vceqgs,cxbr,msdbr,vcdgb,debr,vceqhs,meeb,lcxbr,vavglb,\ ++vavglf,vavglg,vavglh,wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,\ ++vmahh,vsrlb,wcgdb,lcdbr,vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,\ ++ley,vistrb,vistrf,vistrh,tceb,wfsqdb,sqeb,vsumqf,vsumqg,vesrlb,vfeezbs,\ ++maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,\ ++vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,vzero,msebr,veslb,veslf,veslg,vfenezb,\ ++vfenezf,vfenezh,vistrfs,vchf,vchg,vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,\ ++veslvf,veslvg,veslvh,wclgdb,vfmdb,vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,\ ++vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,vmxlh,ltdtr,vsbcbiq,ceb,wfddb,sebr,vistrhs,\ ++lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,\ ++sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,\ ++vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,vpksf,vpksg,vpksh,sqdb,mxbr,sqdbr,\ ++vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,ddtr,verllf,verllg,verllh,\ ++wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,vmnf,vmng,vfchedbs,\ ++lnebr,vfidb,dxtr,ddb,msdb,vmalhb,vfddb,vmalhf,vmalhh,vpkshs,vfsdb,sqxbr,\ ++vmalhw,ltdbr,vmob,vmof,vmoh,deb,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,\ ++vldeb,vmahf,vgfmb,fidbr,vfsqdb,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,\ ++vesravg,vesravh,vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,\ ++vscbib,vceqf,vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,\ ++vfchedb,tdcet,vslb,vpklsfs,adbr,sqebr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,\ ++vmlef,vmleh,cpsdr,vmalb,vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,\ ++vgfmh,fidtr,vpklshs,lndbr,vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,\ ++fixtr,vaccb,wfadb,vaccf,vaccg,vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,\ ++ledtr,vuplb,vuplf,axbr,lxdbr,ltxbr,vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,\ ++vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,\ 
++vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,vesraf,vesrag,vesrah,vflpdb,vmnh,\ ++vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,vuplhh,vsumgf,vsumgh,ldebr,vuplhw,\ ++vchfs,madb,ddbr") (const_int 1)] ++ (const_int 0))) ++ ++(define_insn_reservation "z13_0" 0 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "s,lcgr,x,nop,oiy,vlbb,ppa,ng,sgrk,vstl,aghik,\ ++mvc,ipm,llgc,mvi,stocg,rll,jg,srlg,cghsi,clgit,srlk,alrk,sg,sh,sl,st,sy,\ ++vst,ark,xgr,agsi,tm,nrk,shy,llhr,agf,alcr,slgfr,sr,clgrt,llc,laa,lder,sgf,\ ++lan,llhrl,llilf,llilh,ag,llill,lay,al,n,laxg,ar,ahi,sgr,ntstg,ay,stcy,vl,\ ++nopr,ngrk,lbr,br,stdy,ork,ldgr,lcr,cg,ch,llghrl,lgfrl,cl,stoc,cr,agfr,stgrl,\ ++cy,alfi,xg,cgfi,xi,vlrepf,vlrepg,vlreph,clfhsi,cgfr,xr,slb,mghi,clfi,slg,\ ++lde,clhhsi,agfi,clfit,sly,ldr,ldy,nihf,nihh,algfi,nihl,algf,algfr,algr,\ ++clgf,clgr,clgt,aghi,alc,alg,locg,alr,locr,cghi,aly,alghsik,slbr,clgfrl,\ ++mhy,cit,nr,ny,xiy,sthy,cly,rllg,cgit,lgb,lgf,clgrl,lgh,lrvgr,cliy,cgrl,\ ++lgr,slrk,clrt,icy,laog,og,agr,mvhi,lhrl,or,lhr,vlvgp,lhy,nilf,oy,nilh,tabort,\ ++nill,lcdfr,mviy,tmhh,tmhl,sthrl,ltgf,ltgr,srk,clghrl,ahy,vstef,vsteg,ah,\ ++vlgvb,llgcr,tmh,tml,clmy,slr,cfi,stc,std,ste,stg,sth,l,locgr,llh,slbg,sty,\ ++tmlh,la,lb,ld,mvghi,lg,lh,risbgn,lrvg,lr,asi,lt,ahik,ly,lrvr,vlrepb,vllezb,\ ++cgf,cgh,vllezf,vllezg,vllezh,cgr,clhrl,lzdr,tmll,mh,vlvgb,lrv,vlvgf,xgrk,\ ++vlvgg,llgfr,vlvgh,slfi,chi,chy,mhi,lzer,alhsik,ni,ltgfr,loc,icm,oc,oi,cgfrl,\ ++agrk,lgat,oilh,llghr,lghrl,oill,xihf,lpgr,cgrt,clrl,sgfr,lpr,lgbr,strl,\ ++algrk,alsi,srak,brcl,slgf,xc,a,c,slgr,j,o,algsi,icmh,srag,iilf,ogrk,clc,\ ++clg,icmy,cli,clm,clr,clt,slgrk,lrl,lao,risbg,mvhhi,lat,etnd,lax,iihf,sra,\ ++alcgr,clghsi,ear,nc,lgrl,stey,ngr,xilf,laag,oihf,oihh,oihl,ltg,ltr,niy,\ ++lgfi,sfpc,lgfr,slgfi,llcr,llgf,llgfrl,llgh,slbgr,llgt,chrl,lgdr,pfpo,lang,\ ++basr,lcbb,sllg,sllk,lghi,vll,lghr,vlgvf,vlgvg,vlgvh,vlr,chsi,lngr,cghrl,\ ++srl,sar,lhi,oilf,crl,crt,afi,xrk,llgtr,llihf,llihh,llihl,xy,clgfi,clgfr,\ 
++ogr,popcnt,alcg,lndfr,larl,sll,tmy,ic,lpdfr,tend,lnr,bcr_flush")) "nothing") ++ ++(define_insn_reservation "z13_1" 1 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "celgbr,vscef,vsceg,msy,msgf,cxlftr,cdsg,cdsy,\ ++exrl,clfebr,cefbr,chhsi,stam,vgef,vgeg,cdlftr,lam,mfy,lcgfr,cfdbr,dsgf,\ ++cgdbr,lzxr,lmg,cfxbr,rnsbg,cxlgtr,mr,dl,cxftr,sldl,cgdtr,cegbr,rxsbg,ex,\ ++cgxtr,clfxtr,mlgr,cdlgtr,brasl,dsg,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,\ ++vsteh,cxlfbr,clfdtr,cxgtr,stmg,stmy,stm,lm,cds,cdfbr,ml,ms,lngfr,clgebr,\ ++stpq,lmy,cs,lpgfr,cdlgbr,lpq,cxfbr,cxlgbr,cdgtr,d,m,mlg,mlr,cgxbr,cdftr,\ ++msgr,rosbg,clgdbr,cdgbr,srda,bras,srdl,tbegin,clfdbr,cdlfbr,cxgbr,cgebr,\ ++dlg,clfxbr,lxr,csy,msgfi,msgfr,msg,flogr,msr,csg,msfi,clgdtr,clgxtr")) "nothing") ++ ++(define_insn_reservation "z13_2" 2 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "seb,vcksm,vfadb,vleib,vchgs,vleif,vleig,vleih,\ ++vgbm,verimb,vone,verimf,verimg,verimh,verllvb,lpebr,verllvf,verllvg,verllvh,\ ++vfeneb,wcdgb,vfenef,vfeneh,vchhs,vctzb,vctzf,vctzg,vctzh,vlcb,aeb,vlcf,\ ++vlcg,vlch,vfmsdb,vgfmab,ltebr,vgfmaf,vgfmag,vgfmah,vmaeh,vsb,vsf,vsg,vsh,\ ++vsl,vsq,lxebr,cdtr,fiebr,vupllb,vupllf,vupllh,vmrhb,madbr,vtm,vmrhf,vmrhg,\ ++vmrhh,axtr,fiebra,vleb,cxtr,vlef,vleg,vleh,vpkf,vpkg,vpkh,vmlob,vmlof,vmloh,\ ++lxdb,ldeb,vceqfs,adb,wflndb,lxeb,vn,vo,vchlb,vx,vchlf,vchlg,vchlh,vfcedbs,\ ++vfcedb,vceqgs,cxbr,msdbr,vcdgb,vceqhs,meeb,lcxbr,vavglb,vavglf,vavglg,vavglh,\ ++wfcedbs,vmrlb,vmrlf,vmrlg,vmrlh,wfchedbs,vmxb,tcdb,vmahh,vsrlb,wcgdb,lcdbr,\ ++vistrbs,vrepb,wfmdb,vrepf,vrepg,vreph,ler,wcdlgb,ley,vistrb,vistrf,vistrh,\ ++tceb,vsumqf,vsumqg,vesrlb,vfeezbs,maebr,vesrlf,vesrlg,vesrlh,vmeb,vmef,\ ++vmeh,meebr,vflcdb,wfmadb,vperm,sxtr,vclzf,vgm,vgmb,vgmf,vgmg,vgmh,tdcxt,\ ++vzero,msebr,veslb,veslf,veslg,vfenezb,vfenezf,vfenezh,vistrfs,vchf,vchg,\ ++vchh,vmhb,vmhf,vmhh,cdb,veslvb,ledbr,veslvf,veslvg,veslvh,wclgdb,vfmdb,\ ++vmnlb,vmnlf,vmnlg,vmnlh,vclzb,vfeezfs,vclzg,vclzh,mdb,vmxlb,vmxlf,vmxlg,\ 
++vmxlh,ltdtr,vsbcbiq,ceb,sebr,vistrhs,lxdtr,lcebr,vab,vaf,vag,vah,ltxtr,\ ++vlpf,vlpg,vsegb,vaq,vsegf,vsegh,wfchdbs,sdtr,cdbr,vfeezhs,le,wldeb,vfmadb,\ ++vchlbs,vacccq,vmaleb,vsel,vmalef,vmaleh,vflndb,mdbr,vmlb,wflpdb,ldetr,vpksfs,\ ++vpksf,vpksg,vpksh,vmaeb,veslh,vmaef,vpklsf,vpklsg,vpklsh,verllb,vchb,verllf,\ ++verllg,verllh,wfsdb,maeb,vclgdb,vftcidb,vpksgs,vmxf,vmxg,vmxh,fidbra,vmnb,\ ++vmnf,vmng,vfchedbs,lnebr,vfidb,msdb,vmalhb,vmalhf,vmalhh,vpkshs,vfsdb,vmalhw,\ ++ltdbr,vmob,vmof,vmoh,vchlfs,mseb,vcdlgb,vlpb,wfmsdb,vlph,vmahb,vldeb,vmahf,\ ++vgfmb,fidbr,aebr,wledb,vchlgs,vesravb,vfchdbs,cebr,vesravf,vesravg,vesravh,\ ++vcgdb,fixbra,vrepib,vrepif,vrepig,vrepih,tdcdt,vchlhs,vceqb,vscbib,vceqf,\ ++vceqg,vscbif,vscbig,vscbih,vmlhw,vscbiq,vuphb,vuphf,vuphh,vfchedb,tdcet,\ ++vslb,vpklsfs,adbr,vfchdb,fixbr,vpklsgs,vsldb,vmleb,vmlef,vmleh,cpsdr,vmalb,\ ++vmalf,vavgb,vmlf,vavgf,vavgg,vavgh,vgfmf,vgfmg,vgfmh,fidtr,vpklshs,lndbr,\ ++vno,lpdbr,vacq,vledb,vchbs,vfeeb,vfeef,vfeeh,fixtr,vaccb,wfadb,vaccf,vaccg,\ ++vacch,vnot,vmalob,vaccq,vmalof,vmaloh,lpxbr,vuplb,vuplf,axbr,lxdbr,ltxbr,\ ++vpopct,vpdi,vmlhb,vmlhf,vmlhh,sdbr,vnc,vsumb,vsrab,vsumh,vmaob,vmaof,vmaoh,\ ++vesrlvb,vesrlvf,vesrlvg,vesrlvh,tcxb,vceqbs,vceqh,lnxbr,sxbr,vesrab,wflcdb,\ ++vesraf,vesrag,vesrah,vflpdb,vmnh,vsbiq,adtr,vsra,vsrl,vuplhb,sdb,vuplhf,\ ++vuplhh,vsumgf,vsumgh,ldebr,vuplhw,vchfs,madb")) "nothing") ++ ++(define_insn_reservation "z13_3" 3 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "ledtr")) "nothing") ++ ++(define_insn_reservation "z13_4" 4 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dr,mxbr,dlr")) "nothing") ++ ++(define_insn_reservation "z13_6" 6 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "debr,sqeb,deb,sqebr")) "nothing") ++ ++(define_insn_reservation "z13_7" 7 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "mdtr")) "nothing") ++ ++(define_insn_reservation "z13_8" 8 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "wfddb,ddb,vfddb,ddbr")) "nothing") ++ 
++(define_insn_reservation "z13_9" 9 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dsgr,wfsqdb,dsgfr,sqdb,sqdbr,vfsqdb")) "nothing") ++ ++(define_insn_reservation "z13_13" 13 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "mxtr,ddtr")) "nothing") ++ ++(define_insn_reservation "z13_16" 16 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "sqxbr")) "nothing") ++ ++(define_insn_reservation "z13_17" 17 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dxtr")) "nothing") ++ ++(define_insn_reservation "z13_20" 20 ++ (and (eq_attr "cpu" "z13") ++ (eq_attr "mnemonic" "dxbr,dlgr")) "nothing") ++ +--- gcc/config/s390/constraints.md 2013-01-21 16:11:50.000000000 +0100 ++++ gcc/config/s390/constraints.md 2016-05-11 18:40:20.880008612 +0200 +@@ -29,7 +29,15 @@ + ;; c -- Condition code register 33. + ;; d -- Any register from 0 to 15. + ;; f -- Floating point registers. ++;; j -- Multiple letter constraint for constant scalar and vector values ++;; j00: constant zero scalar or vector ++;; jm1: constant scalar or vector with all bits set ++;; jxx: contiguous bitmask of 0 or 1 in all vector elements ++;; jyy: constant consisting of byte chunks being either 0 or 0xff ++;; jKK: constant vector with all elements having the same value and ++;; matching K constraint + ;; t -- Access registers 36 and 37. ++;; v -- Vector registers v0-v31. + ;; C -- A signed 8-bit constant (-128..127) + ;; D -- An unsigned 16-bit constant (0..65535) + ;; G -- Const double zero operand +@@ -109,6 +117,11 @@ + Access registers 36 and 37") + + ++(define_register_constraint "v" ++ "VEC_REGS" ++ "Vector registers v0-v31") ++ ++ + ;; + ;; General constraints for constants. + ;; +@@ -374,6 +387,33 @@ + (match_test "s390_O_constraint_str ('n', ival)"))) + + ++;; ++;; Vector constraints follow. 
++;; ++ ++(define_constraint "j00" ++ "Zero scalar or vector constant" ++ (match_test "op == CONST0_RTX (GET_MODE (op))")) ++ ++(define_constraint "jm1" ++ "All one bit scalar or vector constant" ++ (match_test "op == CONSTM1_RTX (GET_MODE (op))")) ++ ++(define_constraint "jxx" ++ "@internal" ++ (and (match_code "const_vector") ++ (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)"))) ++ ++(define_constraint "jyy" ++ "@internal" ++ (and (match_code "const_vector") ++ (match_test "s390_bytemask_vector_p (op, NULL)"))) ++ ++(define_constraint "jKK" ++ "@internal" ++ (and (and (match_code "const_vector") ++ (match_test "s390_const_vec_duplicate_p (op)")) ++ (match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))"))) + + + ;; +--- gcc/config/s390/predicates.md 2013-08-14 13:55:12.000000000 +0200 ++++ gcc/config/s390/predicates.md 2016-05-11 18:17:42.508662564 +0200 +@@ -24,16 +24,26 @@ + + ;; operands -------------------------------------------------------------- + +-;; Return true if OP a (const_int 0) operand. +- ++;; Return true if OP is a const 0 operand (int/float/vector). + (define_predicate "const0_operand" +- (and (match_code "const_int, const_double") ++ (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + ++;; Return true if OP is an all ones operand (int/vector). ++(define_predicate "all_ones_operand" ++ (and (match_code "const_int, const_double, const_vector") ++ (match_test "INTEGRAL_MODE_P (GET_MODE (op))") ++ (match_test "op == CONSTM1_RTX (mode)"))) ++ ++;; Return true if OP is a 4 bit mask operand ++(define_predicate "const_mask_operand" ++ (and (match_code "const_int") ++ (match_test "UINTVAL (op) < 16"))) ++ + ;; Return true if OP is constant.
+ + (define_special_predicate "consttable_operand" +- (and (match_code "symbol_ref, label_ref, const, const_int, const_double") ++ (and (match_code "symbol_ref, label_ref, const, const_int, const_double, const_vector") + (match_test "CONSTANT_P (op)"))) + + ;; Return true if OP is a valid S-type operand. +--- gcc/config/s390/s390-builtins.def 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-builtins.def 2016-05-11 17:53:57.000000000 +0200 +@@ -0,0 +1,2488 @@ ++/* Builtin definitions for IBM S/390 and zSeries ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++#undef O_U1 ++#undef O_U2 ++#undef O_U3 ++#undef O_U4 ++#undef O_U5 ++#undef O_U8 ++#undef O_U12 ++#undef O_U16 ++#undef O_U32 ++ ++#undef O_S2 ++#undef O_S3 ++#undef O_S4 ++#undef O_S5 ++#undef O_S8 ++#undef O_S12 ++#undef O_S16 ++#undef O_S32 ++#undef O_ELEM ++#undef O_LIT ++ ++#undef O1_U1 ++#undef O2_U1 ++#undef O3_U1 ++#undef O4_U1 ++ ++#undef O1_U2 ++#undef O2_U2 ++#undef O3_U2 ++#undef O4_U2 ++ ++#undef O1_U3 ++#undef O2_U3 ++#undef O3_U3 ++#undef O4_U3 ++ ++#undef O1_U4 ++#undef O2_U4 ++#undef O3_U4 ++#undef O4_U4 ++ ++#undef O1_U5 ++#undef O2_U5 ++#undef O3_U5 ++#undef O4_U5 ++ ++#undef O1_U8 ++#undef O2_U8 ++#undef O3_U8 ++#undef O4_U8 ++ ++#undef O1_U12 ++#undef O2_U12 ++#undef O3_U12 ++#undef O4_U12 ++ ++#undef O1_U16 ++#undef O2_U16 ++#undef O3_U16 ++#undef O4_U16 ++ ++#undef O1_U32 ++#undef O2_U32 ++#undef O3_U32 ++#undef O4_U32 ++ ++#undef O1_S2 ++#undef O2_S2 ++#undef O3_S2 ++#undef O4_S2 ++ ++#undef O1_S3 ++#undef O2_S3 ++#undef O3_S3 ++#undef O4_S3 ++ ++#undef O1_S4 ++#undef O2_S4 ++#undef O3_S4 ++#undef O4_S4 ++ ++#undef O1_S5 ++#undef O2_S5 ++#undef O3_S5 ++#undef O4_S5 ++ ++#undef O1_S8 ++#undef O2_S8 ++#undef O3_S8 ++#undef O4_S8 ++ ++#undef O1_S12 ++#undef O2_S12 ++#undef O3_S12 ++#undef O4_S12 ++ ++#undef O1_S16 ++#undef O2_S16 ++#undef O3_S16 ++#undef O4_S16 ++ ++#undef O1_S32 ++#undef O2_S32 ++#undef O3_S32 ++#undef O4_S32 ++ ++#undef O1_ELEM ++#undef O2_ELEM ++#undef O3_ELEM ++#undef O4_ELEM ++ ++#undef O1_LIT ++#undef O2_LIT ++#undef O3_LIT ++#undef O4_LIT ++ ++#undef O_SHIFT ++#undef O_IMM_P ++#undef O_UIMM_P ++#undef O_SIMM_P ++ ++#define O_U1 1 /* unsigned 1 bit literal */ ++#define O_U2 2 /* unsigned 2 bit literal */ ++#define O_U3 3 /* unsigned 3 bit literal */ ++#define O_U4 4 /* unsigned 4 bit literal */ ++#define O_U5 5 /* unsigned 5 bit literal */ ++#define O_U8 6 /* unsigned 8 bit literal */ ++#define O_U12 7 /* unsigned 12 bit literal */ ++#define O_U16 8 /* unsigned 16 bit literal */ ++#define O_U32 9 /* unsigned 32 
bit literal */ ++ ++#define O_S2 10 /* signed 2 bit literal */ ++#define O_S3 11 /* signed 3 bit literal */ ++#define O_S4 12 /* signed 4 bit literal */ ++#define O_S5 13 /* signed 5 bit literal */ ++#define O_S8 14 /* signed 8 bit literal */ ++#define O_S12 15 /* signed 12 bit literal */ ++#define O_S16 16 /* signed 16 bit literal */ ++#define O_S32 17 /* signed 32 bit literal */ ++ ++#define O_ELEM 18 /* Element selector requiring modulo arithmetic. */ ++#define O_LIT 19 /* Operand must be a literal fitting the target type. */ ++ ++#define O_SHIFT 5 ++ ++#define O_UIMM_P(X) ((X) >= O_U1 && (X) <= O_U32) ++#define O_SIMM_P(X) ((X) >= O_S2 && (X) <= O_S32) ++#define O_IMM_P(X) ((X) == O_LIT || ((X) >= O_U1 && (X) <= O_S32)) ++ ++#define O1_U1 O_U1 ++#define O2_U1 (O_U1 << O_SHIFT) ++#define O3_U1 (O_U1 << (2 * O_SHIFT)) ++#define O4_U1 (O_U1 << (3 * O_SHIFT)) ++ ++#define O1_U2 O_U2 ++#define O2_U2 (O_U2 << O_SHIFT) ++#define O3_U2 (O_U2 << (2 * O_SHIFT)) ++#define O4_U2 (O_U2 << (3 * O_SHIFT)) ++ ++#define O1_U3 O_U3 ++#define O2_U3 (O_U3 << O_SHIFT) ++#define O3_U3 (O_U3 << (2 * O_SHIFT)) ++#define O4_U3 (O_U3 << (3 * O_SHIFT)) ++ ++#define O1_U4 O_U4 ++#define O2_U4 (O_U4 << O_SHIFT) ++#define O3_U4 (O_U4 << (2 * O_SHIFT)) ++#define O4_U4 (O_U4 << (3 * O_SHIFT)) ++ ++#define O1_U5 O_U5 ++#define O2_U5 (O_U5 << O_SHIFT) ++#define O3_U5 (O_U5 << (2 * O_SHIFT)) ++#define O4_U5 (O_U5 << (3 * O_SHIFT)) ++ ++#define O1_U8 O_U8 ++#define O2_U8 (O_U8 << O_SHIFT) ++#define O3_U8 (O_U8 << (2 * O_SHIFT)) ++#define O4_U8 (O_U8 << (3 * O_SHIFT)) ++ ++#define O1_U12 O_U12 ++#define O2_U12 (O_U12 << O_SHIFT) ++#define O3_U12 (O_U12 << (2 * O_SHIFT)) ++#define O4_U12 (O_U12 << (3 * O_SHIFT)) ++ ++#define O1_U16 O_U16 ++#define O2_U16 (O_U16 << O_SHIFT) ++#define O3_U16 (O_U16 << (2 * O_SHIFT)) ++#define O4_U16 (O_U16 << (3 * O_SHIFT)) ++ ++#define O1_U32 O_U32 ++#define O2_U32 (O_U32 << O_SHIFT) ++#define O3_U32 (O_U32 << (2 * O_SHIFT)) ++#define O4_U32 (O_U32 << (3 * O_SHIFT)) 
++ ++ ++#define O1_S2 O_S2 ++#define O2_S2 (O_S2 << O_SHIFT) ++#define O3_S2 (O_S2 << (2 * O_SHIFT)) ++#define O4_S2 (O_S2 << (3 * O_SHIFT)) ++ ++#define O1_S3 O_S3 ++#define O2_S3 (O_S3 << O_SHIFT) ++#define O3_S3 (O_S3 << (2 * O_SHIFT)) ++#define O4_S3 (O_S3 << (3 * O_SHIFT)) ++ ++#define O1_S4 O_S4 ++#define O2_S4 (O_S4 << O_SHIFT) ++#define O3_S4 (O_S4 << (2 * O_SHIFT)) ++#define O4_S4 (O_S4 << (3 * O_SHIFT)) ++ ++#define O1_S5 O_S5 ++#define O2_S5 (O_S5 << O_SHIFT) ++#define O3_S5 (O_S5 << (2 * O_SHIFT)) ++#define O4_S5 (O_S5 << (3 * O_SHIFT)) ++ ++#define O1_S8 O_S8 ++#define O2_S8 (O_S8 << O_SHIFT) ++#define O3_S8 (O_S8 << (2 * O_SHIFT)) ++#define O4_S8 (O_S8 << (3 * O_SHIFT)) ++ ++#define O1_S12 O_S12 ++#define O2_S12 (O_S12 << O_SHIFT) ++#define O3_S12 (O_S12 << (2 * O_SHIFT)) ++#define O4_S12 (O_S12 << (3 * O_SHIFT)) ++ ++#define O1_S16 O_S16 ++#define O2_S16 (O_S16 << O_SHIFT) ++#define O3_S16 (O_S16 << (2 * O_SHIFT)) ++#define O4_S16 (O_S16 << (3 * O_SHIFT)) ++ ++#define O1_S32 O_S32 ++#define O2_S32 (O_S32 << O_SHIFT) ++#define O3_S32 (O_S32 << (2 * O_SHIFT)) ++#define O4_S32 (O_S32 << (3 * O_SHIFT)) ++ ++#define O1_ELEM O_ELEM ++#define O2_ELEM (O_ELEM << O_SHIFT) ++#define O3_ELEM (O_ELEM << (2 * O_SHIFT)) ++#define O4_ELEM (O_ELEM << (3 * O_SHIFT)) ++ ++#define O1_LIT O_LIT ++#define O2_LIT (O_LIT << O_SHIFT) ++#define O3_LIT (O_LIT << (2 * O_SHIFT)) ++#define O4_LIT (O_LIT << (3 * O_SHIFT)) ++ ++ ++/* Builtin flags. Flags applying to the whole builtin definition. */ ++ ++#undef B_INT ++#undef B_HTM ++#undef B_VX ++ ++#undef BFLAGS_MASK_INIT ++#define BFLAGS_MASK_INIT (B_INT) ++ ++#define B_INT (1 << 0) /* Internal builtins. This builtin cannot be used in user programs. */ ++#define B_HTM (1 << 1) /* Builtins requiring the transactional execution facility. */ ++#define B_VX (1 << 2) /* Builtins requiring the z13 vector extensions. 
*/ ++ ++ ++/* B_DEF defines a standard (not overloaded) builtin ++ B_DEF (<builtin-name>, <RTL expander name>, <function attributes>, <builtin flags>, <operand flags, see above>, <fntype>) ++ ++ OB_DEF defines an overloaded builtin ++ OB_DEF (<builtin-name>, <start variant>, <end variant>, <builtin flags>, <fntype>) ++ ++ OB_DEF_VAR defines a variant of an overloaded builtin ++ OB_DEF_VAR (<variant-name>, <standard builtin name>, <operand flags>, <fntype>) */ ++ ++ ++B_DEF (tbeginc, tbeginc, 0, B_HTM, 0, BT_FN_INT) ++B_DEF (tbegin, tbegin, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR) ++B_DEF (tbegin_nofloat, tbegin_nofloat, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR) ++B_DEF (tbegin_retry, tbegin_retry, returns_twice_attr, B_HTM, 0, BT_FN_INT_VOIDPTR_INT) ++B_DEF (tbegin_retry_nofloat, tbegin_retry_nofloat,returns_twice_attr,B_HTM, 0, BT_FN_INT_VOIDPTR_INT) ++B_DEF (tend, tend, 0, B_HTM, 0, BT_FN_INT) ++B_DEF (tabort, tabort, noreturn_attr, B_HTM, 0, BT_FN_VOID_INT) ++B_DEF (tx_nesting_depth, etnd, 0, B_HTM, 0, BT_FN_INT) ++B_DEF (non_tx_store, ntstg, 0, B_HTM, 0, BT_FN_VOID_UINT64PTR_UINT64) ++B_DEF (tx_assist, tx_assist, 0, B_HTM, 0, BT_FN_VOID_INT) ++B_DEF (s390_sfpc, sfpc, 0, 0, 0, BT_FN_VOID_UINT) ++B_DEF (s390_efpc, efpc, 0, 0, 0, BT_FN_UINT) ++B_DEF (s390_lcbb, lcbb, 0, B_VX, O2_U4, BT_FN_UINT_VOIDCONSTPTR_INT) ++ ++OB_DEF (s390_vec_step, MAX, MAX, B_VX, BT_FN_INT_INT) ++ ++OB_DEF (s390_vec_gather_element, s390_vec_gather_element_s32,s390_vec_gather_element_dbl,B_VX,BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_s32,s390_vgef, O4_U2, BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_b32,s390_vgef, O4_U2, BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_u32,s390_vgef, O4_U2, BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_s64,s390_vgeg, O4_U1, BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_b64,s390_vgeg, O4_U1, BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR) ++OB_DEF_VAR (s390_vec_gather_element_u64,s390_vgeg, O4_U1, BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR) ++OB_DEF_VAR 
(s390_vec_gather_element_dbl,s390_vgeg, O4_U1, BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR) ++ ++B_DEF (s390_vgef, vec_gather_elementv4si,0, B_VX, O4_U2, BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR) ++B_DEF (s390_vgeg, vec_gather_elementv2di,0, B_VX, O4_U1, BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR) ++B_DEF (s390_vgbm, vec_genbytemaskv16qi,0, B_VX, O1_U16, BT_FN_UV16QI_USHORT) ++B_DEF (s390_vgmb, vec_genmaskv16qi, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV16QI_UCHAR_UCHAR) ++B_DEF (s390_vgmh, vec_genmaskv8hi, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV8HI_UCHAR_UCHAR) ++B_DEF (s390_vgmf, vec_genmaskv4si, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV4SI_UCHAR_UCHAR) ++B_DEF (s390_vgmg, vec_genmaskv2di, 0, B_VX, O1_U8 | O2_U8, BT_FN_UV2DI_UCHAR_UCHAR) ++ ++OB_DEF (s390_vec_xld2, s390_vec_xld2_s8, s390_vec_xld2_dbl, B_VX, BT_FN_V4SI_INT_VOIDPTR) ++OB_DEF_VAR (s390_vec_xld2_s8, MAX, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_u8, MAX, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_s16, MAX, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_u16, MAX, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_s32, MAX, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_u32, MAX, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_s64, MAX, O1_LIT, BT_OV_V2DI_LONG_LONGLONGPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_u64, MAX, O1_LIT, BT_OV_UV2DI_LONG_ULONGLONGPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xld2_dbl, MAX, O1_LIT, BT_OV_V2DF_LONG_DBLPTR) /* vl */ ++ ++OB_DEF (s390_vec_xlw4, s390_vec_xlw4_s8, s390_vec_xlw4_u32, B_VX, BT_FN_V4SI_INT_VOIDPTR) ++OB_DEF_VAR (s390_vec_xlw4_s8, MAX, O1_LIT, BT_OV_V16QI_LONG_SCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u8, MAX, O1_LIT, BT_OV_UV16QI_LONG_UCHARPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_s16, MAX, O1_LIT, BT_OV_V8HI_LONG_SHORTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u16, MAX, O1_LIT, BT_OV_UV8HI_LONG_USHORTPTR) /* vl */ ++OB_DEF_VAR 
(s390_vec_xlw4_s32, MAX, O1_LIT, BT_OV_V4SI_LONG_INTPTR) /* vl */ ++OB_DEF_VAR (s390_vec_xlw4_u32, MAX, O1_LIT, BT_OV_UV4SI_LONG_UINTPTR) /* vl */ ++ ++OB_DEF (s390_vec_splats, s390_vec_splats_s8, s390_vec_splats_dbl,B_VX, BT_FN_OV4SI_INT) ++OB_DEF_VAR (s390_vec_splats_s8, s390_vlrepb, 0, BT_OV_V16QI_SCHAR) ++OB_DEF_VAR (s390_vec_splats_u8, s390_vlrepb, 0, BT_OV_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splats_s16, s390_vlreph, 0, BT_OV_V8HI_SHORT) ++OB_DEF_VAR (s390_vec_splats_u16, s390_vlreph, 0, BT_OV_UV8HI_USHORT) ++OB_DEF_VAR (s390_vec_splats_s32, s390_vlrepf, 0, BT_OV_V4SI_INT) ++OB_DEF_VAR (s390_vec_splats_u32, s390_vlrepf, 0, BT_OV_UV4SI_UINT) ++OB_DEF_VAR (s390_vec_splats_s64, s390_vlrepg, 0, BT_OV_V2DI_LONGLONG) ++OB_DEF_VAR (s390_vec_splats_u64, s390_vlrepg, 0, BT_OV_UV2DI_ULONGLONG) ++OB_DEF_VAR (s390_vec_splats_dbl, s390_vlrepg_dbl, 0, BT_OV_V2DF_DBL) /* vlrepg */ ++ ++B_DEF (s390_vlrepb, vec_splatsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UCHAR) ++B_DEF (s390_vlreph, vec_splatsv8hi, 0, B_VX, 0, BT_FN_UV8HI_USHORT) ++B_DEF (s390_vlrepf, vec_splatsv4si, 0, B_VX, 0, BT_FN_UV4SI_UINT) ++B_DEF (s390_vlrepg, vec_splatsv2di, 0, B_VX, 0, BT_FN_UV2DI_ULONGLONG) ++B_DEF (s390_vlrepg_dbl, vec_splatsv2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_DBL) ++B_DEF (s390_vrepib, vec_splatsv16qi, 0, B_VX, O1_U8, BT_FN_V16QI_UCHAR) ++B_DEF (s390_vrepih, vec_splatsv8hi, 0, B_VX, O1_S16, BT_FN_V8HI_SHORT) ++B_DEF (s390_vrepif, vec_splatsv4si, 0, B_VX, O1_S16, BT_FN_V4SI_SHORT) ++B_DEF (s390_vrepig, vec_splatsv2di, 0, B_VX, O1_S16, BT_FN_V2DI_SHORT) ++ ++B_DEF (s390_vec_splat_u8, vec_splatsv16qi, 0, B_VX, O1_U8, BT_FN_UV16QI_UCHAR) ++B_DEF (s390_vec_splat_s8, vec_splatsv16qi, 0, B_VX, O1_S8, BT_FN_V16QI_SCHAR) ++B_DEF (s390_vec_splat_u16, vec_splatsv8hi, 0, B_VX, O1_U16, BT_FN_UV8HI_USHORT) ++B_DEF (s390_vec_splat_s16, vec_splatsv8hi, 0, B_VX, O1_S16, BT_FN_V8HI_SHORT) ++B_DEF (s390_vec_splat_u32, vec_splatsv4si, 0, B_VX, O1_U16, BT_FN_UV4SI_USHORT) ++B_DEF (s390_vec_splat_s32, vec_splatsv4si, 0, 
B_VX, O1_S16, BT_FN_V4SI_SHORT) ++B_DEF (s390_vec_splat_u64, vec_splatsv2di, 0, B_VX, O1_U16, BT_FN_UV2DI_USHORT) ++B_DEF (s390_vec_splat_s64, vec_splatsv2di, 0, B_VX, O1_S16, BT_FN_V2DI_SHORT) ++ ++OB_DEF (s390_vec_insert, s390_vec_insert_s8, s390_vec_insert_dbl,B_VX, BT_FN_OV4SI_INT_OV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_s8, s390_vlvgb, O3_ELEM, BT_OV_V16QI_SCHAR_V16QI_INT) ++OB_DEF_VAR (s390_vec_insert_u8, s390_vlvgb, O3_ELEM, BT_OV_UV16QI_UCHAR_UV16QI_INT) ++OB_DEF_VAR (s390_vec_insert_b8, s390_vlvgb, O3_ELEM, BT_OV_UV16QI_UCHAR_BV16QI_INT) ++OB_DEF_VAR (s390_vec_insert_s16, s390_vlvgh, O3_ELEM, BT_OV_V8HI_SHORT_V8HI_INT) ++OB_DEF_VAR (s390_vec_insert_u16, s390_vlvgh, O3_ELEM, BT_OV_UV8HI_USHORT_UV8HI_INT) ++OB_DEF_VAR (s390_vec_insert_b16, s390_vlvgh, O3_ELEM, BT_OV_UV8HI_USHORT_BV8HI_INT) ++OB_DEF_VAR (s390_vec_insert_s32, s390_vlvgf, O3_ELEM, BT_OV_V4SI_INT_V4SI_INT) ++OB_DEF_VAR (s390_vec_insert_u32, s390_vlvgf, O3_ELEM, BT_OV_UV4SI_UINT_UV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_b32, s390_vlvgf, O3_ELEM, BT_OV_UV4SI_UINT_BV4SI_INT) ++OB_DEF_VAR (s390_vec_insert_s64, s390_vlvgg, O3_ELEM, BT_OV_V2DI_LONGLONG_V2DI_INT) ++OB_DEF_VAR (s390_vec_insert_u64, s390_vlvgg, O3_ELEM, BT_OV_UV2DI_ULONGLONG_UV2DI_INT) ++OB_DEF_VAR (s390_vec_insert_b64, s390_vlvgg, O3_ELEM, BT_OV_UV2DI_ULONGLONG_BV2DI_INT) ++OB_DEF_VAR (s390_vec_insert_dbl, s390_vlvgg_dbl, O3_ELEM, BT_OV_V2DF_DBL_V2DF_INT) ++ ++B_DEF (s390_vlvgb, vec_insertv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UCHAR_INT) ++B_DEF (s390_vlvgh, vec_insertv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_USHORT_INT) ++B_DEF (s390_vlvgf, vec_insertv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT_INT) ++B_DEF (s390_vlvgg, vec_insertv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_ULONGLONG_INT) ++B_DEF (s390_vlvgg_dbl, vec_insertv2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_DBL_INT) ++ ++OB_DEF (s390_vec_promote, s390_vec_promote_s8,s390_vec_promote_dbl,B_VX, BT_FN_OV4SI_INT_INT) ++OB_DEF_VAR (s390_vec_promote_s8, s390_vlvgb_noin, O2_ELEM, BT_OV_V16QI_SCHAR_INT) /* 
vlvgb */ ++OB_DEF_VAR (s390_vec_promote_u8, s390_vlvgb_noin, O2_ELEM, BT_OV_UV16QI_UCHAR_INT) /* vlvgb */ ++OB_DEF_VAR (s390_vec_promote_s16, s390_vlvgh_noin, O2_ELEM, BT_OV_V8HI_SHORT_INT) /* vlvgh */ ++OB_DEF_VAR (s390_vec_promote_u16, s390_vlvgh_noin, O2_ELEM, BT_OV_UV8HI_USHORT_INT) /* vlvgh */ ++OB_DEF_VAR (s390_vec_promote_s32, s390_vlvgf_noin, O2_ELEM, BT_OV_V4SI_INT_INT) /* vlvgf */ ++OB_DEF_VAR (s390_vec_promote_u32, s390_vlvgf_noin, O2_ELEM, BT_OV_UV4SI_UINT_INT) /* vlvgf */ ++OB_DEF_VAR (s390_vec_promote_s64, s390_vlvgg_noin, O2_ELEM, BT_OV_V2DI_LONGLONG_INT) /* vlvgg */ ++OB_DEF_VAR (s390_vec_promote_u64, s390_vlvgg_noin, O2_ELEM, BT_OV_UV2DI_ULONGLONG_INT) /* vlvgg */ ++OB_DEF_VAR (s390_vec_promote_dbl, s390_vlvgg_dbl_noin,O2_ELEM, BT_OV_V2DF_DBL_INT) /* vlvgg */ ++ ++B_DEF (s390_vlvgb_noin, vec_promotev16qi, 0, B_VX | B_INT, 0, BT_FN_UV16QI_UCHAR_INT) ++B_DEF (s390_vlvgh_noin, vec_promotev8hi, 0, B_VX | B_INT, 0, BT_FN_UV8HI_USHORT_INT) ++B_DEF (s390_vlvgf_noin, vec_promotev4si, 0, B_VX | B_INT, 0, BT_FN_UV4SI_UINT_INT) ++B_DEF (s390_vlvgg_noin, vec_promotev2di, 0, B_VX | B_INT, 0, BT_FN_UV2DI_ULONGLONG_INT) ++B_DEF (s390_vlvgg_dbl_noin, vec_promotev2df, 0, B_VX | B_INT, 0, BT_FN_V2DF_DBL_INT) ++ ++OB_DEF (s390_vec_extract, s390_vec_extract_s8,s390_vec_extract_dbl,B_VX, BT_FN_INT_OV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_s8, s390_vlgvb, O2_ELEM, BT_OV_SCHAR_V16QI_INT) ++OB_DEF_VAR (s390_vec_extract_u8, s390_vlgvb, O2_ELEM, BT_OV_UCHAR_UV16QI_INT) ++OB_DEF_VAR (s390_vec_extract_b8, s390_vlgvb, O2_ELEM, BT_OV_UCHAR_BV16QI_INT) ++OB_DEF_VAR (s390_vec_extract_s16, s390_vlgvh, O2_ELEM, BT_OV_SHORT_V8HI_INT) ++OB_DEF_VAR (s390_vec_extract_u16, s390_vlgvh, O2_ELEM, BT_OV_USHORT_UV8HI_INT) ++OB_DEF_VAR (s390_vec_extract_b16, s390_vlgvh, O2_ELEM, BT_OV_USHORT_BV8HI_INT) ++OB_DEF_VAR (s390_vec_extract_s32, s390_vlgvf, O2_ELEM, BT_OV_INT_V4SI_INT) ++OB_DEF_VAR (s390_vec_extract_u32, s390_vlgvf, O2_ELEM, BT_OV_UINT_UV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_b32, 
s390_vlgvf, O2_ELEM, BT_OV_UINT_BV4SI_INT) ++OB_DEF_VAR (s390_vec_extract_s64, s390_vlgvg, O2_ELEM, BT_OV_LONGLONG_V2DI_INT) ++OB_DEF_VAR (s390_vec_extract_u64, s390_vlgvg, O2_ELEM, BT_OV_ULONGLONG_UV2DI_INT) ++OB_DEF_VAR (s390_vec_extract_b64, s390_vlgvg, O2_ELEM, BT_OV_ULONGLONG_BV2DI_INT) ++OB_DEF_VAR (s390_vec_extract_dbl, s390_vlgvg_dbl, O2_ELEM, BT_OV_DBL_V2DF_INT) /* vlgvg */ ++ ++B_DEF (s390_vlgvb, vec_extractv16qi, 0, B_VX, 0, BT_FN_UCHAR_UV16QI_INT) ++B_DEF (s390_vlgvh, vec_extractv8hi, 0, B_VX, 0, BT_FN_USHORT_UV8HI_INT) ++B_DEF (s390_vlgvf, vec_extractv4si, 0, B_VX, 0, BT_FN_UINT_UV4SI_INT) ++B_DEF (s390_vlgvg, vec_extractv2di, 0, B_VX, 0, BT_FN_ULONGLONG_UV2DI_INT) ++B_DEF (s390_vlgvg_dbl, vec_extractv2df, 0, B_VX | B_INT, 0, BT_FN_DBL_V2DF_INT) ++ ++OB_DEF (s390_vec_insert_and_zero, s390_vec_insert_and_zero_s8,s390_vec_insert_and_zero_dbl,B_VX,BT_FN_OV4SI_INTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s8,s390_vllezb, 0, BT_OV_V16QI_SCHARCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u8,s390_vllezb, 0, BT_OV_UV16QI_UCHARCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s16,s390_vllezh, 0, BT_OV_V8HI_SHORTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u16,s390_vllezh, 0, BT_OV_UV8HI_USHORTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s32,s390_vllezf, 0, BT_OV_V4SI_INTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u32,s390_vllezf, 0, BT_OV_UV4SI_UINTCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_s64,s390_vllezg, 0, BT_OV_V2DI_LONGLONGCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_u64,s390_vllezg, 0, BT_OV_UV2DI_ULONGLONGCONSTPTR) ++OB_DEF_VAR (s390_vec_insert_and_zero_dbl,s390_vllezg, 0, BT_OV_V2DF_DBLCONSTPTR) ++ ++B_DEF (s390_vllezb, vec_insert_and_zerov16qi,0, B_VX, 0, BT_FN_UV16QI_UCHARCONSTPTR) ++B_DEF (s390_vllezh, vec_insert_and_zerov8hi,0, B_VX, 0, BT_FN_UV8HI_USHORTCONSTPTR) ++B_DEF (s390_vllezf, vec_insert_and_zerov4si,0, B_VX, 0, BT_FN_UV4SI_UINTCONSTPTR) ++B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, 
BT_FN_UV2DI_ULONGLONGCONSTPTR) ++ ++OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT) ++OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) ++OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT) ++ ++B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT) ++ ++OB_DEF (s390_vec_load_pair, s390_vec_load_pair_s64,s390_vec_load_pair_u64,B_VX, BT_FN_OV2DI_LONGLONG_LONGLONG) ++OB_DEF_VAR (s390_vec_load_pair_s64, MAX, 0, BT_OV_V2DI_LONGLONG_LONGLONG) /* vlvgp */ ++OB_DEF_VAR (s390_vec_load_pair_u64, MAX, 0, BT_OV_UV2DI_ULONGLONG_ULONGLONG) /* vlvgp */ ++ ++OB_DEF (s390_vec_load_len, s390_vec_load_len_s8,s390_vec_load_len_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s8, s390_vll, 0, BT_OV_V16QI_SCHARCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u8, s390_vll, 0, BT_OV_UV16QI_UCHARCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s16, s390_vll, 0, BT_OV_V8HI_SHORTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u16, s390_vll, 0, BT_OV_UV8HI_USHORTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s32, s390_vll, 0, BT_OV_V4SI_INTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u32, s390_vll, 0, BT_OV_UV4SI_UINTCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_s64, s390_vll, 0, 
BT_OV_V2DI_LONGLONGCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_u64, s390_vll, 0, BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT) ++OB_DEF_VAR (s390_vec_load_len_dbl, s390_vll, 0, BT_OV_V2DF_DBLCONSTPTR_UINT) ++ ++B_DEF (s390_vll, vllv16qi, 0, B_VX, 0, BT_FN_V16QI_UINT_VOIDCONSTPTR) ++ ++OB_DEF (s390_vec_mergeh, s390_vec_mergeh_s8, s390_vec_mergeh_dbl,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mergeh_s8, s390_vmrhb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mergeh_u8, s390_vmrhb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mergeh_b8, s390_vmrhb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_mergeh_s16, s390_vmrhh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mergeh_u16, s390_vmrhh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mergeh_b16, s390_vmrhh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_mergeh_s32, s390_vmrhf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_mergeh_u32, s390_vmrhf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mergeh_b32, s390_vmrhf, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_mergeh_s64, s390_vmrhg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_mergeh_u64, s390_vmrhg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_mergeh_b64, s390_vmrhg, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_mergeh_dbl, s390_vmrhg, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmrhb, vec_mergehv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmrhh, vec_mergehv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmrhf, vec_mergehv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmrhg, vec_mergehv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_mergel, s390_vec_mergel_s8, s390_vec_mergel_dbl,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mergel_s8, s390_vmrlb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mergel_u8, s390_vmrlb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mergel_b8, s390_vmrlb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR 
(s390_vec_mergel_s16, s390_vmrlh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mergel_u16, s390_vmrlh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mergel_b16, s390_vmrlh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_mergel_s32, s390_vmrlf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_mergel_u32, s390_vmrlf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mergel_b32, s390_vmrlf, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_mergel_s64, s390_vmrlg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_mergel_u64, s390_vmrlg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_mergel_b64, s390_vmrlg, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_mergel_dbl, s390_vmrlg, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmrlb, vec_mergelv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmrlh, vec_mergelv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmrlf, vec_mergelv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmrlg, vec_mergelv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_pack, s390_vec_pack_s16, s390_vec_pack_b64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_pack_s16, s390_vpkh, 0, BT_OV_V16QI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_pack_u16, s390_vpkh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_pack_b16, s390_vpkh, 0, BT_OV_BV16QI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_pack_s32, s390_vpkf, 0, BT_OV_V8HI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_pack_u32, s390_vpkf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_pack_b32, s390_vpkf, 0, BT_OV_BV8HI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_pack_s64, s390_vpkg, 0, BT_OV_V4SI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_pack_u64, s390_vpkg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_pack_b64, s390_vpkg, 0, BT_OV_BV4SI_BV2DI_BV2DI) ++ ++B_DEF (s390_vpkh, vec_packv8hi, 0, B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI) ++B_DEF (s390_vpkf, vec_packv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI) ++B_DEF (s390_vpkg, vec_packv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI) ++ ++OB_DEF 
(s390_vec_packs, s390_vec_packs_s16, s390_vec_packs_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_packs_s16, s390_vpksh, 0, BT_OV_V16QI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_packs_u16, s390_vpklsh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_packs_s32, s390_vpksf, 0, BT_OV_V8HI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_packs_u32, s390_vpklsf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_packs_s64, s390_vpksg, 0, BT_OV_V4SI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_packs_u64, s390_vpklsg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++ ++B_DEF (s390_vpksh, vec_packsv8hi, 0, B_VX, 0, BT_FN_V16QI_V8HI_V8HI) ++B_DEF (s390_vpklsh, vec_packsuv8hi, 0, B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI) ++B_DEF (s390_vpksf, vec_packsv4si, 0, B_VX, 0, BT_FN_V8HI_V4SI_V4SI) ++B_DEF (s390_vpklsf, vec_packsuv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI) ++B_DEF (s390_vpksg, vec_packsv2di, 0, B_VX, 0, BT_FN_V4SI_V2DI_V2DI) ++B_DEF (s390_vpklsg, vec_packsuv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_packs_cc, s390_vec_packs_cc_s16,s390_vec_packs_cc_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s16, s390_vpkshs, 0, BT_OV_V16QI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u16, s390_vpklshs, 0, BT_OV_UV16QI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s32, s390_vpksfs, 0, BT_OV_V8HI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u32, s390_vpklsfs, 0, BT_OV_UV8HI_UV4SI_UV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_s64, s390_vpksgs, 0, BT_OV_V4SI_V2DI_V2DI_INTPTR) ++OB_DEF_VAR (s390_vec_packs_cc_u64, s390_vpklsgs, 0, BT_OV_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++B_DEF (s390_vpkshs, vec_packs_ccv8hi, 0, B_VX, 0, BT_FN_V16QI_V8HI_V8HI_INTPTR) ++B_DEF (s390_vpklshs, vec_packsu_ccv8hi, 0, B_VX, 0, BT_FN_UV16QI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vpksfs, vec_packs_ccv4si, 0, B_VX, 0, BT_FN_V8HI_V4SI_V4SI_INTPTR) ++B_DEF (s390_vpklsfs, vec_packsu_ccv4si, 0, B_VX, 0, BT_FN_UV8HI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vpksgs, vec_packs_ccv2di, 0, B_VX, 0, 
BT_FN_V4SI_V2DI_V2DI_INTPTR) ++B_DEF (s390_vpklsgs, vec_packsu_ccv2di, 0, B_VX, 0, BT_FN_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++OB_DEF (s390_vec_packsu, s390_vec_packsu_s16,s390_vec_packsu_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_packsu_s16, s390_vec_packsu_u16,0, BT_OV_UV16QI_V8HI_V8HI) /* vpklsh */ ++OB_DEF_VAR (s390_vec_packsu_u16, s390_vpklsh, 0, BT_OV_UV16QI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_packsu_s32, s390_vec_packsu_u32,0, BT_OV_UV8HI_V4SI_V4SI) /* vpklsf */ ++OB_DEF_VAR (s390_vec_packsu_u32, s390_vpklsf, 0, BT_OV_UV8HI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_packsu_s64, s390_vec_packsu_u64,0, BT_OV_UV4SI_V2DI_V2DI) /* vpklsg */ ++OB_DEF_VAR (s390_vec_packsu_u64, s390_vpklsg, 0, BT_OV_UV4SI_UV2DI_UV2DI) ++ ++B_DEF (s390_vec_packsu_u16, vec_packsu_uv8hi, 0, B_VX | B_INT, 0, BT_FN_UV16QI_UV8HI_UV8HI) /* vpklsh */ ++B_DEF (s390_vec_packsu_u32, vec_packsu_uv4si, 0, B_VX | B_INT, 0, BT_FN_UV8HI_UV4SI_UV4SI) /* vpklsf */ ++B_DEF (s390_vec_packsu_u64, vec_packsu_uv2di, 0, B_VX | B_INT, 0, BT_FN_UV4SI_UV2DI_UV2DI) /* vpklsg */ ++ ++OB_DEF (s390_vec_packsu_cc, s390_vec_packsu_cc_u16,s390_vec_packsu_cc_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u16, s390_vpklshs, 0, BT_OV_UV16QI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u32, s390_vpklsfs, 0, BT_OV_UV8HI_UV4SI_UV4SI_INTPTR) ++OB_DEF_VAR (s390_vec_packsu_cc_u64, s390_vpklsgs, 0, BT_OV_UV4SI_UV2DI_UV2DI_INTPTR) ++ ++OB_DEF (s390_vec_perm, s390_vec_perm_s8, s390_vec_perm_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_perm_s8, s390_vperm, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b8, s390_vperm, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u8, s390_vperm, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s16, s390_vperm, 0, BT_OV_V8HI_V8HI_V8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b16, s390_vperm, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u16, s390_vperm, 0, 
BT_OV_UV8HI_UV8HI_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s32, s390_vperm, 0, BT_OV_V4SI_V4SI_V4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b32, s390_vperm, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u32, s390_vperm, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_s64, s390_vperm, 0, BT_OV_V2DI_V2DI_V2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_b64, s390_vperm, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_u64, s390_vperm, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_perm_dbl, s390_vperm, 0, BT_OV_V2DF_V2DF_V2DF_UV16QI) ++ ++B_DEF (s390_vperm, vec_permv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_permi, s390_vec_permi_s64, s390_vec_permi_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INT) ++OB_DEF_VAR (s390_vec_permi_s64, s390_vpdi, O3_U2, BT_OV_V2DI_V2DI_V2DI_INT) ++OB_DEF_VAR (s390_vec_permi_b64, s390_vpdi, O3_U2, BT_OV_BV2DI_BV2DI_BV2DI_INT) ++OB_DEF_VAR (s390_vec_permi_u64, s390_vpdi, O3_U2, BT_OV_UV2DI_UV2DI_UV2DI_INT) ++OB_DEF_VAR (s390_vec_permi_dbl, s390_vpdi, O3_U2, BT_OV_V2DF_V2DF_V2DF_INT) ++ ++B_DEF (s390_vpdi, vec_permiv2di, 0, B_VX, O3_U2, BT_FN_UV2DI_UV2DI_UV2DI_INT) ++ ++OB_DEF (s390_vec_splat, s390_vec_splat2_s8, s390_vec_splat2_dbl,B_VX, BT_FN_OV4SI_OV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s8, s390_vrepb, O2_U4, BT_OV_V16QI_V16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b8, s390_vrepb, O2_U4, BT_OV_BV16QI_BV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u8, s390_vrepb, O2_U4, BT_OV_UV16QI_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s16, s390_vreph, O2_U3, BT_OV_V8HI_V8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b16, s390_vreph, O2_U3, BT_OV_BV8HI_BV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u16, s390_vreph, O2_U3, BT_OV_UV8HI_UV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_s32, s390_vrepf, O2_U2, BT_OV_V4SI_V4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b32, s390_vrepf, O2_U2, BT_OV_BV4SI_BV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u32, s390_vrepf, O2_U2, BT_OV_UV4SI_UV4SI_UCHAR) ++OB_DEF_VAR 
(s390_vec_splat2_s64, s390_vrepg, O2_U1, BT_OV_V2DI_V2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_b64, s390_vrepg, O2_U1, BT_OV_BV2DI_BV2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_u64, s390_vrepg, O2_U1, BT_OV_UV2DI_UV2DI_UCHAR) ++OB_DEF_VAR (s390_vec_splat2_dbl, s390_vrepg, O2_U1, BT_OV_V2DF_V2DF_UCHAR) ++ ++B_DEF (s390_vrepb, vec_splatv16qi, 0, B_VX, O2_U4, BT_FN_UV16QI_UV16QI_UCHAR) ++B_DEF (s390_vreph, vec_splatv8hi, 0, B_VX, O2_U3, BT_FN_UV8HI_UV8HI_UCHAR) ++B_DEF (s390_vrepf, vec_splatv4si, 0, B_VX, O2_U2, BT_FN_UV4SI_UV4SI_UCHAR) ++B_DEF (s390_vrepg, vec_splatv2di, 0, B_VX, O2_U1, BT_FN_UV2DI_UV2DI_UCHAR) ++ ++OB_DEF (s390_vec_scatter_element, s390_vec_scatter_element_s32,s390_vec_scatter_element_dbl,B_VX,BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_s32,s390_vscef, O4_U2, BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_b32,s390_vscef, O4_U2, BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_u32,s390_vscef, O4_U2, BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_s64,s390_vsceg, O4_U1, BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_b64,s390_vsceg, O4_U1, BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_u64,s390_vsceg, O4_U1, BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++OB_DEF_VAR (s390_vec_scatter_element_dbl,s390_vsceg, O4_U1, BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG) ++ ++B_DEF (s390_vscef, vec_scatter_elementv4si,0, B_VX, O4_U2, BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG) ++B_DEF (s390_vsceg, vec_scatter_elementv2di,0, B_VX, O4_U1, BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG) ++ ++OB_DEF (s390_vec_sel, s390_vec_sel_b8_a, s390_vec_sel_dbl_b, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sel_b8_a, s390_vsel, 0, BT_OV_BV16QI_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_b8_b, s390_vsel, 0, BT_OV_BV16QI_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR 
(s390_vec_sel_s8_a, s390_vsel, 0, BT_OV_V16QI_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_s8_b, s390_vsel, 0, BT_OV_V16QI_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_sel_u8_a, s390_vsel, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sel_u8_b, s390_vsel, 0, BT_OV_UV16QI_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_sel_b16_a, s390_vsel, 0, BT_OV_BV8HI_BV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_b16_b, s390_vsel, 0, BT_OV_BV8HI_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_s16_a, s390_vsel, 0, BT_OV_V8HI_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_s16_b, s390_vsel, 0, BT_OV_V8HI_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_u16_a, s390_vsel, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sel_u16_b, s390_vsel, 0, BT_OV_UV8HI_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_sel_b32_a, s390_vsel, 0, BT_OV_BV4SI_BV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_b32_b, s390_vsel, 0, BT_OV_BV4SI_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_s32_a, s390_vsel, 0, BT_OV_V4SI_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_s32_b, s390_vsel, 0, BT_OV_V4SI_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_u32_a, s390_vsel, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sel_u32_b, s390_vsel, 0, BT_OV_UV4SI_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_sel_b64_a, s390_vsel, 0, BT_OV_BV2DI_BV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_b64_b, s390_vsel, 0, BT_OV_BV2DI_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_s64_a, s390_vsel, 0, BT_OV_V2DI_V2DI_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_s64_b, s390_vsel, 0, BT_OV_V2DI_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_u64_a, s390_vsel, 0, BT_OV_UV2DI_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_sel_u64_b, s390_vsel, 0, BT_OV_UV2DI_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_sel_dbl_a, s390_vsel, 0, BT_OV_V2DF_V2DF_V2DF_UV2DI) ++OB_DEF_VAR (s390_vec_sel_dbl_b, s390_vsel, 0, BT_OV_V2DF_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vsel, vec_selv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_extend_s64, 
s390_vec_extend_s64_s8,s390_vec_extend_s64_s32,B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_extend_s64_s8, s390_vsegb, 0, BT_OV_V2DI_V16QI) ++OB_DEF_VAR (s390_vec_extend_s64_s16, s390_vsegh, 0, BT_OV_V2DI_V8HI) ++OB_DEF_VAR (s390_vec_extend_s64_s32, s390_vsegf, 0, BT_OV_V2DI_V4SI) ++ ++B_DEF (s390_vsegb, vec_extendv16qi, 0, B_VX, 0, BT_FN_V2DI_V16QI) ++B_DEF (s390_vsegh, vec_extendv8hi, 0, B_VX, 0, BT_FN_V2DI_V8HI) ++B_DEF (s390_vsegf, vec_extendv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI) ++ ++OB_DEF (s390_vec_xstd2, s390_vec_xstd2_s8, s390_vec_xstd2_dbl, B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR) ++OB_DEF_VAR (s390_vec_xstd2_s8, MAX, O2_LIT, BT_OV_VOID_V16QI_LONG_SCHARPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_u8, MAX, O2_LIT, BT_OV_VOID_UV16QI_LONG_UCHARPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_s16, MAX, O2_LIT, BT_OV_VOID_V8HI_LONG_SHORTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_u16, MAX, O2_LIT, BT_OV_VOID_UV8HI_LONG_USHORTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_s32, MAX, O2_LIT, BT_OV_VOID_V4SI_LONG_INTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_u32, MAX, O2_LIT, BT_OV_VOID_UV4SI_LONG_UINTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_s64, MAX, O2_LIT, BT_OV_VOID_V2DI_LONG_LONGLONGPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_u64, MAX, O2_LIT, BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstd2_dbl, MAX, O2_LIT, BT_OV_VOID_V2DF_LONG_DBLPTR) /* vst */ ++ ++OB_DEF (s390_vec_xstw4, s390_vec_xstw4_s8, s390_vec_xstw4_u32, B_VX, BT_FN_VOID_OV4SI_INT_VOIDPTR) ++OB_DEF_VAR (s390_vec_xstw4_s8, MAX, O2_LIT, BT_OV_VOID_V16QI_LONG_SCHARPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstw4_u8, MAX, O2_LIT, BT_OV_VOID_UV16QI_LONG_UCHARPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstw4_s16, MAX, O2_LIT, BT_OV_VOID_V8HI_LONG_SHORTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstw4_u16, MAX, O2_LIT, BT_OV_VOID_UV8HI_LONG_USHORTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstw4_s32, MAX, O2_LIT, BT_OV_VOID_V4SI_LONG_INTPTR) /* vst */ ++OB_DEF_VAR (s390_vec_xstw4_u32, MAX, O2_LIT, 
BT_OV_VOID_UV4SI_LONG_UINTPTR) /* vst */ ++ ++OB_DEF (s390_vec_store_len, s390_vec_store_len_s8,s390_vec_store_len_dbl,B_VX, BT_FN_VOID_OV4SI_VOIDPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_s8, s390_vstl, 0, BT_OV_VOID_V16QI_SCHARPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_u8, s390_vstl, 0, BT_OV_VOID_UV16QI_UCHARPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_s16, s390_vstl, 0, BT_OV_VOID_V8HI_SHORTPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_u16, s390_vstl, 0, BT_OV_VOID_UV8HI_USHORTPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_s32, s390_vstl, 0, BT_OV_VOID_V4SI_INTPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_u32, s390_vstl, 0, BT_OV_VOID_UV4SI_UINTPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_s64, s390_vstl, 0, BT_OV_VOID_V2DI_LONGLONGPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_u64, s390_vstl, 0, BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT) ++OB_DEF_VAR (s390_vec_store_len_dbl, s390_vstl, 0, BT_OV_VOID_V2DF_DBLPTR_UINT) ++ ++B_DEF (s390_vstl, vstlv16qi, 0, B_VX, 0, BT_FN_VOID_V16QI_UINT_VOIDPTR) ++ ++OB_DEF (s390_vec_unpackh, s390_vec_unpackh_s8,s390_vec_unpackh_u32,B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_unpackh_s8, s390_vuphb, 0, BT_OV_V8HI_V16QI) ++OB_DEF_VAR (s390_vec_unpackh_b8, s390_vuphb, 0, BT_OV_BV8HI_BV16QI) ++OB_DEF_VAR (s390_vec_unpackh_u8, s390_vuplhb, 0, BT_OV_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_unpackh_s16, s390_vuphh, 0, BT_OV_V4SI_V8HI) ++OB_DEF_VAR (s390_vec_unpackh_b16, s390_vuphh, 0, BT_OV_BV4SI_BV8HI) ++OB_DEF_VAR (s390_vec_unpackh_u16, s390_vuplhh, 0, BT_OV_UV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_unpackh_s32, s390_vuphf, 0, BT_OV_V2DI_V4SI) ++OB_DEF_VAR (s390_vec_unpackh_b32, s390_vuphf, 0, BT_OV_BV2DI_BV4SI) ++OB_DEF_VAR (s390_vec_unpackh_u32, s390_vuplhf, 0, BT_OV_UV2DI_UV4SI) ++ ++B_DEF (s390_vuphb, vec_unpackhv16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI) ++B_DEF (s390_vuplhb, vec_unpackh_lv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI) ++B_DEF (s390_vuphh, vec_unpackhv8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI) ++B_DEF (s390_vuplhh, vec_unpackh_lv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI) 
++B_DEF (s390_vuphf, vec_unpackhv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI) ++B_DEF (s390_vuplhf, vec_unpackh_lv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI) ++ ++OB_DEF (s390_vec_unpackl, s390_vec_unpackl_s8,s390_vec_unpackl_u32,B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_unpackl_s8, s390_vuplb, 0, BT_OV_V8HI_V16QI) ++OB_DEF_VAR (s390_vec_unpackl_b8, s390_vuplb, 0, BT_OV_BV8HI_BV16QI) ++OB_DEF_VAR (s390_vec_unpackl_u8, s390_vupllb, 0, BT_OV_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_unpackl_s16, s390_vuplhw, 0, BT_OV_V4SI_V8HI) ++OB_DEF_VAR (s390_vec_unpackl_b16, s390_vupllh, 0, BT_OV_BV4SI_BV8HI) ++OB_DEF_VAR (s390_vec_unpackl_u16, s390_vupllh, 0, BT_OV_UV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_unpackl_s32, s390_vuplf, 0, BT_OV_V2DI_V4SI) ++OB_DEF_VAR (s390_vec_unpackl_b32, s390_vuplf, 0, BT_OV_BV2DI_BV4SI) ++OB_DEF_VAR (s390_vec_unpackl_u32, s390_vupllf, 0, BT_OV_UV2DI_UV4SI) ++ ++B_DEF (s390_vuplb, vec_unpacklv16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI) ++B_DEF (s390_vupllb, vec_unpackl_lv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI) ++B_DEF (s390_vuplhw, vec_unpacklv8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI) ++B_DEF (s390_vupllh, vec_unpackl_lv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI) ++B_DEF (s390_vuplf, vec_unpacklv4si, 0, B_VX, 0, BT_FN_V2DI_V4SI) ++B_DEF (s390_vupllf, vec_unpackl_lv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI) ++B_DEF (s390_vaq, vec_add_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_addc, s390_vec_addc_u8, s390_vec_addc_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_addc_u8, s390_vaccb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_addc_u16, s390_vacch, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_addc_u32, s390_vaccf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_addc_u64, s390_vaccg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++ ++B_DEF (s390_vaccb, vec_addcv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vacch, vec_addcv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vaccf, vec_addcv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vaccg, 
vec_addcv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++B_DEF (s390_vaccq, vec_addc_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vacq, vec_adde_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vacccq, vec_addec_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_and, s390_vec_and_b8, s390_vec_and_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_and_b8, s390_vn, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_and_s8_a, s390_vn, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_and_s8_b, s390_vn, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_and_s8_c, s390_vn, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_and_u8_a, s390_vn, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_and_u8_b, s390_vn, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_and_u8_c, s390_vn, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_and_b16, s390_vn, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_and_s16_a, s390_vn, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_and_s16_b, s390_vn, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_and_s16_c, s390_vn, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_and_u16_a, s390_vn, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_and_u16_b, s390_vn, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_and_u16_c, s390_vn, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_and_b32, s390_vn, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_and_s32_a, s390_vn, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_and_s32_b, s390_vn, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_and_s32_c, s390_vn, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_and_u32_a, s390_vn, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_and_u32_b, s390_vn, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_and_u32_c, s390_vn, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_and_b64, s390_vn, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_and_s64_a, s390_vn, 0, 
BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_and_s64_b, s390_vn, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_and_s64_c, s390_vn, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_and_u64_a, s390_vn, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_and_u64_b, s390_vn, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_and_u64_c, s390_vn, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_and_dbl_a, s390_vn, 0, BT_OV_V2DF_BV2DI_V2DF) ++OB_DEF_VAR (s390_vec_and_dbl_b, s390_vn, 0, BT_OV_V2DF_V2DF_V2DF) ++OB_DEF_VAR (s390_vec_and_dbl_c, s390_vn, 0, BT_OV_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vn, andv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_andc, s390_vec_andc_b8, s390_vec_andc_dbl_c,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_andc_b8, s390_vnc, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_andc_s8_a, s390_vnc, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_andc_s8_b, s390_vnc, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_andc_s8_c, s390_vnc, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_andc_u8_a, s390_vnc, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_andc_u8_b, s390_vnc, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_andc_u8_c, s390_vnc, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_andc_b16, s390_vnc, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_andc_s16_a, s390_vnc, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_andc_s16_b, s390_vnc, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_andc_s16_c, s390_vnc, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_andc_u16_a, s390_vnc, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_andc_u16_b, s390_vnc, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_andc_u16_c, s390_vnc, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_andc_b32, s390_vnc, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_andc_s32_a, s390_vnc, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_andc_s32_b, s390_vnc, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR 
(s390_vec_andc_s32_c, s390_vnc, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_andc_u32_a, s390_vnc, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_andc_u32_b, s390_vnc, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_andc_u32_c, s390_vnc, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_andc_b64, s390_vnc, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_andc_s64_a, s390_vnc, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_andc_s64_b, s390_vnc, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_andc_s64_c, s390_vnc, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_andc_u64_a, s390_vnc, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_andc_u64_b, s390_vnc, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_andc_u64_c, s390_vnc, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_andc_dbl_a, s390_vnc, 0, BT_OV_V2DF_BV2DI_V2DF) ++OB_DEF_VAR (s390_vec_andc_dbl_b, s390_vnc, 0, BT_OV_V2DF_V2DF_V2DF) ++OB_DEF_VAR (s390_vec_andc_dbl_c, s390_vnc, 0, BT_OV_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vnc, vec_andcv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_avg, s390_vec_avg_s8, s390_vec_avg_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_avg_s8, s390_vavgb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_avg_u8, s390_vavglb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_avg_s16, s390_vavgh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_avg_u16, s390_vavglh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_avg_s32, s390_vavgf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_avg_u32, s390_vavglf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_avg_s64, s390_vavgg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_avg_u64, s390_vavglg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++ ++B_DEF (s390_vavgb, vec_avgv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI) ++B_DEF (s390_vavglb, vec_avguv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vavgh, vec_avgv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI) ++B_DEF (s390_vavglh, vec_avguv8hi, 0, B_VX, 
0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vavgf, vec_avgv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI) ++B_DEF (s390_vavglf, vec_avguv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vavgg, vec_avgv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI) ++B_DEF (s390_vavglg, vec_avguv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++B_DEF (s390_vcksm, vec_checksum, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vceqbs, vec_cmpeqv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vceqhs, vec_cmpeqv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vceqfs, vec_cmpeqv4si_cc, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vceqgs, vec_cmpeqv2di_cc, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI_INTPTR) ++B_DEF (s390_vfcedbs, vec_cmpeqv2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR) ++B_DEF (s390_vchbs, vec_cmphv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI_INTPTR) ++B_DEF (s390_vchlbs, vec_cmphlv16qi_cc, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vchhs, vec_cmphv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI_INTPTR) ++B_DEF (s390_vchlhs, vec_cmphlv8hi_cc, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vchfs, vec_cmphv4si_cc, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI_INTPTR) ++B_DEF (s390_vchlfs, vec_cmphlv4si_cc, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vchgs, vec_cmphv2di_cc, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI_INTPTR) ++B_DEF (s390_vchlgs, vec_cmphlv2di_cc, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI_INTPTR) ++B_DEF (s390_vfchdbs, vec_cmphv2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR) ++B_DEF (s390_vfchedbs, vec_cmphev2df_cc, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF_INTPTR) ++B_DEF (vec_all_eqv16qi, vec_all_eqv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_eqv8hi, vec_all_eqv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_eqv4si, vec_all_eqv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_eqv2di, vec_all_eqv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_eqv2df, 
vec_all_eqv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_all_nev16qi, vec_all_nev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_nev8hi, vec_all_nev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_nev4si, vec_all_nev4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_nev2di, vec_all_nev2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_nev2df, vec_all_nev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_all_gev16qi, vec_all_gev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_all_geuv16qi, vec_all_geuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_gev8hi, vec_all_gev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_all_geuv8hi, vec_all_geuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_gev4si, vec_all_gev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_all_geuv4si, vec_all_geuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_gev2di, vec_all_gev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_all_geuv2di, vec_all_geuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_gev2df, vec_all_gev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_all_gtv16qi, vec_all_gtv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_all_gtuv16qi, vec_all_gtuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_gtv8hi, vec_all_gtv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_all_gtuv8hi, vec_all_gtuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_gtv4si, vec_all_gtv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_all_gtuv4si, vec_all_gtuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_gtv2di, vec_all_gtv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_all_gtuv2di, vec_all_gtuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_gtv2df, vec_all_gtv2df, 0, B_VX | B_INT, 0, 
BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_all_lev16qi, vec_all_lev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_all_leuv16qi, vec_all_leuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_lev8hi, vec_all_lev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_all_leuv8hi, vec_all_leuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_lev4si, vec_all_lev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_all_leuv4si, vec_all_leuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_lev2di, vec_all_lev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_all_leuv2di, vec_all_leuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_lev2df, vec_all_lev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_all_ltv16qi, vec_all_ltv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_all_ltuv16qi, vec_all_ltuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_all_ltv8hi, vec_all_ltv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_all_ltuv8hi, vec_all_ltuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_all_ltv4si, vec_all_ltv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_all_ltuv4si, vec_all_ltuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_all_ltv2di, vec_all_ltv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_all_ltuv2di, vec_all_ltuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_all_ltv2df, vec_all_ltv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_eq, s390_vec_all_eq_s8_a,s390_vec_all_eq_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_eq_s8_a, vec_all_eqv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_eq_s8_b, vec_all_eqv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_eq_b8_a, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_eq_b8_b, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR 
(s390_vec_all_eq_b8_c, vec_all_eqv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_eq_u8_a, vec_all_eqv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_eq_u8_b, vec_all_eqv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_eq_s16_a, vec_all_eqv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_eq_s16_b, vec_all_eqv8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_eq_b16_a, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_eq_b16_b, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_eq_b16_c, vec_all_eqv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_eq_u16_a, vec_all_eqv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_eq_u16_b, vec_all_eqv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_eq_s32_a, vec_all_eqv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_eq_s32_b, vec_all_eqv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_eq_b32_a, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_eq_b32_b, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_eq_b32_c, vec_all_eqv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_eq_u32_a, vec_all_eqv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_eq_u32_b, vec_all_eqv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_eq_s64_a, vec_all_eqv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_eq_s64_b, vec_all_eqv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_eq_b64_a, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_eq_b64_b, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_eq_b64_c, vec_all_eqv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_eq_u64_a, vec_all_eqv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_eq_u64_b, vec_all_eqv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_eq_dbl, vec_all_eqv2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_ne, 
s390_vec_all_ne_s8_a,s390_vec_all_ne_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_ne_s8_a, vec_all_nev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_ne_s8_b, vec_all_nev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ne_b8_a, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ne_b8_b, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_ne_b8_c, vec_all_nev16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_ne_u8_a, vec_all_nev16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_ne_u8_b, vec_all_nev16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ne_s16_a, vec_all_nev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_ne_s16_b, vec_all_nev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ne_b16_a, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ne_b16_b, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_ne_b16_c, vec_all_nev8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_ne_u16_a, vec_all_nev8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_ne_u16_b, vec_all_nev8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ne_s32_a, vec_all_nev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_ne_s32_b, vec_all_nev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ne_b32_a, vec_all_nev4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ne_b32_b, vec_all_nev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_ne_b32_c, vec_all_nev4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_ne_u32_a, vec_all_nev4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_ne_u32_b, vec_all_nev4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ne_s64_a, vec_all_nev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_ne_s64_b, vec_all_nev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_ne_b64_a, vec_all_nev2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR 
(s390_vec_all_ne_b64_b, vec_all_nev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_ne_b64_c, vec_all_nev2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_ne_u64_a, vec_all_nev2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_ne_u64_b, vec_all_nev2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_ne_dbl, vec_all_nev2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_ge, s390_vec_all_ge_s8_a,s390_vec_all_ge_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_ge_s8_a, vec_all_gev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_ge_s8_b, vec_all_gev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ge_b8_a, vec_all_geuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ge_b8_b, vec_all_gev16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_ge_b8_c, vec_all_geuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_ge_u8_a, vec_all_geuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_ge_u8_b, vec_all_geuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_ge_s16_a, vec_all_gev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_ge_s16_b, vec_all_gev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ge_b16_a, vec_all_geuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ge_b16_b, vec_all_gev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_ge_b16_c, vec_all_geuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_ge_u16_a, vec_all_geuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_ge_u16_b, vec_all_geuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_ge_s32_a, vec_all_gev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_ge_s32_b, vec_all_gev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ge_b32_a, vec_all_geuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ge_b32_b, vec_all_gev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_ge_b32_c, vec_all_geuv4si, 0, BT_OV_INT_BV4SI_UV4SI) 
++OB_DEF_VAR (s390_vec_all_ge_u32_a, vec_all_geuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_ge_u32_b, vec_all_geuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_ge_s64_a, vec_all_gev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_ge_s64_b, vec_all_gev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_ge_b64_a, vec_all_geuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_ge_b64_b, vec_all_gev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_ge_b64_c, vec_all_geuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_ge_u64_a, vec_all_geuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_ge_u64_b, vec_all_geuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_ge_dbl, vec_all_gev2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_gt, s390_vec_all_gt_s8_a,s390_vec_all_gt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_gt_s8_a, vec_all_gtv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_gt_s8_b, vec_all_gtv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_gt_b8_a, vec_all_gtuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_gt_b8_b, vec_all_gtv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_gt_b8_c, vec_all_gtuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_gt_u8_a, vec_all_gtuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_gt_u8_b, vec_all_gtuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_gt_s16_a, vec_all_gtv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_gt_s16_b, vec_all_gtv8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_gt_b16_a, vec_all_gtuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_gt_b16_b, vec_all_gtv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_gt_b16_c, vec_all_gtuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_gt_u16_a, vec_all_gtuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_gt_u16_b, vec_all_gtuv8hi, 0, 
BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_gt_s32_a, vec_all_gtv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_gt_s32_b, vec_all_gtv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_gt_b32_a, vec_all_gtuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_gt_b32_b, vec_all_gtv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_gt_b32_c, vec_all_gtuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_gt_u32_a, vec_all_gtuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_gt_u32_b, vec_all_gtuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_gt_s64_a, vec_all_gtv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_gt_s64_b, vec_all_gtv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_gt_b64_a, vec_all_gtuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_gt_b64_b, vec_all_gtv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_gt_b64_c, vec_all_gtuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_gt_u64_a, vec_all_gtuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_gt_u64_b, vec_all_gtuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_gt_dbl, vec_all_gtv2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_le, s390_vec_all_le_s8_a,s390_vec_all_le_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_le_s8_a, vec_all_lev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_le_s8_b, vec_all_lev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_le_b8_a, vec_all_leuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_le_b8_b, vec_all_lev16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_le_b8_c, vec_all_leuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_le_u8_a, vec_all_leuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_le_u8_b, vec_all_leuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_le_s16_a, vec_all_lev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_le_s16_b, 
vec_all_lev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_le_b16_a, vec_all_leuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_le_b16_b, vec_all_lev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_le_b16_c, vec_all_leuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_le_u16_a, vec_all_leuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_le_u16_b, vec_all_leuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_le_s32_a, vec_all_lev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_le_s32_b, vec_all_lev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_le_b32_a, vec_all_leuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_le_b32_b, vec_all_lev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_le_b32_c, vec_all_leuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_le_u32_a, vec_all_leuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_le_u32_b, vec_all_leuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_le_s64_a, vec_all_lev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_le_s64_b, vec_all_lev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_le_b64_a, vec_all_leuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_le_b64_b, vec_all_lev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_le_b64_c, vec_all_leuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_le_u64_a, vec_all_leuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_le_u64_b, vec_all_leuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_le_dbl, vec_all_lev2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_all_lt, s390_vec_all_lt_s8_a,s390_vec_all_lt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_all_lt_s8_a, vec_all_ltv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_lt_s8_b, vec_all_ltv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_lt_b8_a, vec_all_ltuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_lt_b8_b, 
vec_all_ltv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_all_lt_b8_c, vec_all_ltuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_lt_u8_a, vec_all_ltuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_all_lt_u8_b, vec_all_ltuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_all_lt_s16_a, vec_all_ltv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_lt_s16_b, vec_all_ltv8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_lt_b16_a, vec_all_ltuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_lt_b16_b, vec_all_ltv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_all_lt_b16_c, vec_all_ltuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_lt_u16_a, vec_all_ltuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_all_lt_u16_b, vec_all_ltuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_all_lt_s32_a, vec_all_ltv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_lt_s32_b, vec_all_ltv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_lt_b32_a, vec_all_ltuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_lt_b32_b, vec_all_ltv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_all_lt_b32_c, vec_all_ltuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_lt_u32_a, vec_all_ltuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_all_lt_u32_b, vec_all_ltuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_all_lt_s64_a, vec_all_ltv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_lt_s64_b, vec_all_ltv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_lt_b64_a, vec_all_ltuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_lt_b64_b, vec_all_ltv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_all_lt_b64_c, vec_all_ltuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_lt_u64_a, vec_all_ltuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_all_lt_u64_b, vec_all_ltuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_all_lt_dbl, vec_all_ltv2df, 
0, BT_OV_INT_V2DF_V2DF) ++ ++B_DEF (vec_any_eqv16qi, vec_any_eqv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_eqv8hi, vec_any_eqv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_any_eqv4si, vec_any_eqv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_eqv2di, vec_any_eqv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_eqv2df, vec_any_eqv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_any_nev16qi, vec_any_nev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_nev8hi, vec_any_nev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_any_nev4si, vec_any_nev4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_nev2di, vec_any_nev2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_nev2df, vec_any_nev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_any_gev16qi, vec_any_gev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_any_geuv16qi, vec_any_geuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_gev8hi, vec_any_gev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_any_geuv8hi, vec_any_geuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_any_gev4si, vec_any_gev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_any_geuv4si, vec_any_geuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_gev2di, vec_any_gev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_any_geuv2di, vec_any_geuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_gev2df, vec_any_gev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_any_gtv16qi, vec_any_gtv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_any_gtuv16qi, vec_any_gtuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_gtv8hi, vec_any_gtv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_any_gtuv8hi, vec_any_gtuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF 
(vec_any_gtv4si, vec_any_gtv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_any_gtuv4si, vec_any_gtuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_gtv2di, vec_any_gtv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_any_gtuv2di, vec_any_gtuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_gtv2df, vec_any_gtv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_any_lev16qi, vec_any_lev16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_any_leuv16qi, vec_any_leuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_lev8hi, vec_any_lev8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_any_leuv8hi, vec_any_leuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_any_lev4si, vec_any_lev4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_any_leuv4si, vec_any_leuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_lev2di, vec_any_lev2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_any_leuv2di, vec_any_leuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_lev2df, vec_any_lev2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (vec_any_ltv16qi, vec_any_ltv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_V16QI_V16QI) ++B_DEF (vec_any_ltuv16qi, vec_any_ltuv16qi, 0, B_VX | B_INT, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (vec_any_ltv8hi, vec_any_ltv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_V8HI_V8HI) ++B_DEF (vec_any_ltuv8hi, vec_any_ltuv8hi, 0, B_VX | B_INT, 0, BT_FN_INT_UV8HI_UV8HI) ++B_DEF (vec_any_ltv4si, vec_any_ltv4si, 0, B_VX | B_INT, 0, BT_FN_INT_V4SI_V4SI) ++B_DEF (vec_any_ltuv4si, vec_any_ltuv4si, 0, B_VX | B_INT, 0, BT_FN_INT_UV4SI_UV4SI) ++B_DEF (vec_any_ltv2di, vec_any_ltv2di, 0, B_VX | B_INT, 0, BT_FN_INT_V2DI_V2DI) ++B_DEF (vec_any_ltuv2di, vec_any_ltuv2di, 0, B_VX | B_INT, 0, BT_FN_INT_UV2DI_UV2DI) ++B_DEF (vec_any_ltv2df, vec_any_ltv2df, 0, B_VX | B_INT, 0, BT_FN_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_eq, 
s390_vec_any_eq_s8_a,s390_vec_any_eq_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_eq_s8_a, vec_any_eqv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_eq_s8_b, vec_any_eqv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_eq_b8_a, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_eq_b8_b, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_eq_b8_c, vec_any_eqv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_eq_u8_a, vec_any_eqv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_eq_u8_b, vec_any_eqv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_eq_s16_a, vec_any_eqv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_eq_s16_b, vec_any_eqv8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_eq_b16_a, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_eq_b16_b, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_eq_b16_c, vec_any_eqv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_eq_u16_a, vec_any_eqv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_eq_u16_b, vec_any_eqv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_eq_s32_a, vec_any_eqv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_eq_s32_b, vec_any_eqv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_eq_b32_a, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_eq_b32_b, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_eq_b32_c, vec_any_eqv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_eq_u32_a, vec_any_eqv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_eq_u32_b, vec_any_eqv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_eq_s64_a, vec_any_eqv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_eq_s64_b, vec_any_eqv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_eq_b64_a, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR 
(s390_vec_any_eq_b64_b, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_eq_b64_c, vec_any_eqv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_eq_u64_a, vec_any_eqv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_eq_u64_b, vec_any_eqv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_eq_dbl, vec_any_eqv2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_ne, s390_vec_any_ne_s8_a,s390_vec_any_ne_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_ne_s8_a, vec_any_nev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_ne_s8_b, vec_any_nev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ne_b8_a, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ne_b8_b, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_ne_b8_c, vec_any_nev16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_ne_u8_a, vec_any_nev16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_ne_u8_b, vec_any_nev16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ne_s16_a, vec_any_nev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_ne_s16_b, vec_any_nev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_ne_b16_a, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_ne_b16_b, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_ne_b16_c, vec_any_nev8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_ne_u16_a, vec_any_nev8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_ne_u16_b, vec_any_nev8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_ne_s32_a, vec_any_nev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_ne_s32_b, vec_any_nev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ne_b32_a, vec_any_nev4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ne_b32_b, vec_any_nev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_ne_b32_c, vec_any_nev4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR 
(s390_vec_any_ne_u32_a, vec_any_nev4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_ne_u32_b, vec_any_nev4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ne_s64_a, vec_any_nev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_ne_s64_b, vec_any_nev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ne_b64_a, vec_any_nev2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ne_b64_b, vec_any_nev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_ne_b64_c, vec_any_nev2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_ne_u64_a, vec_any_nev2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_ne_u64_b, vec_any_nev2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ne_dbl, vec_any_nev2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_ge, s390_vec_any_ge_s8_a,s390_vec_any_ge_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_ge_s8_a, vec_any_gev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_ge_s8_b, vec_any_gev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ge_b8_a, vec_any_geuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ge_b8_b, vec_any_gev16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_ge_b8_c, vec_any_geuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_ge_u8_a, vec_any_geuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_ge_u8_b, vec_any_geuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_ge_s16_a, vec_any_gev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_ge_s16_b, vec_any_gev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_ge_b16_a, vec_any_geuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_ge_b16_b, vec_any_gev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_ge_b16_c, vec_any_geuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_ge_u16_a, vec_any_geuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_ge_u16_b, vec_any_geuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) 
++OB_DEF_VAR (s390_vec_any_ge_s32_a, vec_any_gev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_ge_s32_b, vec_any_gev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ge_b32_a, vec_any_geuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ge_b32_b, vec_any_gev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_ge_b32_c, vec_any_geuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_ge_u32_a, vec_any_geuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_ge_u32_b, vec_any_geuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_ge_s64_a, vec_any_gev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_ge_s64_b, vec_any_gev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ge_b64_a, vec_any_geuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ge_b64_b, vec_any_gev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_ge_b64_c, vec_any_geuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_ge_u64_a, vec_any_geuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_ge_u64_b, vec_any_geuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_ge_dbl, vec_any_gev2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_gt, s390_vec_any_gt_s8_a,s390_vec_any_gt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_gt_s8_a, vec_any_gtv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_gt_s8_b, vec_any_gtv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_gt_b8_a, vec_any_gtuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_gt_b8_b, vec_any_gtv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_gt_b8_c, vec_any_gtuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_gt_u8_a, vec_any_gtuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_gt_u8_b, vec_any_gtuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_gt_s16_a, vec_any_gtv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_gt_s16_b, vec_any_gtv8hi, 0, 
BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_gt_b16_a, vec_any_gtuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_gt_b16_b, vec_any_gtv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_gt_b16_c, vec_any_gtuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_gt_u16_a, vec_any_gtuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_gt_u16_b, vec_any_gtuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_gt_s32_a, vec_any_gtv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_gt_s32_b, vec_any_gtv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_gt_b32_a, vec_any_gtuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_gt_b32_b, vec_any_gtv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_gt_b32_c, vec_any_gtuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_gt_u32_a, vec_any_gtuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_gt_u32_b, vec_any_gtuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_gt_s64_a, vec_any_gtv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_gt_s64_b, vec_any_gtv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_gt_b64_a, vec_any_gtuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_gt_b64_b, vec_any_gtv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_gt_b64_c, vec_any_gtuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_gt_u64_a, vec_any_gtuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_gt_u64_b, vec_any_gtuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_gt_dbl, vec_any_gtv2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_le, s390_vec_any_le_s8_a,s390_vec_any_le_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_le_s8_a, vec_any_lev16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_le_s8_b, vec_any_lev16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_le_b8_a, vec_any_leuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_le_b8_b, vec_any_lev16qi, 0, 
BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_le_b8_c, vec_any_leuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_le_u8_a, vec_any_leuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_le_u8_b, vec_any_leuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_le_s16_a, vec_any_lev8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_le_s16_b, vec_any_lev8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_le_b16_a, vec_any_leuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_le_b16_b, vec_any_lev8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_le_b16_c, vec_any_leuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_le_u16_a, vec_any_leuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_le_u16_b, vec_any_leuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_le_s32_a, vec_any_lev4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_le_s32_b, vec_any_lev4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_le_b32_a, vec_any_leuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_le_b32_b, vec_any_lev4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_le_b32_c, vec_any_leuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_le_u32_a, vec_any_leuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_le_u32_b, vec_any_leuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_le_s64_a, vec_any_lev2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_le_s64_b, vec_any_lev2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_le_b64_a, vec_any_leuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_le_b64_b, vec_any_lev2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_le_b64_c, vec_any_leuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_le_u64_a, vec_any_leuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_le_u64_b, vec_any_leuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_le_dbl, vec_any_lev2df, 0, 
BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_any_lt, s390_vec_any_lt_s8_a,s390_vec_any_lt_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_any_lt_s8_a, vec_any_ltv16qi, 0, BT_OV_INT_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_lt_s8_b, vec_any_ltv16qi, 0, BT_OV_INT_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_lt_b8_a, vec_any_ltuv16qi, 0, BT_OV_INT_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_lt_b8_b, vec_any_ltv16qi, 0, BT_OV_INT_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_any_lt_b8_c, vec_any_ltuv16qi, 0, BT_OV_INT_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_lt_u8_a, vec_any_ltuv16qi, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_any_lt_u8_b, vec_any_ltuv16qi, 0, BT_OV_INT_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_any_lt_s16_a, vec_any_ltv8hi, 0, BT_OV_INT_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_lt_s16_b, vec_any_ltv8hi, 0, BT_OV_INT_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_lt_b16_a, vec_any_ltuv8hi, 0, BT_OV_INT_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_lt_b16_b, vec_any_ltv8hi, 0, BT_OV_INT_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_any_lt_b16_c, vec_any_ltuv8hi, 0, BT_OV_INT_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_lt_u16_a, vec_any_ltuv8hi, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_any_lt_u16_b, vec_any_ltuv8hi, 0, BT_OV_INT_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_any_lt_s32_a, vec_any_ltv4si, 0, BT_OV_INT_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_lt_s32_b, vec_any_ltv4si, 0, BT_OV_INT_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_lt_b32_a, vec_any_ltuv4si, 0, BT_OV_INT_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_lt_b32_b, vec_any_ltv4si, 0, BT_OV_INT_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_any_lt_b32_c, vec_any_ltuv4si, 0, BT_OV_INT_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_lt_u32_a, vec_any_ltuv4si, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_any_lt_u32_b, vec_any_ltuv4si, 0, BT_OV_INT_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_any_lt_s64_a, vec_any_ltv2di, 0, BT_OV_INT_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_lt_s64_b, vec_any_ltv2di, 0, BT_OV_INT_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_lt_b64_a, 
vec_any_ltuv2di, 0, BT_OV_INT_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_lt_b64_b, vec_any_ltv2di, 0, BT_OV_INT_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_any_lt_b64_c, vec_any_ltuv2di, 0, BT_OV_INT_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_lt_u64_a, vec_any_ltuv2di, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_any_lt_u64_b, vec_any_ltuv2di, 0, BT_OV_INT_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_any_lt_dbl, vec_any_ltv2df, 0, BT_OV_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cmpeq, s390_vec_cmpeq_s8, s390_vec_cmpeq_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cmpeq_s8, s390_vceqb, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_cmpeq_u8, s390_vceqb, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cmpeq_b8, s390_vceqb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_cmpeq_s16, s390_vceqh, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_cmpeq_u16, s390_vceqh, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cmpeq_b16, s390_vceqh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_cmpeq_s32, s390_vceqf, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_cmpeq_u32, s390_vceqf, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cmpeq_b32, s390_vceqf, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_cmpeq_s64, s390_vceqg, 0, BT_OV_BV2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_cmpeq_u64, s390_vceqg, 0, BT_OV_BV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_cmpeq_b64, s390_vceqg, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_cmpeq_dbl, s390_vfcedb, 0, BT_OV_BV2DI_V2DF_V2DF) ++ ++B_DEF (s390_vceqb, vec_cmpeqv16qi, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (s390_vceqh, vec_cmpeqv8hi, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (s390_vceqf, vec_cmpeqv4si, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (s390_vceqg, vec_cmpeqv2di, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (s390_vfcedb, vec_cmpeqv2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cmpge, s390_vec_cmpge_s8, s390_vec_cmpge_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cmpge_s8, 
vec_cmpgev16qi, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_cmpge_u8, vec_cmpgeuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cmpge_s16, vec_cmpgev8hi, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_cmpge_u16, vec_cmpgeuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cmpge_s32, vec_cmpgev4si, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_cmpge_u32, vec_cmpgeuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cmpge_s64, vec_cmpgev2di, 0, BT_OV_BV2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_cmpge_u64, vec_cmpgeuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_cmpge_dbl, s390_vfchedb, 0, BT_OV_BV2DI_V2DF_V2DF) ++ ++B_DEF (vec_cmpgev16qi, vec_cmpgev16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmpgeuv16qi, vec_cmpgeuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmpgev8hi, vec_cmpgev8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmpgeuv8hi, vec_cmpgeuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmpgev4si, vec_cmpgev4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmpgeuv4si, vec_cmpgeuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmpgev2di, vec_cmpgev2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (vec_cmpgeuv2di, vec_cmpgeuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (s390_vfchedb, vec_cmpgev2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cmpgt, s390_vec_cmpgt_s8, s390_vec_cmpgt_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cmpgt_s8, s390_vchb, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_cmpgt_u8, s390_vchlb, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cmpgt_s16, s390_vchh, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_cmpgt_u16, s390_vchlh, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cmpgt_s32, s390_vchf, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_cmpgt_u32, s390_vchlf, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cmpgt_s64, s390_vchg, 
0, BT_OV_BV2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_cmpgt_u64, s390_vchlg, 0, BT_OV_BV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_cmpgt_dbl, s390_vfchdb, 0, BT_OV_BV2DI_V2DF_V2DF) ++ ++B_DEF (s390_vchb, vec_cmpgtv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI) ++B_DEF (s390_vchlb, vec_cmpgtuv16qi, 0, B_VX, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (s390_vchh, vec_cmpgtv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI) ++B_DEF (s390_vchlh, vec_cmpgtuv8hi, 0, B_VX, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (s390_vchf, vec_cmpgtv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI) ++B_DEF (s390_vchlf, vec_cmpgtuv4si, 0, B_VX, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (s390_vchg, vec_cmpgtv2di, 0, B_VX, 0, BT_FN_V2DI_V2DI_V2DI) ++B_DEF (s390_vchlg, vec_cmpgtuv2di, 0, B_VX, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (s390_vfchdb, vec_cmpgtv2df, 0, B_VX, 0, BT_FN_V2DI_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cmple, s390_vec_cmple_s8, s390_vec_cmple_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cmple_s8, vec_cmplev16qi, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_cmple_u8, vec_cmpleuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cmple_s16, vec_cmplev8hi, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_cmple_u16, vec_cmpleuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cmple_s32, vec_cmplev4si, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_cmple_u32, vec_cmpleuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cmple_s64, vec_cmplev2di, 0, BT_OV_BV2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_cmple_u64, vec_cmpleuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_cmple_dbl, vec_cmplev2df, 0, BT_OV_BV2DI_V2DF_V2DF) ++ ++B_DEF (vec_cmplev16qi, vec_cmplev16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmpleuv16qi, vec_cmpleuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmplev8hi, vec_cmplev8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmpleuv8hi, vec_cmpleuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmplev4si, 
vec_cmplev4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmpleuv4si, vec_cmpleuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmplev2di, vec_cmplev2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (vec_cmpleuv2di, vec_cmpleuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (vec_cmplev2df, vec_cmplev2df, 0, B_VX | B_INT, 0, BT_FN_V2DI_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cmplt, s390_vec_cmplt_s8, s390_vec_cmplt_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cmplt_s8, vec_cmpltv16qi, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_cmplt_u8, vec_cmpltuv16qi, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cmplt_s16, vec_cmpltv8hi, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_cmplt_u16, vec_cmpltuv8hi, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cmplt_s32, vec_cmpltv4si, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_cmplt_u32, vec_cmpltuv4si, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cmplt_s64, vec_cmpltv2di, 0, BT_OV_BV2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_cmplt_u64, vec_cmpltuv2di, 0, BT_OV_BV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_cmplt_dbl, vec_cmpltv2df, 0, BT_OV_BV2DI_V2DF_V2DF) ++ ++B_DEF (vec_cmpltv16qi, vec_cmpltv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmpltuv16qi, vec_cmpltuv16qi, 0, B_VX | B_INT, 0, BT_FN_V16QI_UV16QI_UV16QI) ++B_DEF (vec_cmpltv8hi, vec_cmpltv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmpltuv8hi, vec_cmpltuv8hi, 0, B_VX | B_INT, 0, BT_FN_V8HI_UV8HI_UV8HI) ++B_DEF (vec_cmpltv4si, vec_cmpltv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmpltuv4si, vec_cmpltuv4si, 0, B_VX | B_INT, 0, BT_FN_V4SI_UV4SI_UV4SI) ++B_DEF (vec_cmpltv2di, vec_cmpltv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (vec_cmpltuv2di, vec_cmpltuv2di, 0, B_VX | B_INT, 0, BT_FN_V2DI_UV2DI_UV2DI) ++B_DEF (vec_cmpltv2df, vec_cmpltv2df, 0, B_VX | B_INT, 0, BT_FN_V2DI_V2DF_V2DF) ++ ++OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8, 
s390_vec_cntlz_u64, B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cntlz_s8, s390_vclzb, 0, BT_OV_UV16QI_V16QI) ++OB_DEF_VAR (s390_vec_cntlz_u8, s390_vclzb, 0, BT_OV_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cntlz_s16, s390_vclzh, 0, BT_OV_UV8HI_V8HI) ++OB_DEF_VAR (s390_vec_cntlz_u16, s390_vclzh, 0, BT_OV_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cntlz_s32, s390_vclzf, 0, BT_OV_UV4SI_V4SI) ++OB_DEF_VAR (s390_vec_cntlz_u32, s390_vclzf, 0, BT_OV_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cntlz_s64, s390_vclzg, 0, BT_OV_UV2DI_V2DI) ++OB_DEF_VAR (s390_vec_cntlz_u64, s390_vclzg, 0, BT_OV_UV2DI_UV2DI) ++ ++B_DEF (s390_vclzb, clzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI) ++B_DEF (s390_vclzh, clzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI) ++B_DEF (s390_vclzf, clzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI) ++B_DEF (s390_vclzg, clzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8, s390_vec_cnttz_u64, B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_cnttz_s8, s390_vctzb, 0, BT_OV_UV16QI_V16QI) ++OB_DEF_VAR (s390_vec_cnttz_u8, s390_vctzb, 0, BT_OV_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_cnttz_s16, s390_vctzh, 0, BT_OV_UV8HI_V8HI) ++OB_DEF_VAR (s390_vec_cnttz_u16, s390_vctzh, 0, BT_OV_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_cnttz_s32, s390_vctzf, 0, BT_OV_UV4SI_V4SI) ++OB_DEF_VAR (s390_vec_cnttz_u32, s390_vctzf, 0, BT_OV_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_cnttz_s64, s390_vctzg, 0, BT_OV_UV2DI_V2DI) ++OB_DEF_VAR (s390_vec_cnttz_u64, s390_vctzg, 0, BT_OV_UV2DI_UV2DI) ++ ++B_DEF (s390_vctzb, ctzv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI) ++B_DEF (s390_vctzh, ctzv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI) ++B_DEF (s390_vctzf, ctzv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI) ++B_DEF (s390_vctzg, ctzv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_xor, s390_vec_xor_b8, s390_vec_xor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_xor_b8, s390_vx, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_xor_s8_a, s390_vx, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_xor_s8_b, 
s390_vx, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_xor_s8_c, s390_vx, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_xor_u8_a, s390_vx, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_xor_u8_b, s390_vx, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_xor_u8_c, s390_vx, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_xor_b16, s390_vx, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_xor_s16_a, s390_vx, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_xor_s16_b, s390_vx, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_xor_s16_c, s390_vx, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_xor_u16_a, s390_vx, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_xor_u16_b, s390_vx, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_xor_u16_c, s390_vx, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_xor_b32, s390_vx, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_xor_s32_a, s390_vx, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_xor_s32_b, s390_vx, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_xor_s32_c, s390_vx, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_xor_u32_a, s390_vx, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_xor_u32_b, s390_vx, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_xor_u32_c, s390_vx, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_xor_b64, s390_vx, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_xor_s64_a, s390_vx, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_xor_s64_b, s390_vx, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_xor_s64_c, s390_vx, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_xor_u64_a, s390_vx, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_xor_u64_b, s390_vx, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_xor_u64_c, s390_vx, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_xor_dbl_a, s390_vx, 0, BT_OV_V2DF_BV2DI_V2DF) ++OB_DEF_VAR (s390_vec_xor_dbl_b, s390_vx, 0, BT_OV_V2DF_V2DF_V2DF) ++OB_DEF_VAR (s390_vec_xor_dbl_c, s390_vx, 0, BT_OV_V2DF_V2DF_BV2DI) ++ 
++B_DEF (s390_vx, xorv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_gfmsum, s390_vec_gfmsum_u8, s390_vec_gfmsum_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_gfmsum_u8, s390_vgfmb, 0, BT_OV_UV8HI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_gfmsum_u16, s390_vgfmh, 0, BT_OV_UV4SI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_gfmsum_u32, s390_vgfmf, 0, BT_OV_UV2DI_UV4SI_UV4SI) ++ ++B_DEF (s390_vgfmb, vec_gfmsumv16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI) ++B_DEF (s390_vgfmh, vec_gfmsumv8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI) ++B_DEF (s390_vgfmf, vec_gfmsumv4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI) ++B_DEF (s390_vgfmg, vec_gfmsum_128, 0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_gfmsum_accum, s390_vec_gfmsum_accum_u8,s390_vec_gfmsum_accum_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_gfmsum_accum_u8, s390_vgfmab, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_gfmsum_accum_u16, s390_vgfmah, 0, BT_OV_UV4SI_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_gfmsum_accum_u32, s390_vgfmaf, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI) ++ ++B_DEF (s390_vgfmab, vec_gfmsum_accumv16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI) ++B_DEF (s390_vgfmah, vec_gfmsum_accumv8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI) ++B_DEF (s390_vgfmaf, vec_gfmsum_accumv4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI) ++B_DEF (s390_vgfmag, vec_gfmsum_accum_128,0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI_UV16QI) ++ ++OB_DEF (s390_vec_abs, s390_vec_abs_s8, s390_vec_abs_dbl, B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_abs_s8, s390_vlpb, 0, BT_OV_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_abs_s16, s390_vlph, 0, BT_OV_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_abs_s32, s390_vlpf, 0, BT_OV_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_abs_s64, s390_vlpg, 0, BT_OV_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_abs_dbl, s390_vflpdb, 0, BT_OV_V2DF_V2DF) ++ ++B_DEF (s390_vlpb, absv16qi2, 0, B_VX, 0, BT_FN_V16QI_V16QI) ++B_DEF (s390_vlph, absv8hi2, 0, B_VX, 0, BT_FN_V8HI_V8HI) ++B_DEF (s390_vlpf, absv4si2, 
0, B_VX, 0, BT_FN_V4SI_V4SI) ++B_DEF (s390_vlpg, absv2di2, 0, B_VX, 0, BT_FN_V2DI_V2DI) ++B_DEF (s390_vflpdb, absv2df2, 0, B_VX, 0, BT_FN_V2DF_V2DF) ++ ++OB_DEF (s390_vec_max, s390_vec_max_s8_a, s390_vec_max_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_max_s8_a, s390_vmxb, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_max_s8_b, s390_vmxb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_max_s8_c, s390_vmxb, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_max_u8_a, s390_vmxlb, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_max_u8_b, s390_vmxlb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_max_u8_c, s390_vmxlb, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_max_s16_a, s390_vmxh, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_max_s16_b, s390_vmxh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_max_s16_c, s390_vmxh, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_max_u16_a, s390_vmxlh, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_max_u16_b, s390_vmxlh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_max_u16_c, s390_vmxlh, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_max_s32_a, s390_vmxf, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_max_s32_b, s390_vmxf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_max_s32_c, s390_vmxf, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_max_u32_a, s390_vmxlf, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_max_u32_b, s390_vmxlf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_max_u32_c, s390_vmxlf, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_max_s64_a, s390_vmxg, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_max_s64_b, s390_vmxg, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_max_s64_c, s390_vmxg, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_max_u64_a, s390_vmxlg, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_max_u64_b, s390_vmxlg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_max_u64_c, s390_vmxlg, 0, BT_OV_UV2DI_UV2DI_BV2DI) 
++OB_DEF_VAR (s390_vec_max_dbl, s390_vec_max_dbl, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmxb, smaxv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI) ++B_DEF (s390_vmxlb, umaxv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmxh, smaxv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI) ++B_DEF (s390_vmxlh, umaxv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmxf, smaxv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI) ++B_DEF (s390_vmxlf, umaxv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmxg, smaxv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI) ++B_DEF (s390_vmxlg, umaxv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++B_DEF (s390_vec_max_dbl, smaxv2df3, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_V2DF) ++ ++OB_DEF (s390_vec_min, s390_vec_min_s8_a, s390_vec_min_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_min_s8_a, s390_vmnb, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_min_s8_b, s390_vmnb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_min_s8_c, s390_vmnb, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_min_u8_a, s390_vmnlb, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_min_u8_b, s390_vmnlb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_min_u8_c, s390_vmnlb, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_min_s16_a, s390_vmnh, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_min_s16_b, s390_vmnh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_min_s16_c, s390_vmnh, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_min_u16_a, s390_vmnlh, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_min_u16_b, s390_vmnlh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_min_u16_c, s390_vmnlh, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_min_s32_a, s390_vmnf, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_min_s32_b, s390_vmnf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_min_s32_c, s390_vmnf, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_min_u32_a, s390_vmnlf, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR 
(s390_vec_min_u32_b, s390_vmnlf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_min_u32_c, s390_vmnlf, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_min_s64_a, s390_vmng, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_min_s64_b, s390_vmng, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_min_s64_c, s390_vmng, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_min_u64_a, s390_vmnlg, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_min_u64_b, s390_vmnlg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_min_u64_c, s390_vmnlg, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_min_dbl, s390_vec_min_dbl, 0, BT_OV_V2DF_V2DF_V2DF) ++ ++B_DEF (s390_vmnb, sminv16qi3, 0, B_VX, 0, BT_FN_V16QI_BV16QI_V16QI) ++B_DEF (s390_vmnlb, uminv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmnh, sminv8hi3, 0, B_VX, 0, BT_FN_V8HI_BV8HI_V8HI) ++B_DEF (s390_vmnlh, uminv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmnf, sminv4si3, 0, B_VX, 0, BT_FN_V4SI_BV4SI_V4SI) ++B_DEF (s390_vmnlf, uminv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmng, sminv2di3, 0, B_VX, 0, BT_FN_V2DI_BV2DI_V2DI) ++B_DEF (s390_vmnlg, uminv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++B_DEF (s390_vec_min_dbl, sminv2df3, 0, B_VX | B_INT, 0, BT_FN_V2DF_V2DF_V2DF) ++ ++OB_DEF (s390_vec_mladd, s390_vec_mladd_u8, s390_vec_mladd_s32_c,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mladd_u8, s390_vmalb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mladd_s8_a, s390_vmalb, 0, BT_OV_V16QI_UV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mladd_s8_b, s390_vmalb, 0, BT_OV_V16QI_V16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mladd_s8_c, s390_vmalb, 0, BT_OV_V16QI_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mladd_u16, s390_vmalhw, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mladd_s16_a, s390_vmalhw, 0, BT_OV_V8HI_UV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mladd_s16_b, s390_vmalhw, 0, BT_OV_V8HI_V8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mladd_s16_c, 
s390_vmalhw, 0, BT_OV_V8HI_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mladd_u32, s390_vmalf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mladd_s32_a, s390_vmalf, 0, BT_OV_V4SI_UV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_mladd_s32_b, s390_vmalf, 0, BT_OV_V4SI_V4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mladd_s32_c, s390_vmalf, 0, BT_OV_V4SI_V4SI_V4SI_V4SI) ++ ++B_DEF (s390_vmalb, vec_vmalv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmalhw, vec_vmalv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmalf, vec_vmalv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_mhadd, s390_vec_mhadd_u8, s390_vec_mhadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mhadd_u8, s390_vmalhb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mhadd_s8, s390_vmahb, 0, BT_OV_V16QI_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mhadd_u16, s390_vmalhh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mhadd_s16, s390_vmahh, 0, BT_OV_V8HI_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mhadd_u32, s390_vmalhf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mhadd_s32, s390_vmahf, 0, BT_OV_V4SI_V4SI_V4SI_V4SI) ++ ++B_DEF (s390_vmalhb, vec_vmalhv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmahb, vec_vmahv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI_V16QI) ++B_DEF (s390_vmalhh, vec_vmalhv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmahh, vec_vmahv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI_V8HI) ++B_DEF (s390_vmalhf, vec_vmalhv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmahf, vec_vmahv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI_V4SI) ++ ++OB_DEF (s390_vec_meadd, s390_vec_meadd_u8, s390_vec_meadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_meadd_u8, s390_vmaleb, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_meadd_s8, s390_vmaeb, 0, BT_OV_V8HI_V16QI_V16QI_V8HI) ++OB_DEF_VAR (s390_vec_meadd_u16, s390_vmaleh, 0, 
BT_OV_UV4SI_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_meadd_s16, s390_vmaeh, 0, BT_OV_V4SI_V8HI_V8HI_V4SI) ++OB_DEF_VAR (s390_vec_meadd_u32, s390_vmalef, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI) ++OB_DEF_VAR (s390_vec_meadd_s32, s390_vmaef, 0, BT_OV_V2DI_V4SI_V4SI_V2DI) ++ ++B_DEF (s390_vmaleb, vec_vmalev16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI) ++B_DEF (s390_vmaeb, vec_vmaev16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI_V8HI) ++B_DEF (s390_vmaleh, vec_vmalev8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI) ++B_DEF (s390_vmaeh, vec_vmaev8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI_V4SI) ++B_DEF (s390_vmalef, vec_vmalev4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI) ++B_DEF (s390_vmaef, vec_vmaev4si, 0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI_V2DI) ++ ++OB_DEF (s390_vec_moadd, s390_vec_moadd_u8, s390_vec_moadd_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_moadd_u8, s390_vmalob, 0, BT_OV_UV8HI_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_moadd_s8, s390_vmaob, 0, BT_OV_V8HI_V16QI_V16QI_V8HI) ++OB_DEF_VAR (s390_vec_moadd_u16, s390_vmaloh, 0, BT_OV_UV4SI_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_moadd_s16, s390_vmaoh, 0, BT_OV_V4SI_V8HI_V8HI_V4SI) ++OB_DEF_VAR (s390_vec_moadd_u32, s390_vmalof, 0, BT_OV_UV2DI_UV4SI_UV4SI_UV2DI) ++OB_DEF_VAR (s390_vec_moadd_s32, s390_vmaof, 0, BT_OV_V2DI_V4SI_V4SI_V2DI) ++ ++B_DEF (s390_vmalob, vec_vmalov16qi, 0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI_UV8HI) ++B_DEF (s390_vmaob, vec_vmaov16qi, 0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI_V8HI) ++B_DEF (s390_vmaloh, vec_vmalov8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI_UV4SI) ++B_DEF (s390_vmaoh, vec_vmaov8hi, 0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI_V4SI) ++B_DEF (s390_vmalof, vec_vmalov4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI_UV2DI) ++B_DEF (s390_vmaof, vec_vmaov4si, 0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI_V2DI) ++ ++OB_DEF (s390_vec_mulh, s390_vec_mulh_u8, s390_vec_mulh_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mulh_u8, s390_vmlhb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mulh_s8, s390_vmhb, 
0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mulh_u16, s390_vmlhh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mulh_s16, s390_vmhh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mulh_u32, s390_vmlhf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mulh_s32, s390_vmhf, 0, BT_OV_V4SI_V4SI_V4SI) ++ ++B_DEF (s390_vmlhb, vec_umulhv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vmhb, vec_smulhv16qi, 0, B_VX, 0, BT_FN_V16QI_V16QI_V16QI) ++B_DEF (s390_vmlhh, vec_umulhv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vmhh, vec_smulhv8hi, 0, B_VX, 0, BT_FN_V8HI_V8HI_V8HI) ++B_DEF (s390_vmlhf, vec_umulhv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vmhf, vec_smulhv4si, 0, B_VX, 0, BT_FN_V4SI_V4SI_V4SI) ++ ++OB_DEF (s390_vec_mule, s390_vec_mule_u8, s390_vec_mule_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mule_u8, s390_vmleb, 0, BT_OV_UV8HI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mule_s8, s390_vmeb, 0, BT_OV_V8HI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mule_u16, s390_vmleh, 0, BT_OV_UV4SI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mule_s15, s390_vmeh, 0, BT_OV_V4SI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mule_u32, s390_vmlef, 0, BT_OV_UV2DI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mule_s32, s390_vmef, 0, BT_OV_V2DI_V4SI_V4SI) ++ ++B_DEF (s390_vmleb, vec_widen_umult_even_v16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI) ++B_DEF (s390_vmeb, vec_widen_smult_even_v16qi,0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI) ++B_DEF (s390_vmleh, vec_widen_umult_even_v8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI) ++B_DEF (s390_vmeh, vec_widen_smult_even_v8hi,0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI) ++B_DEF (s390_vmlef, vec_widen_umult_even_v4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI) ++B_DEF (s390_vmef, vec_widen_smult_even_v4si,0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI) ++ ++OB_DEF (s390_vec_mulo, s390_vec_mulo_u8, s390_vec_mulo_s32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_mulo_u8, s390_vmlob, 0, BT_OV_UV8HI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_mulo_s8, s390_vmob, 0, 
BT_OV_V8HI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_mulo_u16, s390_vmloh, 0, BT_OV_UV4SI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_mulo_s16, s390_vmoh, 0, BT_OV_V4SI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_mulo_u32, s390_vmlof, 0, BT_OV_UV2DI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_mulo_s32, s390_vmof, 0, BT_OV_V2DI_V4SI_V4SI) ++ ++B_DEF (s390_vmlob, vec_widen_umult_odd_v16qi,0, B_VX, 0, BT_FN_UV8HI_UV16QI_UV16QI) ++B_DEF (s390_vmob, vec_widen_smult_odd_v16qi,0, B_VX, 0, BT_FN_V8HI_V16QI_V16QI) ++B_DEF (s390_vmloh, vec_widen_umult_odd_v8hi,0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI) ++B_DEF (s390_vmoh, vec_widen_smult_odd_v8hi,0, B_VX, 0, BT_FN_V4SI_V8HI_V8HI) ++B_DEF (s390_vmlof, vec_widen_umult_odd_v4si,0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI) ++B_DEF (s390_vmof, vec_widen_smult_odd_v4si,0, B_VX, 0, BT_FN_V2DI_V4SI_V4SI) ++ ++OB_DEF (s390_vec_nor, s390_vec_nor_b8, s390_vec_nor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_nor_b8, s390_vno, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_nor_s8_a, s390_vno, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_nor_s8_b, s390_vno, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_nor_s8_c, s390_vno, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_nor_u8_a, s390_vno, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_nor_u8_b, s390_vno, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_nor_u8_c, s390_vno, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_nor_b16, s390_vno, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_nor_s16_a, s390_vno, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_nor_s16_b, s390_vno, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_nor_s16_c, s390_vno, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_nor_u16_a, s390_vno, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_nor_u16_b, s390_vno, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_nor_u16_c, s390_vno, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_nor_b32, s390_vno, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR 
(s390_vec_nor_s32_a, s390_vno, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_nor_s32_b, s390_vno, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_nor_s32_c, s390_vno, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_nor_u32_a, s390_vno, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_nor_u32_b, s390_vno, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_nor_u32_c, s390_vno, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_nor_b64, s390_vno, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_nor_s64_a, s390_vno, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_nor_s64_b, s390_vno, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_nor_s64_c, s390_vno, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_nor_u64_a, s390_vno, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_nor_u64_b, s390_vno, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_nor_u64_c, s390_vno, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_nor_dbl_a, s390_vno, 0, BT_OV_V2DF_BV2DI_V2DF) ++OB_DEF_VAR (s390_vec_nor_dbl_b, s390_vno, 0, BT_OV_V2DF_V2DF_V2DF) ++OB_DEF_VAR (s390_vec_nor_dbl_c, s390_vno, 0, BT_OV_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vno, vec_norv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_or, s390_vec_or_b8, s390_vec_or_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_or_b8, s390_vo, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_or_s8_a, s390_vo, 0, BT_OV_V16QI_BV16QI_V16QI) ++OB_DEF_VAR (s390_vec_or_s8_b, s390_vo, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_or_s8_c, s390_vo, 0, BT_OV_V16QI_V16QI_BV16QI) ++OB_DEF_VAR (s390_vec_or_u8_a, s390_vo, 0, BT_OV_UV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_or_u8_b, s390_vo, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_or_u8_c, s390_vo, 0, BT_OV_UV16QI_UV16QI_BV16QI) ++OB_DEF_VAR (s390_vec_or_b16, s390_vo, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_or_s16_a, s390_vo, 0, BT_OV_V8HI_BV8HI_V8HI) ++OB_DEF_VAR (s390_vec_or_s16_b, s390_vo, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR 
(s390_vec_or_s16_c, s390_vo, 0, BT_OV_V8HI_V8HI_BV8HI) ++OB_DEF_VAR (s390_vec_or_u16_a, s390_vo, 0, BT_OV_UV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_or_u16_b, s390_vo, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_or_u16_c, s390_vo, 0, BT_OV_UV8HI_UV8HI_BV8HI) ++OB_DEF_VAR (s390_vec_or_b32, s390_vo, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_or_s32_a, s390_vo, 0, BT_OV_V4SI_BV4SI_V4SI) ++OB_DEF_VAR (s390_vec_or_s32_b, s390_vo, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_or_s32_c, s390_vo, 0, BT_OV_V4SI_V4SI_BV4SI) ++OB_DEF_VAR (s390_vec_or_u32_a, s390_vo, 0, BT_OV_UV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_or_u32_b, s390_vo, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_or_u32_c, s390_vo, 0, BT_OV_UV4SI_UV4SI_BV4SI) ++OB_DEF_VAR (s390_vec_or_b64, s390_vo, 0, BT_OV_BV2DI_BV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_or_s64_a, s390_vo, 0, BT_OV_V2DI_BV2DI_V2DI) ++OB_DEF_VAR (s390_vec_or_s64_b, s390_vo, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_or_s64_c, s390_vo, 0, BT_OV_V2DI_V2DI_BV2DI) ++OB_DEF_VAR (s390_vec_or_u64_a, s390_vo, 0, BT_OV_UV2DI_BV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_or_u64_b, s390_vo, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_or_u64_c, s390_vo, 0, BT_OV_UV2DI_UV2DI_BV2DI) ++OB_DEF_VAR (s390_vec_or_dbl_a, s390_vo, 0, BT_OV_V2DF_BV2DI_V2DF) ++OB_DEF_VAR (s390_vec_or_dbl_b, s390_vo, 0, BT_OV_V2DF_V2DF_V2DF) ++OB_DEF_VAR (s390_vec_or_dbl_c, s390_vo, 0, BT_OV_V2DF_V2DF_BV2DI) ++ ++B_DEF (s390_vo, iorv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_popcnt, s390_vec_popcnt_s8, s390_vec_popcnt_u64,B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_popcnt_s8, s390_vpopctb, 0, BT_OV_UV16QI_V16QI) /* vpopct */ ++OB_DEF_VAR (s390_vec_popcnt_u8, s390_vpopctb, 0, BT_OV_UV16QI_UV16QI) /* vpopct */ ++OB_DEF_VAR (s390_vec_popcnt_s16, s390_vpopcth, 0, BT_OV_UV8HI_V8HI) /* vpopct */ ++OB_DEF_VAR (s390_vec_popcnt_u16, s390_vpopcth, 0, BT_OV_UV8HI_UV8HI) /* vpopct */ ++OB_DEF_VAR (s390_vec_popcnt_s32, s390_vpopctf, 0, 
BT_OV_UV4SI_V4SI) /* vpopct vsumb */ ++OB_DEF_VAR (s390_vec_popcnt_u32, s390_vpopctf, 0, BT_OV_UV4SI_UV4SI) /* vpopct vsumb */ ++OB_DEF_VAR (s390_vec_popcnt_s64, s390_vpopctg, 0, BT_OV_UV2DI_V2DI) /* vpopct vsumb vsumgf */ ++OB_DEF_VAR (s390_vec_popcnt_u64, s390_vpopctg, 0, BT_OV_UV2DI_UV2DI) /* vpopct vsumb vsumgf */ ++ ++B_DEF (s390_vpopctb, popcountv16qi2, 0, B_VX, 0, BT_FN_UV16QI_UV16QI) /* vpopct */ ++B_DEF (s390_vpopcth, popcountv8hi2, 0, B_VX, 0, BT_FN_UV8HI_UV8HI) /* vpopct */ ++B_DEF (s390_vpopctf, popcountv4si2, 0, B_VX, 0, BT_FN_UV4SI_UV4SI) /* vpopct vsumb */ ++B_DEF (s390_vpopctg, popcountv2di2, 0, B_VX, 0, BT_FN_UV2DI_UV2DI) /* vpopct vsumb vsumgf */ ++ ++OB_DEF (s390_vec_rl, s390_vec_rl_u8, s390_vec_rl_s64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_rl_u8, s390_verllvb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_rl_s8, s390_verllvb, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_rl_u16, s390_verllvh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_rl_s16, s390_verllvh, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_rl_u32, s390_verllvf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_rl_s32, s390_verllvf, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_rl_u64, s390_verllvg, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_rl_s64, s390_verllvg, 0, BT_OV_V2DI_V2DI_UV2DI) ++ ++B_DEF (s390_verllvb, vrotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_verllvh, vrotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_verllvf, vrotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_verllvg, vrotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_rli, s390_vec_rli_u8, s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG) ++OB_DEF_VAR (s390_vec_rli_u8, s390_verllb, 0, BT_OV_UV16QI_UV16QI_ULONG) ++OB_DEF_VAR (s390_vec_rli_s8, s390_verllb, 0, BT_OV_V16QI_V16QI_ULONG) ++OB_DEF_VAR (s390_vec_rli_u16, s390_verllh, 0, BT_OV_UV8HI_UV8HI_ULONG) ++OB_DEF_VAR (s390_vec_rli_s16, s390_verllh, 0, 
BT_OV_V8HI_V8HI_ULONG) ++OB_DEF_VAR (s390_vec_rli_u32, s390_verllf, 0, BT_OV_UV4SI_UV4SI_ULONG) ++OB_DEF_VAR (s390_vec_rli_s32, s390_verllf, 0, BT_OV_V4SI_V4SI_ULONG) ++OB_DEF_VAR (s390_vec_rli_u64, s390_verllg, 0, BT_OV_UV2DI_UV2DI_ULONG) ++OB_DEF_VAR (s390_vec_rli_s64, s390_verllg, 0, BT_OV_V2DI_V2DI_ULONG) ++ ++B_DEF (s390_verllb, rotlv16qi3, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UINT) ++B_DEF (s390_verllh, rotlv8hi3, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UINT) ++B_DEF (s390_verllf, rotlv4si3, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UINT) ++B_DEF (s390_verllg, rotlv2di3, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UINT) ++ ++OB_DEF (s390_vec_rl_mask, s390_vec_rl_mask_s8,s390_vec_rl_mask_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_s8, s390_verimb, O3_U8, BT_OV_V16QI_V16QI_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_u8, s390_verimb, O3_U8, BT_OV_UV16QI_UV16QI_UV16QI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_s16, s390_verimh, O3_U8, BT_OV_V8HI_V8HI_UV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_u16, s390_verimh, O3_U8, BT_OV_UV8HI_UV8HI_UV8HI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_s32, s390_verimf, O3_U8, BT_OV_V4SI_V4SI_UV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_u32, s390_verimf, O3_U8, BT_OV_UV4SI_UV4SI_UV4SI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_s64, s390_verimg, O3_U8, BT_OV_V2DI_V2DI_UV2DI_UCHAR) ++OB_DEF_VAR (s390_vec_rl_mask_u64, s390_verimg, O3_U8, BT_OV_UV2DI_UV2DI_UV2DI_UCHAR) ++ ++B_DEF (s390_verimb, verimv16qi, 0, B_VX, O4_U8, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT) ++B_DEF (s390_verimh, verimv8hi, 0, B_VX, O4_U8, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT) ++B_DEF (s390_verimf, verimv4si, 0, B_VX, O4_U8, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT) ++B_DEF (s390_verimg, verimv2di, 0, B_VX, O4_U8, BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT) ++ ++OB_DEF (s390_vec_sll, s390_vec_sll_u8q, s390_vec_sll_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sll_u8q, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_u8h, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR 
(s390_vec_sll_u8s, s390_vsl, 0, BT_OV_UV16QI_UV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_s8q, s390_vsl, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_s8h, s390_vsl, 0, BT_OV_V16QI_V16QI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_s8s, s390_vsl, 0, BT_OV_V16QI_V16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_b8q, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_b8h, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_b8s, s390_vsl, 0, BT_OV_BV16QI_BV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_u16q, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_u16h, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_u16s, s390_vsl, 0, BT_OV_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_s16q, s390_vsl, 0, BT_OV_V8HI_V8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_s16h, s390_vsl, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_s16s, s390_vsl, 0, BT_OV_V8HI_V8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_b16q, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_b16h, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_b16s, s390_vsl, 0, BT_OV_BV8HI_BV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_u32q, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_u32h, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_u32s, s390_vsl, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_s32q, s390_vsl, 0, BT_OV_V4SI_V4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_s32h, s390_vsl, 0, BT_OV_V4SI_V4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_s32s, s390_vsl, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_b32q, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_b32h, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_b32s, s390_vsl, 0, BT_OV_BV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_u64q, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_u64h, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_u64s, s390_vsl, 0, BT_OV_UV2DI_UV2DI_UV4SI) ++OB_DEF_VAR 
(s390_vec_sll_s64q, s390_vsl, 0, BT_OV_V2DI_V2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_s64h, s390_vsl, 0, BT_OV_V2DI_V2DI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_s64s, s390_vsl, 0, BT_OV_V2DI_V2DI_UV4SI) ++OB_DEF_VAR (s390_vec_sll_b64q, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sll_b64h, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV8HI) ++OB_DEF_VAR (s390_vec_sll_b64s, s390_vsl, 0, BT_OV_BV2DI_BV2DI_UV4SI) ++ ++B_DEF (s390_vsl, vec_sllv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_slb, s390_vec_slb_u8_u8, s390_vec_slb_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_slb_u8_u8, s390_vslb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_slb_u8_s8, s390_vslb, 0, BT_OV_UV16QI_UV16QI_V16QI) ++OB_DEF_VAR (s390_vec_slb_s8_u8, s390_vslb, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_slb_s8_s8, s390_vslb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_slb_u16_u16, s390_vslb, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_slb_u16_s16, s390_vslb, 0, BT_OV_UV8HI_UV8HI_V8HI) ++OB_DEF_VAR (s390_vec_slb_s16_u16, s390_vslb, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_slb_s16_s16, s390_vslb, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_slb_u32_u32, s390_vslb, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_slb_u32_s32, s390_vslb, 0, BT_OV_UV4SI_UV4SI_V4SI) ++OB_DEF_VAR (s390_vec_slb_s32_u32, s390_vslb, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_slb_s32_s32, s390_vslb, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_slb_u64_u64, s390_vslb, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_slb_u64_s64, s390_vslb, 0, BT_OV_UV2DI_UV2DI_V2DI) ++OB_DEF_VAR (s390_vec_slb_s64_u64, s390_vslb, 0, BT_OV_V2DI_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_slb_s64_s64, s390_vslb, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_slb_dbl_u64, s390_vslb, 0, BT_OV_V2DF_V2DF_UV2DI) ++OB_DEF_VAR (s390_vec_slb_dbl_s64, s390_vslb, 0, BT_OV_V2DF_V2DF_V2DI) ++ ++B_DEF (s390_vslb, vec_slbv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ 
++OB_DEF (s390_vec_sld, s390_vec_sld_s8, s390_vec_sld_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_s8, s390_vsldb, O3_U4, BT_OV_V16QI_V16QI_V16QI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_u8, s390_vsldb, O3_U4, BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_s16, s390_vsldb, O3_U4, BT_OV_V8HI_V8HI_V8HI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_u16, s390_vsldb, O3_U4, BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_s32, s390_vsldb, O3_U4, BT_OV_V4SI_V4SI_V4SI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_u32, s390_vsldb, O3_U4, BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_s64, s390_vsldb, O3_U4, BT_OV_V2DI_V2DI_V2DI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_u64, s390_vsldb, O3_U4, BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG) ++OB_DEF_VAR (s390_vec_sld_dbl, s390_vsldb, O3_U4, BT_OV_V2DF_V2DF_V2DF_ULONGLONG) ++ ++B_DEF (s390_vsldb, vec_sldv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT) ++ ++OB_DEF (s390_vec_sldw, s390_vec_sldw_s8, s390_vec_sldw_dbl, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INT) ++OB_DEF_VAR (s390_vec_sldw_s8, s390_vsldb, O3_U4, BT_OV_V16QI_V16QI_V16QI_INT) ++OB_DEF_VAR (s390_vec_sldw_u8, s390_vsldb, O3_U4, BT_OV_UV16QI_UV16QI_UV16QI_INT) ++OB_DEF_VAR (s390_vec_sldw_s16, s390_vsldb, O3_U4, BT_OV_V8HI_V8HI_V8HI_INT) ++OB_DEF_VAR (s390_vec_sldw_u16, s390_vsldb, O3_U4, BT_OV_UV8HI_UV8HI_UV8HI_INT) ++OB_DEF_VAR (s390_vec_sldw_s32, s390_vsldb, O3_U4, BT_OV_V4SI_V4SI_V4SI_INT) ++OB_DEF_VAR (s390_vec_sldw_u32, s390_vsldb, O3_U4, BT_OV_UV4SI_UV4SI_UV4SI_INT) ++OB_DEF_VAR (s390_vec_sldw_s64, s390_vsldb, O3_U4, BT_OV_V2DI_V2DI_V2DI_INT) ++OB_DEF_VAR (s390_vec_sldw_u64, s390_vsldb, O3_U4, BT_OV_UV2DI_UV2DI_UV2DI_INT) ++OB_DEF_VAR (s390_vec_sldw_dbl, s390_vsldb, O3_U4, BT_OV_V2DF_V2DF_V2DF_INT) ++ ++OB_DEF (s390_vec_sral, s390_vec_sral_u8q, s390_vec_sral_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sral_u8q, s390_vsra, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_u8h, s390_vsra, 0, 
BT_OV_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_u8s, s390_vsra, 0, BT_OV_UV16QI_UV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_s8q, s390_vsra, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_s8h, s390_vsra, 0, BT_OV_V16QI_V16QI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_s8s, s390_vsra, 0, BT_OV_V16QI_V16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_b8q, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_b8h, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_b8s, s390_vsra, 0, BT_OV_BV16QI_BV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_u16q, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_u16h, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_u16s, s390_vsra, 0, BT_OV_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_s16q, s390_vsra, 0, BT_OV_V8HI_V8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_s16h, s390_vsra, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_s16s, s390_vsra, 0, BT_OV_V8HI_V8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_b16q, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_b16h, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_b16s, s390_vsra, 0, BT_OV_BV8HI_BV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_u32q, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_u32h, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_u32s, s390_vsra, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_s32q, s390_vsra, 0, BT_OV_V4SI_V4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_s32h, s390_vsra, 0, BT_OV_V4SI_V4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_s32s, s390_vsra, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_b32q, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_b32h, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_b32s, s390_vsra, 0, BT_OV_BV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_u64q, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_u64h, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV8HI) 
++OB_DEF_VAR (s390_vec_sral_u64s, s390_vsra, 0, BT_OV_UV2DI_UV2DI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_s64q, s390_vsra, 0, BT_OV_V2DI_V2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_s64h, s390_vsra, 0, BT_OV_V2DI_V2DI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_s64s, s390_vsra, 0, BT_OV_V2DI_V2DI_UV4SI) ++OB_DEF_VAR (s390_vec_sral_b64q, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_sral_b64h, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV8HI) ++OB_DEF_VAR (s390_vec_sral_b64s, s390_vsra, 0, BT_OV_BV2DI_BV2DI_UV4SI) ++ ++B_DEF (s390_vsra, vec_sralv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_srab, s390_vec_srab_u8_u8,s390_vec_srab_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_srab_u8_u8, s390_vsrab, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srab_u8_s8, s390_vsrab, 0, BT_OV_UV16QI_UV16QI_V16QI) ++OB_DEF_VAR (s390_vec_srab_s8_u8, s390_vsrab, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srab_s8_s8, s390_vsrab, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_srab_u16_u16, s390_vsrab, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srab_u16_s16, s390_vsrab, 0, BT_OV_UV8HI_UV8HI_V8HI) ++OB_DEF_VAR (s390_vec_srab_s16_u16, s390_vsrab, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srab_s16_s16, s390_vsrab, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_srab_u32_u32, s390_vsrab, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srab_u32_s32, s390_vsrab, 0, BT_OV_UV4SI_UV4SI_V4SI) ++OB_DEF_VAR (s390_vec_srab_s32_u32, s390_vsrab, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srab_s32_s32, s390_vsrab, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_srab_u64_u64, s390_vsrab, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_srab_u64_s64, s390_vsrab, 0, BT_OV_UV2DI_UV2DI_V2DI) ++OB_DEF_VAR (s390_vec_srab_s64_u64, s390_vsrab, 0, BT_OV_V2DI_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_srab_s64_s64, s390_vsrab, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_srab_dbl_u64, s390_vsrab, 0, BT_OV_V2DF_V2DF_UV2DI) ++OB_DEF_VAR 
(s390_vec_srab_dbl_s64, s390_vsrab, 0, BT_OV_V2DF_V2DF_V2DI) ++ ++B_DEF (s390_vsrab, vec_srabv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_srl, s390_vec_srl_u8q, s390_vec_srl_b64s, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_srl_u8q, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_u8h, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_u8s, s390_vsrl, 0, BT_OV_UV16QI_UV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_s8q, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_s8h, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_s8s, s390_vsrl, 0, BT_OV_V16QI_V16QI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_b8q, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_b8h, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_b8s, s390_vsrl, 0, BT_OV_BV16QI_BV16QI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_u16q, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_u16h, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_u16s, s390_vsrl, 0, BT_OV_UV8HI_UV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_s16q, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_s16h, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_s16s, s390_vsrl, 0, BT_OV_V8HI_V8HI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_b16q, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_b16h, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_b16s, s390_vsrl, 0, BT_OV_BV8HI_BV8HI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_u32q, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_u32h, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_u32s, s390_vsrl, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_s32q, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_s32h, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_s32s, s390_vsrl, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_b32q, 
s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_b32h, s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_b32s, s390_vsrl, 0, BT_OV_BV4SI_BV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_u64q, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_u64h, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_u64s, s390_vsrl, 0, BT_OV_UV2DI_UV2DI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_s64q, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_s64h, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_s64s, s390_vsrl, 0, BT_OV_V2DI_V2DI_UV4SI) ++OB_DEF_VAR (s390_vec_srl_b64q, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV16QI) ++OB_DEF_VAR (s390_vec_srl_b64h, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV8HI) ++OB_DEF_VAR (s390_vec_srl_b64s, s390_vsrl, 0, BT_OV_BV2DI_BV2DI_UV4SI) ++ ++B_DEF (s390_vsrl, vec_srlv16qiv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_srb, s390_vec_srb_u8_u8, s390_vec_srb_dbl_s64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_srb_u8_u8, s390_vsrlb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srb_u8_s8, s390_vsrlb, 0, BT_OV_UV16QI_UV16QI_V16QI) ++OB_DEF_VAR (s390_vec_srb_s8_u8, s390_vsrlb, 0, BT_OV_V16QI_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_srb_s8_s8, s390_vsrlb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vec_srb_u16_u16, s390_vsrlb, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srb_u16_s16, s390_vsrlb, 0, BT_OV_UV8HI_UV8HI_V8HI) ++OB_DEF_VAR (s390_vec_srb_s16_u16, s390_vsrlb, 0, BT_OV_V8HI_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_srb_s16_s16, s390_vsrlb, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vec_srb_u32_u32, s390_vsrlb, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srb_u32_s32, s390_vsrlb, 0, BT_OV_UV4SI_UV4SI_V4SI) ++OB_DEF_VAR (s390_vec_srb_s32_u32, s390_vsrlb, 0, BT_OV_V4SI_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_srb_s32_s32, s390_vsrlb, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vec_srb_u64_u64, s390_vsrlb, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++OB_DEF_VAR 
(s390_vec_srb_u64_s64, s390_vsrlb, 0, BT_OV_UV2DI_UV2DI_V2DI) ++OB_DEF_VAR (s390_vec_srb_s64_u64, s390_vsrlb, 0, BT_OV_V2DI_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_srb_s64_s64, s390_vsrlb, 0, BT_OV_V2DI_V2DI_V2DI) ++OB_DEF_VAR (s390_vec_srb_dbl_u64, s390_vsrlb, 0, BT_OV_V2DF_V2DF_UV2DI) ++OB_DEF_VAR (s390_vec_srb_dbl_s64, s390_vsrlb, 0, BT_OV_V2DF_V2DF_V2DI) ++ ++B_DEF (s390_vsrlb, vec_srbv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vsq, vec_sub_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_subc, s390_vec_subc_u8, s390_vec_subc_u64, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_subc_u8, s390_vscbib, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_subc_u16, s390_vscbih, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_subc_u32, s390_vscbif, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_subc_u64, s390_vscbig, 0, BT_OV_UV2DI_UV2DI_UV2DI) ++ ++B_DEF (s390_vscbib, vec_subcv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vscbih, vec_subcv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vscbif, vec_subcv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vscbig, vec_subcv2di, 0, B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) ++B_DEF (s390_vscbiq, vec_subc_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vsbiq, vec_sube_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vsbcbiq, vec_subec_u128, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI) ++ ++OB_DEF (s390_vec_sum2, s390_vec_sum2_u16, s390_vec_sum2_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sum2_u16, s390_vsumgh, 0, BT_OV_UV2DI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_sum2_u32, s390_vsumgf, 0, BT_OV_UV2DI_UV4SI_UV4SI) ++ ++B_DEF (s390_vsumgh, vec_sum2v8hi, 0, B_VX, 0, BT_FN_UV2DI_UV8HI_UV8HI) ++B_DEF (s390_vsumgf, vec_sum2v4si, 0, B_VX, 0, BT_FN_UV2DI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_sum_u128, s390_vec_sum_u128_u32,s390_vec_sum_u128_u64,B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sum_u128_u32, s390_vsumqf, 0, 
BT_OV_UV16QI_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_sum_u128_u64, s390_vsumqg, 0, BT_OV_UV16QI_UV2DI_UV2DI) ++ ++B_DEF (s390_vsumqf, vec_sum_u128v4si, 0, B_VX, 0, BT_FN_UV16QI_UV4SI_UV4SI) ++B_DEF (s390_vsumqg, vec_sum_u128v2di, 0, B_VX, 0, BT_FN_UV16QI_UV2DI_UV2DI) ++ ++OB_DEF (s390_vec_sum4, s390_vec_sum4_u8, s390_vec_sum4_u16, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_sum4_u8, s390_vsumb, 0, BT_OV_UV4SI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_sum4_u16, s390_vsumh, 0, BT_OV_UV4SI_UV8HI_UV8HI) ++ ++B_DEF (s390_vsumb, vec_sum4v16qi, 0, B_VX, 0, BT_FN_UV4SI_UV16QI_UV16QI) ++B_DEF (s390_vsumh, vec_sum4v8hi, 0, B_VX, 0, BT_FN_UV4SI_UV8HI_UV8HI) ++ ++OB_DEF (s390_vec_test_mask, s390_vec_test_mask_s8,s390_vec_test_mask_dbl,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vec_test_mask_s8, s390_vtm, 0, BT_OV_INT_V16QI_UV16QI) ++OB_DEF_VAR (s390_vec_test_mask_u8, s390_vtm, 0, BT_OV_INT_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vec_test_mask_s16, s390_vtm, 0, BT_OV_INT_V8HI_UV8HI) ++OB_DEF_VAR (s390_vec_test_mask_u16, s390_vtm, 0, BT_OV_INT_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vec_test_mask_s32, s390_vtm, 0, BT_OV_INT_V4SI_UV4SI) ++OB_DEF_VAR (s390_vec_test_mask_u32, s390_vtm, 0, BT_OV_INT_UV4SI_UV4SI) ++OB_DEF_VAR (s390_vec_test_mask_s64, s390_vtm, 0, BT_OV_INT_V2DI_UV2DI) ++OB_DEF_VAR (s390_vec_test_mask_u64, s390_vtm, 0, BT_OV_INT_UV2DI_UV2DI) ++OB_DEF_VAR (s390_vec_test_mask_dbl, s390_vtm, 0, BT_OV_INT_V2DF_UV2DI) ++ ++B_DEF (s390_vtm, vec_test_mask_intv16qi,0, B_VX, 0, BT_FN_INT_UV16QI_UV16QI) ++B_DEF (s390_vfaeb, vfaev16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT) ++B_DEF (s390_vfaeh, vfaev8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT) ++B_DEF (s390_vfaef, vfaev4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT) ++B_DEF (s390_vfaezb, vfaezv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT) ++B_DEF (s390_vfaezh, vfaezv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT) ++B_DEF (s390_vfaezf, vfaezv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT) ++B_DEF 
(s390_vfaebs, vfaesv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR) ++B_DEF (s390_vfaehs, vfaesv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR) ++B_DEF (s390_vfaefs, vfaesv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR) ++B_DEF (s390_vfaezbs, vfaezsv16qi, 0, B_VX, O3_U4, BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR) ++B_DEF (s390_vfaezhs, vfaezsv8hi, 0, B_VX, O3_U4, BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR) ++B_DEF (s390_vfaezfs, vfaezsv4si, 0, B_VX, O3_U4, BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR) ++ ++OB_DEF (s390_vec_find_any_eq_idx, s390_vfaeb_idx_s8, s390_vfaef_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaeb_idx_s8, s390_vfaeb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaeb_idx_u8a, s390_vfaeb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfaeb_idx_u8b, s390_vfaeb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaeh_idx_s16, s390_vfaeh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaeh_idx_u16a, s390_vfaeh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaeh_idx_u16b, s390_vfaeh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaef_idx_s32, s390_vfaef, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaef_idx_u32a, s390_vfaef, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaef_idx_u32b, s390_vfaef, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_ne_idx, s390_vfaeb_inv_idx_s8,s390_vfaef_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaeb_inv_idx_s8, s390_vfaeb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaeb_inv_idx_u8a, s390_vfaeb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfaeb_inv_idx_u8b, s390_vfaeb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaeh_inv_idx_s16, s390_vfaeh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaeh_inv_idx_u16a, s390_vfaeh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaeh_inv_idx_u16b, s390_vfaeh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaef_inv_idx_s32, s390_vfaef, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaef_inv_idx_u32a, 
s390_vfaef, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaef_inv_idx_u32b, s390_vfaef, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_eq_or_0_idx,s390_vfaezb_idx_s8,s390_vfaezf_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaezb_idx_s8, s390_vfaezb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaezb_idx_u8a, s390_vfaezb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfaezb_idx_u8b, s390_vfaezb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaezh_idx_s16, s390_vfaezh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaezh_idx_u16a, s390_vfaezh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaezh_idx_u16b, s390_vfaezh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaezf_idx_s32, s390_vfaezf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaezf_idx_u32a, s390_vfaezf, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaezf_idx_u32b, s390_vfaezf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_ne_or_0_idx,s390_vfaezb_inv_idx_s8,s390_vfaezf_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaezb_inv_idx_s8, s390_vfaezb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaezb_inv_idx_u8a, s390_vfaezb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfaezb_inv_idx_u8b, s390_vfaezb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaezh_inv_idx_s16, s390_vfaezh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaezh_inv_idx_u16a, s390_vfaezh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaezh_inv_idx_u16b, s390_vfaezh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaezf_inv_idx_s32, s390_vfaezf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaezf_inv_idx_u32a, s390_vfaezf, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaezf_inv_idx_u32b, s390_vfaezf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_eq, s390_vfaeb_s8, s390_vfaef_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaeb_s8, s390_vfaeb, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaeb_u8, s390_vfaeb, 0, BT_OV_BV16QI_BV16QI_BV16QI) 
++OB_DEF_VAR (s390_vfaeb_b8, s390_vfaeb, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaeh_s16, s390_vfaeh, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaeh_u16, s390_vfaeh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaeh_b16, s390_vfaeh, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaef_s32, s390_vfaef, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaef_u32, s390_vfaef, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaef_b32, s390_vfaef, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_ne, s390_vfaeb_inv_s8, s390_vfaef_inv_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfaeb_inv_s8, s390_vfaeb, 0, BT_OV_BV16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfaeb_inv_u8, s390_vfaeb, 0, BT_OV_BV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfaeb_inv_b8, s390_vfaeb, 0, BT_OV_BV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfaeh_inv_s16, s390_vfaeh, 0, BT_OV_BV8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfaeh_inv_u16, s390_vfaeh, 0, BT_OV_BV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfaeh_inv_b16, s390_vfaeh, 0, BT_OV_BV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfaef_inv_s32, s390_vfaef, 0, BT_OV_BV4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfaef_inv_u32, s390_vfaef, 0, BT_OV_BV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfaef_inv_b32, s390_vfaef, 0, BT_OV_BV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_find_any_eq_idx_cc,s390_vfaebs_idx_s8, s390_vfaefs_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_idx_s8, s390_vfaebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_idx_u8a, s390_vfaebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_idx_u8b, s390_vfaebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_idx_s16, s390_vfaehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_idx_u16a, s390_vfaehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_idx_u16b, s390_vfaehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_idx_s32, s390_vfaefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_idx_u32a, 
s390_vfaefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_idx_u32b, s390_vfaefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_find_any_ne_idx_cc,s390_vfaebs_inv_idx_s8,s390_vfaefs_inv_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_idx_s8, s390_vfaebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_idx_u8a, s390_vfaebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_idx_u8b, s390_vfaebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_idx_s16, s390_vfaehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_idx_u16a, s390_vfaehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_idx_u16b, s390_vfaehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_idx_s32, s390_vfaefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_idx_u32a, s390_vfaefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_idx_u32b, s390_vfaefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_find_any_eq_or_0_idx_cc,s390_vfaezbs_idx_s8,s390_vfaezfs_idx_u32b,B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_idx_s8, s390_vfaezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_idx_u8a, s390_vfaezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_idx_u8b, s390_vfaezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_idx_s16, s390_vfaezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_idx_u16a, s390_vfaezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_idx_u16b, s390_vfaezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_idx_s32, s390_vfaezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_idx_u32a, s390_vfaezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_idx_u32b, s390_vfaezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF 
(s390_vec_find_any_ne_or_0_idx_cc,s390_vfaezbs_inv_idx_s8,s390_vfaezfs_inv_idx_u32b,B_VX,BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_inv_idx_s8, s390_vfaezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8a, s390_vfaezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezbs_inv_idx_u8b, s390_vfaezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_inv_idx_s16, s390_vfaezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16a, s390_vfaezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezhs_inv_idx_u16b, s390_vfaezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_inv_idx_s32, s390_vfaezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32a, s390_vfaezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaezfs_inv_idx_u32b, s390_vfaezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_find_any_eq_cc, s390_vfaebs_s8, s390_vfaefs_b32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_s8, s390_vfaebs, 0, BT_OV_BV16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_u8, s390_vfaebs, 0, BT_OV_BV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_b8, s390_vfaebs, 0, BT_OV_BV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_s16, s390_vfaehs, 0, BT_OV_BV8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_u16, s390_vfaehs, 0, BT_OV_BV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_b16, s390_vfaehs, 0, BT_OV_BV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_s32, s390_vfaefs, 0, BT_OV_BV4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_u32, s390_vfaefs, 0, BT_OV_BV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_b32, s390_vfaefs, 0, BT_OV_BV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_find_any_ne_cc, s390_vfaebs_inv_s8, s390_vfaefs_inv_b32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_s8, s390_vfaebs, 0, BT_OV_BV16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_u8, s390_vfaebs, 0, 
BT_OV_BV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaebs_inv_b8, s390_vfaebs, 0, BT_OV_BV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_s16, s390_vfaehs, 0, BT_OV_BV8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_u16, s390_vfaehs, 0, BT_OV_BV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaehs_inv_b16, s390_vfaehs, 0, BT_OV_BV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_s32, s390_vfaefs, 0, BT_OV_BV4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_u32, s390_vfaefs, 0, BT_OV_BV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfaefs_inv_b32, s390_vfaefs, 0, BT_OV_BV4SI_UV4SI_UV4SI_INTPTR) ++ ++B_DEF (s390_vfeeb, vfeev16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vfeeh, vfeev8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vfeef, vfeev4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vfeezb, vfeezv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vfeezh, vfeezv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vfeezf, vfeezv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vfeebs, vfeesv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vfeehs, vfeesv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vfeefs, vfeesv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vfeezbs, vfeezsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vfeezhs, vfeezsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vfeezfs, vfeezsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpeq_idx, s390_vfeeb_s8, s390_vfeef_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfeeb_s8, s390_vfeeb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfeeb_u8a, s390_vfeeb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfeeb_u8b, s390_vfeeb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfeeh_s16, s390_vfeeh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfeeh_u16a, s390_vfeeh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfeeh_u16b, 
s390_vfeeh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfeef_s32, s390_vfeef, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfeef_u32a, s390_vfeef, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfeef_u32b, s390_vfeef, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpeq_or_0_idx, s390_vfeezb_s8, s390_vfeezf_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfeezb_s8, s390_vfeezb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfeezb_u8a, s390_vfeezb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfeezb_u8b, s390_vfeezb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfeezh_s16, s390_vfeezh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfeezh_u16a, s390_vfeezh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfeezh_u16b, s390_vfeezh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfeezf_s32, s390_vfeezf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfeezf_u32a, s390_vfeezf, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfeezf_u32b, s390_vfeezf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpeq_idx_cc, s390_vfeebs_s8, s390_vfeefs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfeebs_s8, s390_vfeebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfeebs_u8a, s390_vfeebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfeebs_u8b, s390_vfeebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfeehs_s16, s390_vfeehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfeehs_u16a, s390_vfeehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfeehs_u16b, s390_vfeehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfeefs_s32, s390_vfeefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfeefs_u32a, s390_vfeefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfeefs_u32b, s390_vfeefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpeq_or_0_idx_cc, s390_vfeezbs_s8, s390_vfeezfs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfeezbs_s8, s390_vfeezbs, 0, 
BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfeezbs_u8a, s390_vfeezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfeezbs_u8b, s390_vfeezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfeezhs_s16, s390_vfeezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfeezhs_u16a, s390_vfeezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfeezhs_u16b, s390_vfeezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfeezfs_s32, s390_vfeezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfeezfs_u32a, s390_vfeezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfeezfs_u32b, s390_vfeezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++B_DEF (s390_vfeneb, vfenev16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vfeneh, vfenev8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vfenef, vfenev4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vfenezb, vfenezv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI) ++B_DEF (s390_vfenezh, vfenezv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI) ++B_DEF (s390_vfenezf, vfenezv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI) ++B_DEF (s390_vfenebs, vfenesv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vfenehs, vfenesv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vfenefs, vfenesv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR) ++B_DEF (s390_vfenezbs, vfenezsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vfenezhs, vfenezsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vfenezfs, vfenezsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpne_idx, s390_vfeneb_s8, s390_vfenef_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfeneb_s8, s390_vfeneb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfeneb_u8a, s390_vfeneb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfeneb_u8b, s390_vfeneb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfeneh_s16, s390_vfeneh, 0, BT_OV_V8HI_V8HI_V8HI) 
++OB_DEF_VAR (s390_vfeneh_u16a, s390_vfeneh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfeneh_u16b, s390_vfeneh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfenef_s32, s390_vfenef, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfenef_u32a, s390_vfenef, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfenef_u32b, s390_vfenef, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpne_or_0_idx, s390_vfenezb_s8, s390_vfenezf_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vfenezb_s8, s390_vfenezb, 0, BT_OV_V16QI_V16QI_V16QI) ++OB_DEF_VAR (s390_vfenezb_u8a, s390_vfenezb, 0, BT_OV_UV16QI_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vfenezb_u8b, s390_vfenezb, 0, BT_OV_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vfenezh_s16, s390_vfenezh, 0, BT_OV_V8HI_V8HI_V8HI) ++OB_DEF_VAR (s390_vfenezh_u16a, s390_vfenezh, 0, BT_OV_UV8HI_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vfenezh_u16b, s390_vfenezh, 0, BT_OV_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vfenezf_s32, s390_vfenezf, 0, BT_OV_V4SI_V4SI_V4SI) ++OB_DEF_VAR (s390_vfenezf_u32a, s390_vfenezf, 0, BT_OV_UV4SI_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vfenezf_u32b, s390_vfenezf, 0, BT_OV_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpne_idx_cc, s390_vfenebs_s8, s390_vfenefs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfenebs_s8, s390_vfenebs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfenebs_u8a, s390_vfenebs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfenebs_u8b, s390_vfenebs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfenehs_s16, s390_vfenehs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfenehs_u16a, s390_vfenehs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfenehs_u16b, s390_vfenehs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfenefs_s32, s390_vfenefs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfenefs_u32a, s390_vfenefs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfenefs_u32b, s390_vfenefs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF 
(s390_vec_cmpne_or_0_idx_cc, s390_vfenezbs_s8, s390_vfenezfs_u32b, B_VX, BT_FN_INT_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vfenezbs_s8, s390_vfenezbs, 0, BT_OV_V16QI_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vfenezbs_u8a, s390_vfenezbs, 0, BT_OV_UV16QI_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vfenezbs_u8b, s390_vfenezbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vfenezhs_s16, s390_vfenezhs, 0, BT_OV_V8HI_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vfenezhs_u16a, s390_vfenezhs, 0, BT_OV_UV8HI_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vfenezhs_u16b, s390_vfenezhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vfenezfs_s32, s390_vfenezfs, 0, BT_OV_V4SI_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vfenezfs_u32a, s390_vfenezfs, 0, BT_OV_UV4SI_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vfenezfs_u32b, s390_vfenezfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++B_DEF (s390_vistrb, vistrv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI) ++B_DEF (s390_vistrh, vistrv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI) ++B_DEF (s390_vistrf, vistrv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI) ++B_DEF (s390_vistrbs, vistrsv16qi, 0, B_VX, 0, BT_FN_UV16QI_UV16QI_INTPTR) ++B_DEF (s390_vistrhs, vistrsv8hi, 0, B_VX, 0, BT_FN_UV8HI_UV8HI_INTPTR) ++B_DEF (s390_vistrfs, vistrsv4si, 0, B_VX, 0, BT_FN_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cp_until_zero, s390_vistrb_s8, s390_vistrf_u32, B_VX, BT_FN_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vistrb_s8, s390_vistrb, 0, BT_OV_V16QI_V16QI) ++OB_DEF_VAR (s390_vistrb_b8, s390_vistrb, 0, BT_OV_BV16QI_BV16QI) ++OB_DEF_VAR (s390_vistrb_u8, s390_vistrb, 0, BT_OV_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vistrh_s16, s390_vistrh, 0, BT_OV_V8HI_V8HI) ++OB_DEF_VAR (s390_vistrh_b16, s390_vistrh, 0, BT_OV_BV8HI_BV8HI) ++OB_DEF_VAR (s390_vistrh_u16, s390_vistrh, 0, BT_OV_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vistrf_s32, s390_vistrf, 0, BT_OV_V4SI_V4SI) ++OB_DEF_VAR (s390_vistrf_b32, s390_vistrf, 0, BT_OV_BV4SI_BV4SI) ++OB_DEF_VAR (s390_vistrf_u32, s390_vistrf, 0, BT_OV_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cp_until_zero_cc, 
s390_vistrbs_s8, s390_vistrfs_u32, B_VX, BT_FN_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vistrbs_s8, s390_vistrbs, 0, BT_OV_V16QI_V16QI_INTPTR) ++OB_DEF_VAR (s390_vistrbs_b8, s390_vistrbs, 0, BT_OV_BV16QI_BV16QI_INTPTR) ++OB_DEF_VAR (s390_vistrbs_u8, s390_vistrbs, 0, BT_OV_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vistrhs_s16, s390_vistrhs, 0, BT_OV_V8HI_V8HI_INTPTR) ++OB_DEF_VAR (s390_vistrhs_b16, s390_vistrhs, 0, BT_OV_BV8HI_BV8HI_INTPTR) ++OB_DEF_VAR (s390_vistrhs_u16, s390_vistrhs, 0, BT_OV_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vistrfs_s32, s390_vistrfs, 0, BT_OV_V4SI_V4SI_INTPTR) ++OB_DEF_VAR (s390_vistrfs_b32, s390_vistrfs, 0, BT_OV_BV4SI_BV4SI_INTPTR) ++OB_DEF_VAR (s390_vistrfs_u32, s390_vistrfs, 0, BT_OV_UV4SI_UV4SI_INTPTR) ++ ++B_DEF (s390_vstrcb, vstrcv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT) ++B_DEF (s390_vstrch, vstrcv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT) ++B_DEF (s390_vstrcf, vstrcv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT) ++B_DEF (s390_vstrczb, vstrczv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT) ++B_DEF (s390_vstrczh, vstrczv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT) ++B_DEF (s390_vstrczf, vstrczv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT) ++B_DEF (s390_vstrcbs, vstrcsv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR) ++B_DEF (s390_vstrchs, vstrcsv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR) ++B_DEF (s390_vstrcfs, vstrcsv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR) ++B_DEF (s390_vstrczbs, vstrczsv16qi, 0, B_VX, O4_U4, BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR) ++B_DEF (s390_vstrczhs, vstrczsv8hi, 0, B_VX, O4_U4, BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR) ++B_DEF (s390_vstrczfs, vstrczsv4si, 0, B_VX, O4_U4, BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR) ++ ++OB_DEF (s390_vec_cmprg_idx, s390_vstrcb_idx_u8, s390_vstrcf_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrcb_idx_u8, s390_vstrcb, 0, 
BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrch_idx_u16, s390_vstrch, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrcf_idx_u32, s390_vstrcf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpnrg_idx, s390_vstrcb_inv_idx_u8,s390_vstrcf_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrcb_inv_idx_u8, s390_vstrcb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrch_inv_idx_u16, s390_vstrch, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrcf_inv_idx_u32, s390_vstrcf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmprg_or_0_idx, s390_vstrczb_idx_u8,s390_vstrczf_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrczb_idx_u8, s390_vstrczb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrczh_idx_u16, s390_vstrczh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrczf_idx_u32, s390_vstrczf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpnrg_or_0_idx, s390_vstrczb_inv_idx_u8,s390_vstrczf_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrczb_inv_idx_u8, s390_vstrczb, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrczh_inv_idx_u16, s390_vstrczh, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrczf_inv_idx_u32, s390_vstrczf, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmprg, s390_vstrcb_u8, s390_vstrcf_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrcb_u8, s390_vstrcb, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrch_u16, s390_vstrch, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrcf_u32, s390_vstrcf, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmpnrg, s390_vstrcb_inv_u8, s390_vstrcf_inv_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI) ++OB_DEF_VAR (s390_vstrcb_inv_u8, s390_vstrcb, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI) ++OB_DEF_VAR (s390_vstrch_inv_u16, s390_vstrch, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI) ++OB_DEF_VAR (s390_vstrcf_inv_u32, s390_vstrcf, 
0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI) ++ ++OB_DEF (s390_vec_cmprg_idx_cc, s390_vstrcbs_idx_u8,s390_vstrcfs_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrcbs_idx_u8, s390_vstrcbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vstrchs_idx_u16, s390_vstrchs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vstrcfs_idx_u32, s390_vstrcfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpnrg_idx_cc, s390_vstrcbs_inv_idx_u8,s390_vstrcfs_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrcbs_inv_idx_u8, s390_vstrcbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) /* vstrcb */ ++OB_DEF_VAR (s390_vstrchs_inv_idx_u16, s390_vstrchs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) /* vstrch */ ++OB_DEF_VAR (s390_vstrcfs_inv_idx_u32, s390_vstrcfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR) /* vstrcf */ ++ ++OB_DEF (s390_vec_cmprg_or_0_idx_cc, s390_vstrczbs_idx_u8,s390_vstrczfs_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrczbs_idx_u8, s390_vstrczbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vstrczhs_idx_u16, s390_vstrczhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vstrczfs_idx_u32, s390_vstrczfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpnrg_or_0_idx_cc,s390_vstrczbs_inv_idx_u8,s390_vstrczfs_inv_idx_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrczbs_inv_idx_u8, s390_vstrczbs, 0, BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vstrczhs_inv_idx_u16, s390_vstrczhs, 0, BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vstrczfs_inv_idx_u32, s390_vstrczfs, 0, BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmprg_cc, s390_vstrcbs_u8, s390_vstrcfs_u32, B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrcbs_u8, s390_vstrcbs, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vstrchs_u16, s390_vstrchs, 0, 
BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vstrcfs_u32, s390_vstrcfs, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++OB_DEF (s390_vec_cmpnrg_cc, s390_vstrcbs_inv_u8,s390_vstrcfs_inv_u32,B_VX, BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR) ++OB_DEF_VAR (s390_vstrcbs_inv_u8, s390_vstrcbs, 0, BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR) ++OB_DEF_VAR (s390_vstrchs_inv_u16, s390_vstrchs, 0, BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR) ++OB_DEF_VAR (s390_vstrcfs_inv_u32, s390_vstrcfs, 0, BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR) ++ ++B_DEF (s390_vec_all_nge, vec_all_unltv2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (s390_vec_all_ngt, vec_all_unlev2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (s390_vec_any_nge, vec_any_unltv2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF) ++B_DEF (s390_vec_any_ngt, vec_any_unlev2df, 0, B_VX, 0, BT_FN_INT_V2DF_V2DF) ++ ++OB_DEF (s390_vec_ctd, s390_vec_ctd_s64, s390_vec_ctd_u64, B_VX, BT_FN_V2DF_UV4SI_INT) ++OB_DEF_VAR (s390_vec_ctd_s64, s390_vec_ctd_s64, O2_U5, BT_OV_V2DF_V2DI_INT) /* vcdgb */ ++OB_DEF_VAR (s390_vec_ctd_u64, s390_vec_ctd_u64, O2_U5, BT_OV_V2DF_UV2DI_INT) /* vcdlgb */ ++ ++B_DEF (s390_vec_ctd_s64, vec_ctd_s64, 0, B_VX, O2_U3, BT_FN_V2DF_V2DI_INT) /* vcdgb */ ++B_DEF (s390_vec_ctd_u64, vec_ctd_u64, 0, B_VX, O2_U3, BT_FN_V2DF_UV2DI_INT) /* vcdlgb */ ++B_DEF (s390_vcdgb, vec_di_to_df_s64, 0, B_VX, O2_U3, BT_FN_V2DF_V2DI_INT) /* vcdgb */ ++B_DEF (s390_vcdlgb, vec_di_to_df_u64, 0, B_VX, O2_U3, BT_FN_V2DF_UV2DI_INT) /* vcdlgb */ ++B_DEF (s390_vec_ctsl, vec_ctsl, 0, B_VX, O2_U3, BT_FN_V2DI_V2DF_INT) /* vcgdb */ ++B_DEF (s390_vec_ctul, vec_ctul, 0, B_VX, O2_U3, BT_FN_UV2DI_V2DF_INT) /* vclgdb */ ++B_DEF (s390_vcgdb, vec_df_to_di_s64, 0, B_VX, O2_U3, BT_FN_V2DI_V2DF_INT) /* vcgdb */ ++B_DEF (s390_vclgdb, vec_df_to_di_u64, 0, B_VX, O2_U3, BT_FN_UV2DI_V2DF_INT) /* vclgdb */ ++B_DEF (s390_vfidb, vfidb, 0, B_VX, O2_U4 | O3_U3, BT_FN_V2DF_V2DF_UCHAR_UCHAR) ++B_DEF (s390_vec_ld2f, vec_ld2f, 0, B_VX, 0, BT_FN_V2DF_FLTCONSTPTR) /* vldeb */ ++B_DEF 
(s390_vec_st2f, vec_st2f, 0, B_VX, 0, BT_FN_VOID_V2DF_FLTPTR) /* vledb */ ++B_DEF (s390_vfmadb, fmav2df4, 0, B_VX, 0, BT_FN_V2DF_V2DF_V2DF_V2DF) ++B_DEF (s390_vfmsdb, fmsv2df4, 0, B_VX, 0, BT_FN_V2DF_V2DF_V2DF_V2DF) ++B_DEF (s390_vflndb, vec_nabs, 0, B_VX, 0, BT_FN_V2DF_V2DF) ++B_DEF (s390_vfsqdb, sqrtv2df2, 0, B_VX, 0, BT_FN_V2DF_V2DF) ++B_DEF (s390_vftcidb, vftcidb, 0, B_VX, O2_U12, BT_FN_V2DI_V2DF_INT_INTPTR) +--- gcc/config/s390/s390-builtins.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-builtins.h 2016-05-11 17:33:27.000000000 +0200 +@@ -0,0 +1,175 @@ ++/* Common data structures used for builtin handling on S/390. ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++/* This file contains data structure definitions which can be used by ++ s390-builtins.c as well as s390-c.c. Since the latter is ++ considered to be part of the front-end we have to be careful not ++ to use any tree- or rtx-like data structures. */ ++ ++/* Builtin types, data and prototypes. */ ++ ++enum s390_builtin_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(INDEX, ...) INDEX, ++#define DEF_POINTER_TYPE(INDEX, ...)
INDEX, ++#define DEF_DISTINCT_TYPE(INDEX, ...) INDEX, ++#define DEF_VECTOR_TYPE(INDEX, ...) INDEX, ++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, ...) INDEX, ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(...) ++#include "s390-builtin-types.def" ++ BT_MAX ++}; ++ ++enum s390_builtin_fn_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(INDEX, ...) INDEX, ++#define DEF_OV_TYPE(...) ++#include "s390-builtin-types.def" ++ BT_FN_MAX ++}; ++ ++enum s390_builtin_ov_type_index ++{ ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(INDEX, ...) INDEX, ++#include "s390-builtin-types.def" ++ BT_OV_MAX ++}; ++ ++#define MAX_OV_OPERANDS 6 ++ ++extern tree s390_builtin_types[BT_MAX]; ++extern tree s390_builtin_fn_types[BT_FN_MAX]; ++ ++ /* Builtins. */ ++ ++enum s390_builtins { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, ...) S390_BUILTIN_##NAME, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++ ++#include "s390-builtins.def" ++ S390_BUILTIN_MAX ++}; ++ ++ ++/* Generate an enumeration of all overloaded builtins defined with ++ OB_DEF in s390-builtins.def. */ ++enum s390_overloaded_builtins { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, ...) S390_OVERLOADED_BUILTIN_##NAME, ++#define OB_DEF_VAR(...) 
++#include "s390-builtins.def" ++S390_OVERLOADED_BUILTIN_MAX ++}; ++ ++/* Generate an enumeration of all variants of overloaded builtins ++ defined with OB_DEF_VAR in s390-builtins.def. */ ++enum s390_overloaded_builtin_vars { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, ...) S390_OVERLOADED_BUILTIN_VAR_##NAME, ++#include "s390-builtins.def" ++S390_OVERLOADED_BUILTIN_VAR_MAX ++}; ++/* The offsets below are what bflags_for_builtin and ++ opflags_for_builtin compare a function code against: codes below ++ S390_OVERLOADED_BUILTIN_OFFSET index the plain builtin tables, ++ codes from S390_OVERLOADED_BUILTIN_VAR_OFFSET upwards index the ++ overloaded builtin variant table. */ ++#define S390_OVERLOADED_BUILTIN_OFFSET S390_BUILTIN_MAX ++#define S390_OVERLOADED_BUILTIN_VAR_OFFSET \ ++ (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX) ++#define S390_ALL_BUILTIN_MAX \ ++ (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX + \ ++ S390_OVERLOADED_BUILTIN_VAR_MAX) ++ ++extern const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1]; ++extern const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1]; ++ ++extern const unsigned int ++ bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1]; ++extern const unsigned int ++ opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1]; ++/* Return the bflags entry for function code FCODE. Codes below ++ S390_OVERLOADED_BUILTIN_OFFSET are looked up in bflags_builtin; ++ codes from there up to S390_OVERLOADED_BUILTIN_VAR_OFFSET are ++ looked up in bflags_overloaded_builtin after subtracting ++ S390_BUILTIN_MAX. Codes at or above ++ S390_OVERLOADED_BUILTIN_VAR_OFFSET are not expected here and hit ++ gcc_unreachable. */ ++static inline unsigned int ++bflags_for_builtin (int fcode) ++{ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ gcc_unreachable (); ++ else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET) ++ return bflags_overloaded_builtin[fcode - S390_BUILTIN_MAX]; ++ else ++ return bflags_builtin[fcode]; ++} ++/* Return the opflags entry for function code FCODE. Codes below ++ S390_OVERLOADED_BUILTIN_OFFSET are looked up in opflags_builtin; ++ codes at or above S390_OVERLOADED_BUILTIN_VAR_OFFSET are looked up ++ in opflags_overloaded_builtin_var after subtracting that offset. ++ Codes in between are not expected here and hit gcc_unreachable. */ ++static inline unsigned int ++opflags_for_builtin (int fcode) ++{ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ return opflags_overloaded_builtin_var[fcode - ++ S390_OVERLOADED_BUILTIN_VAR_OFFSET]; ++ else if (fcode >= S390_OVERLOADED_BUILTIN_OFFSET) ++ gcc_unreachable (); ++ else ++ return opflags_builtin[fcode]; ++} ++/* Sized to the sum of the three enumeration maxima, i.e. the same ++ value S390_ALL_BUILTIN_MAX expands to. */ ++extern tree s390_builtin_decls[S390_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_VAR_MAX]; +--- gcc/config/s390/s390-builtin-types.def 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-builtin-types.def 2016-05-11 17:53:39.000000000 +0200 +@@
-0,0 +1,755 @@ ++/* Builtin type definitions for IBM S/390 and zSeries ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++#define DEF_FN_TYPE_1(FN_TYPE, FLAGS, T1) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1]) ++#define DEF_FN_TYPE_2(FN_TYPE, FLAGS, T1, T2) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2]) ++#define DEF_FN_TYPE_3(FN_TYPE, FLAGS, T1, T2, T3) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3]) ++#define DEF_FN_TYPE_4(FN_TYPE, FLAGS, T1, T2, T3, T4) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4]) ++#define DEF_FN_TYPE_5(FN_TYPE, FLAGS, T1, T2, T3, T4, T5) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4], \ ++ s390_builtin_types[T5]) ++#define DEF_FN_TYPE_6(FN_TYPE, FLAGS, T1, T2, T3, T4, T5, T6) \ ++ DEF_FN_TYPE (FN_TYPE, \ ++ FLAGS, \ ++ s390_builtin_types[T1], \ ++ s390_builtin_types[T2], \ ++ s390_builtin_types[T3], \ ++ s390_builtin_types[T4], \ ++ s390_builtin_types[T5], \ ++ s390_builtin_types[T6])
++DEF_TYPE (BT_INT, B_HTM | B_VX, integer_type_node, 0) ++DEF_TYPE (BT_VOID, 0, void_type_node, 0) ++DEF_TYPE (BT_FLTCONST, B_VX, float_type_node, 1) ++DEF_TYPE (BT_UINT64, B_HTM, c_uint64_type_node, 0) ++DEF_TYPE (BT_FLT, B_VX, float_type_node, 0) ++DEF_TYPE (BT_UINT, 0, unsigned_type_node, 0) ++DEF_TYPE (BT_VOIDCONST, B_VX, void_type_node, 1) ++DEF_TYPE (BT_ULONG, B_VX, long_unsigned_type_node, 0) ++DEF_TYPE (BT_USHORTCONST, B_VX, short_unsigned_type_node, 1) ++DEF_TYPE (BT_SHORTCONST, B_VX, short_integer_type_node, 1) ++DEF_TYPE (BT_INTCONST, B_VX, integer_type_node, 1) ++DEF_TYPE (BT_UCHARCONST, B_VX, unsigned_char_type_node, 1) ++DEF_TYPE (BT_UCHAR, B_VX, unsigned_char_type_node, 0) ++DEF_TYPE (BT_SCHARCONST, B_VX, signed_char_type_node, 1) ++DEF_TYPE (BT_SHORT, B_VX, short_integer_type_node, 0) ++DEF_TYPE (BT_LONG, B_VX, long_integer_type_node, 0) ++DEF_TYPE (BT_SCHAR, B_VX, signed_char_type_node, 0) ++DEF_TYPE (BT_ULONGLONGCONST, B_VX, long_long_unsigned_type_node, 1) ++DEF_TYPE (BT_USHORT, B_VX, short_unsigned_type_node, 0) ++DEF_TYPE (BT_LONGLONG, B_VX, long_long_integer_type_node, 0) ++DEF_TYPE (BT_DBLCONST, B_VX, double_type_node, 1) ++DEF_TYPE (BT_ULONGLONG, B_VX, long_long_unsigned_type_node, 0) ++DEF_TYPE (BT_DBL, B_VX, double_type_node, 0) ++DEF_TYPE (BT_LONGLONGCONST, B_VX, long_long_integer_type_node, 1) ++DEF_TYPE (BT_UINTCONST, B_VX, unsigned_type_node, 1) ++DEF_VECTOR_TYPE (BT_UV2DI, B_VX, BT_ULONGLONG, 2) ++DEF_VECTOR_TYPE (BT_V4SI, B_VX, BT_INT, 4) ++DEF_VECTOR_TYPE (BT_V8HI, B_VX, BT_SHORT, 8) ++DEF_VECTOR_TYPE (BT_UV4SI, B_VX, BT_UINT, 4) ++DEF_VECTOR_TYPE (BT_V16QI, B_VX, BT_SCHAR, 16) ++DEF_VECTOR_TYPE (BT_V2DF, B_VX, BT_DBL, 2) ++DEF_VECTOR_TYPE (BT_V2DI, B_VX, BT_LONGLONG, 2) ++DEF_VECTOR_TYPE (BT_UV8HI, B_VX, BT_USHORT, 8) ++DEF_VECTOR_TYPE (BT_UV16QI, B_VX, BT_UCHAR, 16) ++DEF_POINTER_TYPE (BT_UCHARPTR, B_VX, BT_UCHAR) ++DEF_POINTER_TYPE (BT_DBLCONSTPTR, B_VX, BT_DBLCONST) ++DEF_POINTER_TYPE (BT_VOIDPTR, B_HTM | B_VX, BT_VOID) 
++DEF_POINTER_TYPE (BT_FLTPTR, B_VX, BT_FLT) ++DEF_POINTER_TYPE (BT_UINT64PTR, B_HTM, BT_UINT64) ++DEF_POINTER_TYPE (BT_SCHARPTR, B_VX, BT_SCHAR) ++DEF_POINTER_TYPE (BT_UINTCONSTPTR, B_VX, BT_UINTCONST) ++DEF_POINTER_TYPE (BT_ULONGLONGCONSTPTR, B_VX, BT_ULONGLONGCONST) ++DEF_POINTER_TYPE (BT_LONGLONGCONSTPTR, B_VX, BT_LONGLONGCONST) ++DEF_POINTER_TYPE (BT_SHORTPTR, B_VX, BT_SHORT) ++DEF_POINTER_TYPE (BT_USHORTPTR, B_VX, BT_USHORT) ++DEF_POINTER_TYPE (BT_INTPTR, B_VX, BT_INT) ++DEF_POINTER_TYPE (BT_INTCONSTPTR, B_VX, BT_INTCONST) ++DEF_POINTER_TYPE (BT_LONGLONGPTR, B_VX, BT_LONGLONG) ++DEF_POINTER_TYPE (BT_ULONGLONGPTR, B_VX, BT_ULONGLONG) ++DEF_POINTER_TYPE (BT_DBLPTR, B_VX, BT_DBL) ++DEF_POINTER_TYPE (BT_VOIDCONSTPTR, B_VX, BT_VOIDCONST) ++DEF_POINTER_TYPE (BT_USHORTCONSTPTR, B_VX, BT_USHORTCONST) ++DEF_POINTER_TYPE (BT_SHORTCONSTPTR, B_VX, BT_SHORTCONST) ++DEF_POINTER_TYPE (BT_UCHARCONSTPTR, B_VX, BT_UCHARCONST) ++DEF_POINTER_TYPE (BT_FLTCONSTPTR, B_VX, BT_FLTCONST) ++DEF_POINTER_TYPE (BT_SCHARCONSTPTR, B_VX, BT_SCHARCONST) ++DEF_POINTER_TYPE (BT_UINTPTR, B_VX, BT_UINT) ++DEF_DISTINCT_TYPE (BT_BLONGLONG, B_VX, BT_ULONGLONG) ++DEF_DISTINCT_TYPE (BT_BINT, B_VX, BT_UINT) ++DEF_DISTINCT_TYPE (BT_BSHORT, B_VX, BT_USHORT) ++DEF_DISTINCT_TYPE (BT_BCHAR, B_VX, BT_UCHAR) ++DEF_OPAQUE_VECTOR_TYPE (BT_OV2DI, B_VX, BT_LONGLONG, 2) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV16QI, B_VX, BT_BCHAR, 16) ++DEF_OPAQUE_VECTOR_TYPE (BT_OV4SI, B_VX, BT_INT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_OUV4SI, B_VX, BT_UINT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV4SI, B_VX, BT_BINT, 4) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV2DI, B_VX, BT_BLONGLONG, 2) ++DEF_OPAQUE_VECTOR_TYPE (BT_BV8HI, B_VX, BT_BSHORT, 8) ++DEF_FN_TYPE_1 (BT_FN_INT, B_HTM, BT_INT) ++DEF_FN_TYPE_1 (BT_FN_UINT, 0, BT_UINT) ++DEF_FN_TYPE_2 (BT_FN_INT_INT, B_VX, BT_INT, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_INT_VOIDPTR, B_HTM, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_INT, B_VX, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_INTCONSTPTR, B_VX, BT_OV4SI, 
BT_INTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UCHAR) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UCHARCONSTPTR, B_VX, BT_UV16QI, BT_UCHARCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_USHORT, B_VX, BT_UV16QI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONG, B_VX, BT_UV2DI, BT_ULONGLONG) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_ULONGLONGCONSTPTR, B_VX, BT_UV2DI, BT_ULONGLONGCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_USHORT, B_VX, BT_UV2DI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_2 (BT_FN_UV2DI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINT, B_VX, BT_UV4SI, BT_UINT) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UINTCONSTPTR, B_VX, BT_UV4SI, BT_UINTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_USHORT, B_VX, BT_UV4SI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_2 (BT_FN_UV4SI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORT, B_VX, BT_UV8HI, BT_USHORT) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_USHORTCONSTPTR, B_VX, BT_UV8HI, BT_USHORTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI) ++DEF_FN_TYPE_2 (BT_FN_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_2 (BT_FN_V16QI_SCHAR, B_VX, BT_V16QI, BT_SCHAR) ++DEF_FN_TYPE_2 (BT_FN_V16QI_UCHAR, B_VX, BT_V16QI, BT_UCHAR) ++DEF_FN_TYPE_2 (BT_FN_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V2DF_DBL, B_VX, BT_V2DF, BT_DBL) ++DEF_FN_TYPE_2 (BT_FN_V2DF_FLTCONSTPTR, B_VX, BT_V2DF, BT_FLTCONSTPTR) ++DEF_FN_TYPE_2 (BT_FN_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_2 (BT_FN_V2DI_SHORT, B_VX, BT_V2DI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V16QI, B_VX, BT_V2DI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V4SI, B_VX, BT_V2DI, BT_V4SI) ++DEF_FN_TYPE_2 (BT_FN_V2DI_V8HI, B_VX, BT_V2DI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_V4SI_SHORT, B_VX, 
BT_V4SI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_2 (BT_FN_V4SI_V8HI, B_VX, BT_V4SI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_V8HI_SHORT, B_VX, BT_V8HI, BT_SHORT) ++DEF_FN_TYPE_2 (BT_FN_V8HI_V16QI, B_VX, BT_V8HI, BT_V16QI) ++DEF_FN_TYPE_2 (BT_FN_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_2 (BT_FN_VOID_INT, B_HTM, BT_VOID, BT_INT) ++DEF_FN_TYPE_2 (BT_FN_VOID_UINT, 0, BT_VOID, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_DBL_V2DF_INT, B_VX, BT_DBL, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_INT, B_VX, BT_INT, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_INT_OV4SI_OV4SI, B_VX, BT_INT, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV16QI_UV16QI, B_VX, BT_INT, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV2DI_UV2DI, B_VX, BT_INT, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV4SI_UV4SI, B_VX, BT_INT, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_UV8HI_UV8HI, B_VX, BT_INT, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_INT_V16QI_V16QI, B_VX, BT_INT, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_INT_V2DF_V2DF, B_VX, BT_INT, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_INT_V2DI_V2DI, B_VX, BT_INT, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_INT_V4SI_V4SI, B_VX, BT_INT, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_INT_V8HI_V8HI, B_VX, BT_INT, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_INT_VOIDPTR_INT, B_HTM, BT_INT, BT_VOIDPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV2DI_LONGLONG_LONGLONG, B_VX, BT_OV2DI, BT_LONGLONG, BT_LONGLONG) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_INT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INTCONSTPTR_UINT, B_VX, BT_OV4SI, BT_INTCONSTPTR, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_INT_INT, B_VX, BT_OV4SI, BT_INT, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_OV4SI_OV4SI_ULONG, B_VX, BT_OV4SI, 
BT_OV4SI, BT_ULONG) ++DEF_FN_TYPE_3 (BT_FN_UCHAR_UV16QI_INT, B_VX, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UINT_UV4SI_INT, B_VX, BT_UINT, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UINT_VOIDCONSTPTR_INT, B_VX, BT_UINT, BT_VOIDCONSTPTR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_ULONGLONG_UV2DI_INT, B_VX, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_USHORT_UV8HI_INT, B_VX, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHARCONSTPTR_USHORT, B_VX, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UCHAR_UCHAR, B_VX, BT_UV16QI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UCHAR, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UINT, B_VX, BT_UV16QI, BT_UV16QI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV4SI_UV4SI, B_VX, BT_UV16QI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UCHAR_UCHAR, B_VX, BT_UV2DI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UINT, B_VX, BT_UV2DI, BT_UV2DI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV4SI_UV4SI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_UV8HI_UV8HI, B_VX, BT_UV2DI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV2DI_V2DF_INT, B_VX, BT_UV2DI, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UCHAR_UCHAR, B_VX, BT_UV4SI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UINT, 
BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV16QI_UV16QI, B_VX, BT_UV4SI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV2DI_UV2DI, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UINT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV4SI_UV8HI_UV8HI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UCHAR_UCHAR, B_VX, BT_UV8HI, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV16QI_UV16QI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV4SI_UV4SI, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UCHAR, B_VX, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UINT, B_VX, BT_UV8HI, BT_UV8HI, BT_UINT) ++DEF_FN_TYPE_3 (BT_FN_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_BV16QI_V16QI, B_VX, BT_V16QI, BT_BV16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_UINT_VOIDCONSTPTR, B_VX, BT_V16QI, BT_UINT, BT_VOIDCONSTPTR) ++DEF_FN_TYPE_3 (BT_FN_V16QI_UV16QI_UV16QI, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_V16QI_V8HI_V8HI, B_VX, BT_V16QI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V2DF_DBL_INT, B_VX, BT_V2DF, BT_DBL, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_UV2DI_INT, B_VX, BT_V2DF, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_UV4SI_INT, B_VX, BT_V2DF, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_V2DF_V2DI_INT, B_VX, BT_V2DF, BT_V2DI, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DI_BV2DI_V2DI, B_VX, BT_V2DI, 
BT_BV2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_UV2DI_UV2DI, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_INT, B_VX, BT_V2DI, BT_V2DF, BT_INT) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DF_V2DF, B_VX, BT_V2DI, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V2DI_V2DI, B_VX, BT_V2DI, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V2DI_V4SI_V4SI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_BV4SI_V4SI, B_VX, BT_V4SI, BT_BV4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_INT_VOIDPTR, B_VX, BT_V4SI, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_3 (BT_FN_V4SI_UV4SI_UV4SI, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V2DI_V2DI, B_VX, BT_V4SI, BT_V2DI, BT_V2DI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V4SI_V8HI_V8HI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_BV8HI_V8HI, B_VX, BT_V8HI, BT_BV8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_UV8HI_UV8HI, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_V16QI_V16QI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_V4SI_V4SI, B_VX, BT_V8HI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_3 (BT_FN_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_FN_TYPE_3 (BT_FN_VOID_UINT64PTR_UINT64, B_HTM, BT_VOID, BT_UINT64PTR, BT_UINT64) ++DEF_FN_TYPE_3 (BT_FN_VOID_V2DF_FLTPTR, B_VX, BT_VOID, BT_V2DF, BT_FLTPTR) ++DEF_FN_TYPE_4 (BT_FN_INT_OV4SI_OV4SI_INTPTR, B_VX, BT_INT, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_INT_OV4SI_INT, B_VX, BT_OV4SI, BT_INT, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INT, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_UCHAR) ++DEF_FN_TYPE_4 (BT_FN_OV4SI_OV4SI_OV4SI_ULONGLONG, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, 
BT_ULONGLONG) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UCHAR_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, B_VX, BT_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_FN_TYPE_4 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV2DI_UV4SI_UV4SI_UV2DI, B_VX, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV2DI_UV2DI_INTPTR, B_VX, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UINT_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UINT, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_FN_TYPE_4 (BT_FN_UV4SI_UV8HI_UV8HI_UV4SI, B_VX, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV16QI_UV16QI_UV8HI, B_VX, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV4SI_UV4SI_INTPTR, B_VX, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_USHORT_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, 
BT_UV8HI) ++DEF_FN_TYPE_4 (BT_FN_V16QI_UV16QI_UV16QI_INTPTR, B_VX, BT_V16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_INTPTR, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V16QI_V16QI_V16QI, B_VX, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_FN_TYPE_4 (BT_FN_V16QI_V8HI_V8HI_INTPTR, B_VX, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_DBL_INT, B_VX, BT_V2DF, BT_V2DF, BT_DBL, BT_INT) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_UCHAR_UCHAR, B_VX, BT_V2DF, BT_V2DF, BT_UCHAR, BT_UCHAR) ++DEF_FN_TYPE_4 (BT_FN_V2DF_V2DF_V2DF_V2DF, B_VX, BT_V2DF, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_FN_TYPE_4 (BT_FN_V2DI_UV2DI_UV2DI_INTPTR, B_VX, BT_V2DI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_INT_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DF_V2DF_INTPTR, B_VX, BT_V2DI, BT_V2DF, BT_V2DF, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V2DI_V2DI_INTPTR, B_VX, BT_V2DI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V2DI_V4SI_V4SI_V2DI, B_VX, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI) ++DEF_FN_TYPE_4 (BT_FN_V4SI_UV4SI_UV4SI_INTPTR, B_VX, BT_V4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V2DI_V2DI_INTPTR, B_VX, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_INTPTR, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V4SI_V4SI_V4SI, B_VX, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_FN_TYPE_4 (BT_FN_V4SI_V8HI_V8HI_V4SI, B_VX, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) ++DEF_FN_TYPE_4 (BT_FN_V8HI_UV8HI_UV8HI_INTPTR, B_VX, BT_V8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V16QI_V16QI_V8HI, B_VX, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V4SI_V4SI_INTPTR, B_VX, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_INTPTR, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_FN_TYPE_4 (BT_FN_V8HI_V8HI_V8HI_V8HI, B_VX, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) 
++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_INT_VOIDPTR, B_VX, BT_VOID, BT_OV4SI, BT_INT, BT_VOIDPTR) ++DEF_FN_TYPE_4 (BT_FN_VOID_OV4SI_VOIDPTR_UINT, B_VX, BT_VOID, BT_OV4SI, BT_VOIDPTR, BT_UINT) ++DEF_FN_TYPE_4 (BT_FN_VOID_V16QI_UINT_VOIDPTR, B_VX, BT_VOID, BT_V16QI, BT_UINT, BT_VOIDPTR) ++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OUV4SI_INTCONSTPTR_UCHAR, B_VX, BT_OV4SI, BT_OV4SI, BT_OUV4SI, BT_INTCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_OV4SI_OV4SI_OV4SI_OV4SI_INTPTR, B_VX, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_OV4SI, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT, B_VX, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_UV2DI_UV2DI_UV2DI_UV2DI_INT, B_VX, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) ++DEF_FN_TYPE_5 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_5 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_FN_TYPE_5 (BT_FN_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, B_VX, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_FN_TYPE_5 (BT_FN_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, B_VX, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_FN_TYPE_5 (BT_FN_VOID_V4SI_V4SI_INTPTR_ULONGLONG, B_VX, BT_VOID, BT_V4SI, BT_V4SI, BT_INTPTR, BT_ULONGLONG) ++DEF_FN_TYPE_6 (BT_FN_UV16QI_UV16QI_UV16QI_UV16QI_INT_INTPTR, B_VX, BT_UV16QI, BT_UV16QI, 
BT_UV16QI, BT_UV16QI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_6 (BT_FN_UV4SI_UV4SI_UV4SI_UV4SI_INT_INTPTR, B_VX, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT, BT_INTPTR) ++DEF_FN_TYPE_6 (BT_FN_UV8HI_UV8HI_UV8HI_UV8HI_INT_INTPTR, B_VX, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_INTPTR, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UCHAR, BT_BV16QI, BT_BV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV16QI, BT_BV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV4SI, BT_BV16QI, BT_BV16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV16QI_UV8HI, BT_BV16QI, BT_BV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV16QI_BV8HI_BV8HI, BT_BV16QI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI, BT_BV16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_BV16QI_V16QI_V16QI_INTPTR, BT_BV16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_INT, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, 
BT_BV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_BV2DI_UV2DI, BT_BV2DI, BT_BV2DI, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UCHAR, BT_BV2DI, BT_BV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV16QI, BT_BV2DI, BT_BV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_BV2DI, BT_BV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV4SI, BT_BV2DI, BT_BV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV2DI_UV8HI, BT_BV2DI, BT_BV2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV2DI_BV4SI, BT_BV2DI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV2DI_UV2DI_UV2DI, BT_BV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_BV2DI_V2DF_V2DF, BT_BV2DI, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_BV2DI_V2DI_V2DI, BT_BV2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV2DI_BV2DI, BT_BV4SI, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_INTPTR, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UCHAR, BT_BV4SI, BT_BV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV16QI, BT_BV4SI, BT_BV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI, BT_BV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_BV4SI, BT_BV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV4SI_BV4SI_UV8HI, BT_BV4SI, BT_BV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV4SI_BV8HI, BT_BV4SI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) 
++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI, BT_BV4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_BV4SI_V4SI_V4SI_INTPTR, BT_BV4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV16QI, BT_BV8HI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV4SI_BV4SI, BT_BV8HI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_INTPTR, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UCHAR, BT_BV8HI, BT_BV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV16QI, BT_BV8HI, BT_BV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV4SI, BT_BV8HI, BT_BV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_BV8HI_BV8HI_UV8HI, BT_BV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI, BT_BV8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_BV8HI_V8HI_V8HI_INTPTR, BT_BV8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_DBL_V2DF_INT, BT_DBL, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_INT_BV16QI_BV16QI, BT_INT, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_INT_BV16QI_UV16QI, BT_INT, BT_BV16QI, BT_UV16QI) 
++DEF_OV_TYPE (BT_OV_INT_BV16QI_V16QI, BT_INT, BT_BV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_BV2DI, BT_INT, BT_BV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_UV2DI, BT_INT, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_BV2DI_V2DI, BT_INT, BT_BV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_BV4SI, BT_INT, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_UV4SI, BT_INT, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_BV4SI_V4SI, BT_INT, BT_BV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_BV8HI, BT_INT, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_UV8HI, BT_INT, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_BV8HI_V8HI, BT_INT, BT_BV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_INT_UV16QI_BV16QI, BT_INT, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_INT_UV16QI_UV16QI, BT_INT, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_INT_UV2DI_BV2DI, BT_INT, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_UV2DI_UV2DI, BT_INT, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_UV4SI_BV4SI, BT_INT, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_UV4SI_UV4SI, BT_INT, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_UV8HI_BV8HI, BT_INT, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_UV8HI_UV8HI, BT_INT, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_V16QI_BV16QI, BT_INT, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_INT_V16QI_UV16QI, BT_INT, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_INT_V16QI_V16QI, BT_INT, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_INT_V2DF_UV2DI, BT_INT, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DF_V2DF, BT_INT, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_INT_V2DI_BV2DI, BT_INT, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DI_UV2DI, BT_INT, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_INT_V2DI_V2DI, BT_INT, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_BV4SI, BT_INT, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_INT, BT_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_INT_V4SI_UV4SI, BT_INT, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_INT_V4SI_V4SI, BT_INT, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_BV8HI, 
BT_INT, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_UV8HI, BT_INT, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_INT_V8HI_V8HI, BT_INT, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_LONGLONG_V2DI_INT, BT_LONGLONG, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_SCHAR_V16QI_INT, BT_SCHAR, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_SHORT_V8HI_INT, BT_SHORT, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UCHAR_BV16QI_INT, BT_UCHAR, BT_BV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UCHAR_UV16QI_INT, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UINT_BV4SI_INT, BT_UINT, BT_BV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UINT_UV4SI_INT, BT_UINT, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_ULONGLONG_BV2DI_INT, BT_ULONGLONG, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_ULONGLONG_UV2DI_INT, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_USHORT_BV8HI_INT, BT_USHORT, BT_BV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_UINT, BT_UV16QI, BT_UCHARCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR_USHORT, BT_UV16QI, BT_UCHARCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_BV16QI_INT, BT_UV16QI, BT_UCHAR, BT_BV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_INT, BT_UV16QI, BT_UCHAR, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UCHAR_UV16QI_INT, BT_UV16QI, BT_UCHAR, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE 
(BT_OV_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_ULONG, BT_UV16QI, BT_UV16QI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_BV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INT, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UCHAR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_ULONGLONG, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV16QI_UV16QI_INTPTR, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_UV16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV4SI, BT_UV16QI, BT_UV16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV16QI_V16QI, BT_UV16QI, BT_UV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV2DI_UV2DI, BT_UV16QI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV4SI_UV4SI, BT_UV16QI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI, BT_UV16QI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_UINT, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT, BT_UV2DI, 
BT_ULONGLONGCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_BV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_BV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_ULONGLONG, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_ULONGLONG, BT_UV2DI, BT_ULONGLONG, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG_UV2DI_INT, BT_UV2DI, BT_ULONGLONG, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_ULONG, BT_UV2DI, BT_UV2DI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_BV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONG, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_ULONGLONGCONSTPTR_UCHAR, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_ULONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV16QI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV4SI, BT_UV2DI, BT_UV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_UV8HI, BT_UV2DI, BT_UV2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV2DI_V2DI, BT_UV2DI, BT_UV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI, BT_UV2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI, BT_UV2DI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV4SI_UV4SI_UV2DI, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV2DI_UV8HI_UV8HI, BT_UV2DI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI) ++DEF_OV_TYPE 
(BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR_UINT, BT_UV4SI, BT_UINTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR_USHORT, BT_UV4SI, BT_UINTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_BV4SI_INT, BT_UV4SI, BT_UINT, BT_BV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_INT, BT_UV4SI, BT_UINT, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UINT_UV4SI_INT, BT_UV4SI, BT_UINT, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UV16QI_UV16QI, BT_UV4SI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI, BT_UV4SI, BT_UV2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV2DI_UV2DI_INTPTR, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_ULONG, BT_UV4SI, BT_UV4SI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_BV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INT, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UINTCONSTPTR_UCHAR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UINTCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE 
(BT_OV_UV4SI_UV4SI_UV4SI_ULONGLONG, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV16QI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV4SI_UV4SI_INTPTR, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_UV8HI, BT_UV4SI, BT_UV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV4SI_V4SI, BT_UV4SI, BT_UV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI, BT_UV4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI, BT_UV4SI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV4SI_UV8HI_UV8HI_UV4SI, BT_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV4SI_V2DI_V2DI, BT_UV4SI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_UINT, BT_UV8HI, BT_USHORTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR_USHORT, BT_UV8HI, BT_USHORTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_BV8HI_INT, BT_UV8HI, BT_USHORT, BT_BV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_INT, BT_UV8HI, BT_USHORT, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_USHORT_UV8HI_INT, BT_UV8HI, BT_USHORT, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI, BT_UV8HI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV16QI_UV16QI_UV8HI, BT_UV8HI, BT_UV16QI, BT_UV16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV4SI_UV4SI, BT_UV8HI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE 
(BT_OV_UV8HI_UV4SI_UV4SI_INTPTR, BT_UV8HI, BT_UV4SI, BT_UV4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_ULONG, BT_UV8HI, BT_UV8HI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV4SI, BT_UV8HI, BT_UV8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_BV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INT, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UCHAR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_ULONGLONG, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV16QI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_UV8HI_UV8HI_INTPTR, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_UV8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_UV8HI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_UV8HI_V8HI, BT_UV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V16QI_BV16QI_V16QI, BT_V16QI, BT_BV16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_LONG_SCHARPTR, BT_V16QI, BT_LONG, BT_SCHARPTR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHAR, BT_V16QI, BT_SCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR, BT_V16QI, BT_SCHARCONSTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_UINT, BT_V16QI, BT_SCHARCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V16QI_SCHARCONSTPTR_USHORT, BT_V16QI, BT_SCHARCONSTPTR, BT_USHORT) ++DEF_OV_TYPE 
(BT_OV_V16QI_SCHAR_INT, BT_V16QI, BT_SCHAR, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_SCHAR_V16QI_INT, BT_V16QI, BT_SCHAR, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_UV16QI_V16QI_V16QI, BT_V16QI, BT_UV16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_ULONG, BT_V16QI, BT_V16QI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UCHAR, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV16QI_UV16QI, BT_V16QI, BT_V16QI, BT_UV16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV4SI, BT_V16QI, BT_V16QI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_UV8HI, BT_V16QI, BT_V16QI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_BV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_BV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INT, BT_V16QI, BT_V16QI, BT_V16QI, BT_INT) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_INTPTR, BT_V16QI, BT_V16QI, BT_V16QI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_ULONGLONG, BT_V16QI, BT_V16QI, BT_V16QI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_UV16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V16QI_V16QI_V16QI, BT_V16QI, BT_V16QI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V16QI_V8HI_V8HI, BT_V16QI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V16QI_V8HI_V8HI_INTPTR, BT_V16QI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V2DF_BV2DI_V2DF, BT_V2DF, BT_BV2DI, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_DBL, BT_V2DF, BT_DBL) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR, BT_V2DF, BT_DBLCONSTPTR) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_UINT, BT_V2DF, BT_DBLCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V2DF_DBLCONSTPTR_USHORT, 
BT_V2DF, BT_DBLCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V2DF_DBL_INT, BT_V2DF, BT_DBL, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_DBL_V2DF_INT, BT_V2DF, BT_DBL, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_LONG_DBLPTR, BT_V2DF, BT_LONG, BT_DBLPTR) ++DEF_OV_TYPE (BT_OV_V2DF_UV2DI_INT, BT_V2DF, BT_UV2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_UV2DI_DBLCONSTPTR_UCHAR, BT_V2DF, BT_V2DF, BT_UV2DI, BT_DBLCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_BV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_INT, BT_V2DF, BT_V2DF, BT_V2DF, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_ULONGLONG, BT_V2DF, BT_V2DF, BT_V2DF, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV16QI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DF_V2DI, BT_V2DF, BT_V2DF, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DF_V2DI_INT, BT_V2DF, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_BV2DI_V2DI, BT_V2DI, BT_BV2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG, BT_V2DI, BT_LONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR, BT_V2DI, BT_LONGLONGCONSTPTR) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_UINT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONGCONSTPTR_USHORT, BT_V2DI, BT_LONGLONGCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_INT, BT_V2DI, BT_LONGLONG, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_LONGLONG, BT_V2DI, BT_LONGLONG, BT_LONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_LONGLONG_V2DI_INT, BT_V2DI, BT_LONGLONG, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_LONG_LONGLONGPTR, BT_V2DI, BT_LONG, BT_LONGLONGPTR) ++DEF_OV_TYPE (BT_OV_V2DI_V16QI, BT_V2DI, BT_V16QI) 
++DEF_OV_TYPE (BT_OV_V2DI_V2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_ULONG, BT_V2DI, BT_V2DI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_LONGLONGCONSTPTR_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_LONGLONGCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV2DI_UCHAR, BT_V2DI, BT_V2DI, BT_UV2DI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV4SI, BT_V2DI, BT_V2DI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_UV8HI, BT_V2DI, BT_V2DI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI, BT_V2DI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_BV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_BV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_INT, BT_V2DI, BT_V2DI, BT_V2DI, BT_INT) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_ULONGLONG, BT_V2DI, BT_V2DI, BT_V2DI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV16QI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V2DI_V2DI_V2DI_UV2DI, BT_V2DI, BT_V2DI, BT_V2DI, BT_UV2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI, BT_V2DI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI, BT_V2DI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V2DI_V4SI_V4SI_V2DI, BT_V2DI, BT_V4SI, BT_V4SI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V2DI_V8HI, BT_V2DI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR, BT_V4SI, BT_INTCONSTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_UINT, BT_V4SI, BT_INTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V4SI_INTCONSTPTR_USHORT, BT_V4SI, BT_INTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V4SI_INT_INT, BT_V4SI, BT_INT, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_INT_V4SI_INT, BT_V4SI, BT_INT, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_LONG_INTPTR, BT_V4SI, BT_LONG, BT_INTPTR) ++DEF_OV_TYPE 
(BT_OV_V4SI_UV4SI_V4SI_V4SI, BT_V4SI, BT_UV4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI) ++DEF_OV_TYPE (BT_OV_V4SI_V2DI_V2DI_INTPTR, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_ULONG, BT_V4SI, BT_V4SI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_INTCONSTPTR_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_INTCONSTPTR, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UCHAR, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV4SI_UV4SI, BT_V4SI, BT_V4SI, BT_UV4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_UV8HI, BT_V4SI, BT_V4SI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_BV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_BV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INT, BT_V4SI, BT_V4SI, BT_V4SI, BT_INT) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_INTPTR, BT_V4SI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_ULONGLONG, BT_V4SI, BT_V4SI, BT_V4SI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV16QI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_UV4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V4SI_V4SI_V4SI, BT_V4SI, BT_V4SI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI, BT_V4SI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI, BT_V4SI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V4SI_V8HI_V8HI_V4SI, BT_V4SI, BT_V8HI, BT_V8HI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V8HI_BV8HI_V8HI, BT_V8HI, BT_BV8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_LONG_SHORTPTR, BT_V8HI, BT_LONG, BT_SHORTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT, 
BT_V8HI, BT_SHORT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR, BT_V8HI, BT_SHORTCONSTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_UINT, BT_V8HI, BT_SHORTCONSTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORTCONSTPTR_USHORT, BT_V8HI, BT_SHORTCONSTPTR, BT_USHORT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT_INT, BT_V8HI, BT_SHORT, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_SHORT_V8HI_INT, BT_V8HI, BT_SHORT, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_UV8HI_V8HI_V8HI, BT_V8HI, BT_UV8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI, BT_V8HI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI, BT_V8HI, BT_V16QI, BT_V16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V16QI_V16QI_V8HI, BT_V8HI, BT_V16QI, BT_V16QI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI, BT_V8HI, BT_V4SI, BT_V4SI) ++DEF_OV_TYPE (BT_OV_V8HI_V4SI_V4SI_INTPTR, BT_V8HI, BT_V4SI, BT_V4SI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_ULONG, BT_V8HI, BT_V8HI, BT_ULONG) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_UV16QI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV4SI, BT_V8HI, BT_V8HI, BT_UV4SI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UCHAR, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UCHAR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_UV8HI_UV8HI, BT_V8HI, BT_V8HI, BT_UV8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_BV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_BV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INT, BT_V8HI, BT_V8HI, BT_V8HI, BT_INT) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_INTPTR, BT_V8HI, BT_V8HI, BT_V8HI, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_ULONGLONG, BT_V8HI, BT_V8HI, BT_V8HI, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_UV16QI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV16QI) ++DEF_OV_TYPE 
(BT_OV_V8HI_V8HI_V8HI_UV8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_UV8HI) ++DEF_OV_TYPE (BT_OV_V8HI_V8HI_V8HI_V8HI, BT_V8HI, BT_V8HI, BT_V8HI, BT_V8HI) ++DEF_OV_TYPE (BT_OV_VOID_BV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_BV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_BV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_BV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV16QI_LONG_UCHARPTR, BT_VOID, BT_UV16QI, BT_LONG, BT_UCHARPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV16QI_UCHARPTR_UINT, BT_VOID, BT_UV16QI, BT_UCHARPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_LONG_ULONGLONGPTR, BT_VOID, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_ULONGLONGPTR_UINT, BT_VOID, BT_UV2DI, BT_ULONGLONGPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV2DI_UV2DI_ULONGLONGPTR_ULONGLONG, BT_VOID, BT_UV2DI, BT_UV2DI, BT_ULONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_LONG_UINTPTR, BT_VOID, BT_UV4SI, BT_LONG, BT_UINTPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UINTPTR_UINT, BT_VOID, BT_UV4SI, BT_UINTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_UV4SI_UV4SI_UINTPTR_ULONGLONG, BT_VOID, BT_UV4SI, BT_UV4SI, BT_UINTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_UV8HI_LONG_USHORTPTR, BT_VOID, BT_UV8HI, BT_LONG, BT_USHORTPTR) ++DEF_OV_TYPE (BT_OV_VOID_UV8HI_USHORTPTR_UINT, BT_VOID, BT_UV8HI, BT_USHORTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V16QI_LONG_SCHARPTR, BT_VOID, BT_V16QI, BT_LONG, BT_SCHARPTR) ++DEF_OV_TYPE (BT_OV_VOID_V16QI_SCHARPTR_UINT, BT_VOID, BT_V16QI, BT_SCHARPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_DBLPTR_UINT, BT_VOID, BT_V2DF, BT_DBLPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_LONG_DBLPTR, BT_VOID, BT_V2DF, BT_LONG, BT_DBLPTR) ++DEF_OV_TYPE (BT_OV_VOID_V2DF_UV2DI_DBLPTR_ULONGLONG, BT_VOID, BT_V2DF, BT_UV2DI, BT_DBLPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONGLONGPTR_UINT, BT_VOID, BT_V2DI, BT_LONGLONGPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V2DI_LONG_LONGLONGPTR, BT_VOID, BT_V2DI, BT_LONG, BT_LONGLONGPTR) ++DEF_OV_TYPE 
(BT_OV_VOID_V2DI_UV2DI_LONGLONGPTR_ULONGLONG, BT_VOID, BT_V2DI, BT_UV2DI, BT_LONGLONGPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_INTPTR_UINT, BT_VOID, BT_V4SI, BT_INTPTR, BT_UINT) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_LONG_INTPTR, BT_VOID, BT_V4SI, BT_LONG, BT_INTPTR) ++DEF_OV_TYPE (BT_OV_VOID_V4SI_UV4SI_INTPTR_ULONGLONG, BT_VOID, BT_V4SI, BT_UV4SI, BT_INTPTR, BT_ULONGLONG) ++DEF_OV_TYPE (BT_OV_VOID_V8HI_LONG_SHORTPTR, BT_VOID, BT_V8HI, BT_LONG, BT_SHORTPTR) ++DEF_OV_TYPE (BT_OV_VOID_V8HI_SHORTPTR_UINT, BT_VOID, BT_V8HI, BT_SHORTPTR, BT_UINT) +--- gcc/config/s390/s390.c 2015-06-18 16:33:04.000000000 +0200 ++++ gcc/config/s390/s390.c 2016-05-11 19:11:44.333028400 +0200 +@@ -52,6 +52,10 @@ along with GCC; see the file COPYING3. + #include "params.h" + #include "cfgloop.h" + #include "opts.h" ++#include "intl.h" ++#include "plugin-api.h" ++#include "cgraph.h" ++#include "tm-constrs.h" + + /* Define the specific costs for a given cpu. */ + +@@ -288,6 +292,19 @@ extern int reload_completed; + + /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */ + static rtx last_scheduled_insn; ++#define MAX_SCHED_UNITS 3 ++static int last_scheduled_unit_distance[MAX_SCHED_UNITS]; ++ ++/* The maximum score added for an instruction whose unit hasn't been ++ in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to ++ give instruction mix scheduling more priority over instruction ++ grouping. */ ++#define MAX_SCHED_MIX_SCORE 8 ++ ++/* The maximum distance up to which individual scores will be ++ calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE. ++ Increase this with the OOO window size of the machine. */ ++#define MAX_SCHED_MIX_DISTANCE 100 + + /* Structure used to hold the components of a S/390 memory + address. A legitimate address on S/390 is of the general +@@ -387,6 +404,7 @@ struct GTY(()) machine_function + /* Number of GPRs and FPRs used for argument passing. */ + #define GP_ARG_NUM_REG 5 + #define FP_ARG_NUM_REG (TARGET_64BIT? 
4 : 2) ++#define VEC_ARG_NUM_REG 8 + + /* A couple of shortcuts. */ + #define CONST_OK_FOR_J(x) \ +@@ -407,6 +425,539 @@ struct GTY(()) machine_function + bytes on a z10 (or higher) CPU. */ + #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048) + ++ ++/* Indicate which ABI has been used for passing vector args. ++ 0 - no vector type arguments have been passed where the ABI is relevant ++ 1 - the old ABI has been used ++ 2 - a vector type argument has been passed either in a vector register ++ or on the stack by value */ ++static int s390_vector_abi = 0; ++ ++/* Set the vector ABI marker if TYPE is subject to the vector ABI ++ switch. The vector ABI affects only vector data types. There are ++ two aspects of the vector ABI relevant here: ++ ++ 1. vectors >= 16 bytes have an alignment of 8 bytes with the new ++ ABI and natural alignment with the old. ++ ++ 2. vectors <= 16 bytes are passed in VRs or by value on the stack ++ with the new ABI but by reference on the stack with the old. ++ ++ If ARG_P is true, TYPE is used for a function argument or return ++ value. The ABI marker then is set for all vector data types. If ++ ARG_P is false, only type 1 vectors are being checked. */ ++ ++static void ++s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p) ++{ ++ static htab_t visited_types_hash ++ = htab_create (37, htab_hash_pointer, htab_eq_pointer, free); ++ void **slot; ++ ++ if (s390_vector_abi) ++ return; ++ ++ if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK) ++ return; ++ ++ slot = htab_find_slot (visited_types_hash, type, INSERT); ++ if (*slot) ++ return; ++ ++ *slot = CONST_CAST_TREE (type); ++ ++ if (TREE_CODE (type) == VECTOR_TYPE) ++ { ++ int type_size = int_size_in_bytes (type); ++ ++ /* Outside arguments only the alignment is changing and this ++ only happens for vector types >= 16 bytes. 
*/ ++ if (!arg_p && type_size < 16) ++ return; ++ ++ /* In arguments vector types > 16 are passed as before (GCC ++ never enforced the bigger alignment for arguments which was ++ required by the old vector ABI). However, it might still be ++ ABI relevant due to the changed alignment if it is a struct ++ member. */ ++ if (arg_p && type_size > 16 && !in_struct_p) ++ return; ++ ++ s390_vector_abi = TARGET_VX_ABI ? 2 : 1; ++ } ++ else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ /* ARRAY_TYPE: Since with neither of the ABIs we have more than ++ natural alignment there will never be ABI dependent padding ++ in an array type. That's why we do not set in_struct_p to ++ true here. */ ++ s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p); ++ } ++ else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) ++ { ++ tree arg_chain; ++ ++ /* Check the return type. */ ++ s390_check_type_for_vector_abi (TREE_TYPE (type), true, false); ++ ++ for (arg_chain = TYPE_ARG_TYPES (type); ++ arg_chain; ++ arg_chain = TREE_CHAIN (arg_chain)) ++ s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false); ++ } ++ else if (RECORD_OR_UNION_TYPE_P (type)) ++ { ++ tree field; ++ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true); ++ } ++ } ++} ++ ++ ++/* System z builtins. */ ++ ++#include "s390-builtins.h" ++ ++const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS, ++#define OB_DEF(...) 
++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS, ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++const unsigned int ++opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS, ++#include "s390-builtins.def" ++ 0 ++ }; ++ ++tree s390_builtin_types[BT_MAX]; ++tree s390_builtin_fn_types[BT_FN_MAX]; ++tree s390_builtin_decls[S390_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_MAX + ++ S390_OVERLOADED_BUILTIN_VAR_MAX]; ++ ++static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN, ++#define OB_DEF(...) ++#define OB_DEF_VAR(...) ++ ++#include "s390-builtins.def" ++ CODE_FOR_nothing ++}; ++ ++static void ++s390_init_builtins (void) ++{ ++ /* These definitions are being used in s390-builtins.def. */ ++ tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), ++ NULL, NULL); ++ tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); ++ tree c_uint64_type_node; ++ unsigned int bflags_mask = (BFLAGS_MASK_INIT); ++ ++ bflags_mask |= (TARGET_VX) ? B_VX : 0; ++ bflags_mask |= (TARGET_HTM) ? B_HTM : 0; ++ ++ /* The uint64_type_node from tree.c is not compatible to the C99 ++ uint64_t data type. What we want is c_uint64_type_node from ++ c-common.c. But since backend code is not supposed to interface ++ with the frontend we recreate it here. 
*/ ++ if (TARGET_64BIT) ++ c_uint64_type_node = long_unsigned_type_node; ++ else ++ c_uint64_type_node = long_long_unsigned_type_node; ++ ++#undef DEF_TYPE ++#define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = (!CONST_P) ? \ ++ (NODE) : build_type_variant ((NODE), 1, 0); ++ ++#undef DEF_POINTER_TYPE ++#define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_pointer_type (s390_builtin_types[INDEX_BASE]); ++ ++#undef DEF_DISTINCT_TYPE ++#define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_distinct_type_copy (s390_builtin_types[INDEX_BASE]); ++ ++#undef DEF_VECTOR_TYPE ++#define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); ++ ++#undef DEF_OPAQUE_VECTOR_TYPE ++#define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_types[INDEX] = \ ++ build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); ++ ++#undef DEF_FN_TYPE ++#define DEF_FN_TYPE(INDEX, BFLAGS, args...) \ ++ if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ ++ s390_builtin_fn_types[INDEX] = \ ++ build_function_type_list (args, NULL_TREE); ++#undef DEF_OV_TYPE ++#define DEF_OV_TYPE(...) 
++#include "s390-builtin-types.def" ++ ++#undef B_DEF ++#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \ ++ if (((BFLAGS) & ~bflags_mask) == 0) \ ++ s390_builtin_decls[S390_BUILTIN_##NAME] = \ ++ add_builtin_function ("__builtin_" #NAME, \ ++ s390_builtin_fn_types[FNTYPE], \ ++ S390_BUILTIN_##NAME, \ ++ BUILT_IN_MD, \ ++ NULL, \ ++ ATTRS); ++#undef OB_DEF ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \ ++ if (((BFLAGS) & ~bflags_mask) == 0) \ ++ s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \ ++ add_builtin_function ("__builtin_" #NAME, \ ++ s390_builtin_fn_types[FNTYPE], \ ++ S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \ ++ BUILT_IN_MD, \ ++ NULL, \ ++ 0); ++#undef OB_DEF_VAR ++#define OB_DEF_VAR(...) ++#include "s390-builtins.def" ++ ++} ++ ++/* Return true if ARG is appropriate as argument number ARGNUM of ++ builtin DECL. The operand flags from s390-builtins.def have to ++ be passed as OP_FLAGS. */ ++bool ++s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl) ++{ ++ if (O_UIMM_P (op_flags)) ++ { ++ int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 }; ++ int bitwidth = bitwidths[op_flags - O_U1]; ++ ++ if (!host_integerp (arg, 1) ++ || ((unsigned HOST_WIDE_INT) tree_low_cst (arg, 1) ++ > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)) ++ { ++ error("constant argument %d for builtin %qF is out of range (0.." ++ HOST_WIDE_INT_PRINT_UNSIGNED ")", ++ argnum, decl, ++ ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1); ++ return false; ++ } ++ } ++ ++ if (O_SIMM_P (op_flags)) ++ { ++ int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 }; ++ int bitwidth = bitwidths[op_flags - O_S2]; ++ ++ if (!host_integerp (arg, 0) ++ || tree_low_cst (arg, 0) < -((HOST_WIDE_INT)1 << (bitwidth - 1)) ++ || (tree_low_cst (arg, 0) ++ > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))) ++ { ++ error("constant argument %d for builtin %qF is out of range (" ++ HOST_WIDE_INT_PRINT_DEC ".." 
++ HOST_WIDE_INT_PRINT_DEC ")", ++ argnum, decl, ++ -((HOST_WIDE_INT)1 << (bitwidth - 1)), ++ ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1); ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Expand an expression EXP that calls a built-in function, ++ with result going to TARGET if that's convenient ++ (and in mode MODE if that's convenient). ++ SUBTARGET may be used as the target for computing one of EXP's operands. ++ IGNORE is nonzero if the value is to be ignored. */ ++ ++static rtx ++s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ++ enum machine_mode mode ATTRIBUTE_UNUSED, ++ int ignore ATTRIBUTE_UNUSED) ++{ ++#define MAX_ARGS 5 ++ ++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ unsigned int fcode = DECL_FUNCTION_CODE (fndecl); ++ enum insn_code icode; ++ rtx op[MAX_ARGS], pat; ++ int arity; ++ bool nonvoid; ++ tree arg; ++ call_expr_arg_iterator iter; ++ unsigned int all_op_flags = opflags_for_builtin (fcode); ++ enum machine_mode last_vec_mode = VOIDmode; ++ ++ if (TARGET_DEBUG_ARG) ++ { ++ fprintf (stderr, ++ "s390_expand_builtin, code = %4d, %s\n", ++ (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); ++ } ++ ++ if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET ++ && fcode < S390_ALL_BUILTIN_MAX) ++ { ++ gcc_unreachable (); ++ } ++ else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET) ++ { ++ icode = code_for_builtin[fcode]; ++ /* Set a flag in the machine specific cfun part in order to support ++ saving/restoring of FPRs. 
*/ ++ if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry) ++ cfun->machine->tbegin_p = true; ++ } ++ else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET) ++ { ++ error ("Unresolved overloaded builtin"); ++ return const0_rtx; ++ } ++ else ++ internal_error ("bad builtin fcode"); ++ ++ if (icode == 0) ++ internal_error ("bad builtin icode"); ++ ++ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; ++ ++ if (nonvoid) ++ { ++ enum machine_mode tmode = insn_data[icode].operand[0].mode; ++ if (!target ++ || GET_MODE (target) != tmode ++ || !(*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ /* There are builtins (e.g. vec_promote) with no vector ++ arguments but an element selector. So we have to also look ++ at the vector return type when emitting the modulo ++ operation. */ ++ if (VECTOR_MODE_P (insn_data[icode].operand[0].mode)) ++ last_vec_mode = insn_data[icode].operand[0].mode; ++ } ++ ++ arity = 0; ++ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) ++ { ++ const struct insn_operand_data *insn_op; ++ unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1); ++ ++ all_op_flags = all_op_flags >> O_SHIFT; ++ ++ if (arg == error_mark_node) ++ return NULL_RTX; ++ if (arity >= MAX_ARGS) ++ return NULL_RTX; ++ ++ if (O_IMM_P (op_flags) ++ && TREE_CODE (arg) != INTEGER_CST) ++ { ++ error ("constant value required for builtin %qF argument %d", ++ fndecl, arity + 1); ++ return const0_rtx; ++ } ++ ++ if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl)) ++ return const0_rtx; ++ ++ insn_op = &insn_data[icode].operand[arity + nonvoid]; ++ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); ++ ++ /* expand_expr truncates constants to the target mode only if it ++ is "convenient". However, our checks below rely on this ++ being done. 
*/ ++ if (CONST_INT_P (op[arity]) ++ && SCALAR_INT_MODE_P (insn_op->mode) ++ && GET_MODE (op[arity]) != insn_op->mode) ++ op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]), ++ insn_op->mode)); ++ ++ /* Wrap the expanded RTX for pointer types into a MEM expr with ++ the proper mode. This allows us to use e.g. (match_operand ++ "memory_operand"..) in the insn patterns instead of (mem ++ (match_operand "address_operand)). This is helpful for ++ patterns not just accepting MEMs. */ ++ if (POINTER_TYPE_P (TREE_TYPE (arg)) ++ && insn_op->predicate != address_operand) ++ op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); ++ ++ /* Expand the module operation required on element selectors. */ ++ if (op_flags == O_ELEM) ++ { ++ gcc_assert (last_vec_mode != VOIDmode); ++ op[arity] = simplify_expand_binop (SImode, code_to_optab (AND), ++ op[arity], ++ GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1), ++ NULL_RTX, 1, OPTAB_DIRECT); ++ } ++ ++ /* Record the vector mode used for an element selector. This assumes: ++ 1. There is no builtin with two different vector modes and an element selector ++ 2. The element selector comes after the vector type it is referring to. ++ This currently the true for all the builtins but FIXME we ++ should better check for that. */ ++ if (VECTOR_MODE_P (insn_op->mode)) ++ last_vec_mode = insn_op->mode; ++ ++ if (insn_op->predicate (op[arity], insn_op->mode)) ++ { ++ arity++; ++ continue; ++ } ++ ++ if (MEM_P (op[arity]) ++ && insn_op->predicate == memory_operand ++ && (GET_MODE (XEXP (op[arity], 0)) == Pmode ++ || GET_MODE (XEXP (op[arity], 0)) == VOIDmode)) ++ { ++ op[arity] = replace_equiv_address (op[arity], ++ copy_to_mode_reg (Pmode, ++ XEXP (op[arity], 0))); ++ } ++ else if (GET_MODE (op[arity]) == insn_op->mode ++ || GET_MODE (op[arity]) == VOIDmode ++ || (insn_op->predicate == address_operand ++ && GET_MODE (op[arity]) == Pmode)) ++ { ++ /* An address_operand usually has VOIDmode in the expander ++ so we cannot use this. 
*/ ++ enum machine_mode target_mode = ++ (insn_op->predicate == address_operand ++ ? Pmode : insn_op->mode); ++ op[arity] = copy_to_mode_reg (target_mode, op[arity]); ++ } ++ ++ if (!insn_op->predicate (op[arity], insn_op->mode)) ++ { ++ error ("Invalid argument %d for builtin %qF", arity + 1, fndecl); ++ return const0_rtx; ++ } ++ arity++; ++ } ++ ++ if (last_vec_mode != VOIDmode && !TARGET_VX) ++ { ++ error ("Vector type builtin %qF is not supported without -mvx " ++ "(default with -march=z13).", ++ fndecl); ++ return const0_rtx; ++ } ++ ++ switch (arity) ++ { ++ case 0: ++ pat = GEN_FCN (icode) (target); ++ break; ++ case 1: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0]); ++ else ++ pat = GEN_FCN (icode) (op[0]); ++ break; ++ case 2: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1]); ++ break; ++ case 3: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2]); ++ break; ++ case 4: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); ++ break; ++ case 5: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); ++ break; ++ case 6: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]); ++ else ++ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (!pat) ++ return NULL_RTX; ++ emit_insn (pat); ++ ++ if (nonvoid) ++ return target; ++ else ++ return const0_rtx; ++} ++ ++ + static const int s390_hotpatch_hw_max = 1000000; + static int s390_hotpatch_hw_before_label = 0; + static int s390_hotpatch_hw_after_label = 0; +@@ -458,9 +1009,43 @@ s390_handle_hotpatch_attribute (tree *no + return NULL_TREE; + } + ++/* Expand the s390_vector_bool 
type attribute. */ ++ ++static tree ++s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED, ++ tree args ATTRIBUTE_UNUSED, ++ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) ++{ ++ tree type = *node, result = NULL_TREE; ++ enum machine_mode mode; ++ ++ while (POINTER_TYPE_P (type) ++ || TREE_CODE (type) == FUNCTION_TYPE ++ || TREE_CODE (type) == METHOD_TYPE ++ || TREE_CODE (type) == ARRAY_TYPE) ++ type = TREE_TYPE (type); ++ ++ mode = TYPE_MODE (type); ++ switch (mode) ++ { ++ case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break; ++ case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break; ++ case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break; ++ case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; ++ default: break; ++ } ++ ++ *no_add_attrs = true; /* No need to hang on to the attribute. */ ++ ++ if (result) ++ *node = lang_hooks.types.reconstruct_complex_type (*node, result); ++ ++ return NULL_TREE; ++} ++ + static const struct attribute_spec s390_attribute_table[] = { +- { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false +- }, ++ { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false }, ++ { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true }, + /* End element. */ + { NULL, 0, 0, false, false, false, NULL, false } + }; +@@ -524,6 +1109,35 @@ s390_scalar_mode_supported_p (enum machi + return default_scalar_mode_supported_p (mode); + } + ++/* Return true if the back end supports vector mode MODE. 
*/ ++static bool ++s390_vector_mode_supported_p (enum machine_mode mode) ++{ ++ enum machine_mode inner; ++ ++ if (!VECTOR_MODE_P (mode) ++ || !TARGET_VX ++ || GET_MODE_SIZE (mode) > 16) ++ return false; ++ ++ inner = GET_MODE_INNER (mode); ++ ++ switch (inner) ++ { ++ case QImode: ++ case HImode: ++ case SImode: ++ case DImode: ++ case TImode: ++ case SFmode: ++ case DFmode: ++ case TFmode: ++ return true; ++ default: ++ return false; ++ } ++} ++ + /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */ + + void +@@ -595,6 +1209,11 @@ s390_match_ccmode_set (rtx set, enum mac + case CCT1mode: + case CCT2mode: + case CCT3mode: ++ case CCVEQmode: ++ case CCVHmode: ++ case CCVHUmode: ++ case CCVFHmode: ++ case CCVFHEmode: + if (req_mode != set_mode) + return 0; + break; +@@ -695,6 +1314,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool m + enum machine_mode + s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1) + { ++ if (TARGET_VX ++ && register_operand (op0, DFmode) ++ && register_operand (op1, DFmode)) ++ { ++ /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either ++ s390_emit_compare or s390_canonicalize_comparison will take ++ care of it. */ ++ switch (code) ++ { ++ case EQ: ++ case NE: ++ return CCVEQmode; ++ case GT: ++ case UNLE: ++ return CCVFHmode; ++ case GE: ++ case UNLT: ++ return CCVFHEmode; ++ default: ++ ; ++ } ++ } ++ + switch (code) + { + case EQ: +@@ -972,8 +1614,73 @@ s390_canonicalize_comparison (int *code, + rtx tem = *op0; *op0 = *op1; *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } ++ ++ /* Using the scalar variants of vector instructions for 64 bit FP ++ comparisons might require swapping the operands. 
*/ ++ if (TARGET_VX ++ && register_operand (*op0, DFmode) ++ && register_operand (*op1, DFmode) ++ && (*code == LT || *code == LE || *code == UNGT || *code == UNGE)) ++ { ++ rtx tmp; ++ ++ switch (*code) ++ { ++ case LT: *code = GT; break; ++ case LE: *code = GE; break; ++ case UNGT: *code = UNLE; break; ++ case UNGE: *code = UNLT; break; ++ default: ; ++ } ++ tmp = *op0; *op0 = *op1; *op1 = tmp; ++ } ++} ++ ++/* Helper function for s390_emit_compare. If possible emit a 64 bit ++ FP compare using the single element variant of vector instructions. ++ Replace CODE with the comparison code to be used in the CC reg ++ compare and return the condition code register RTX in CC. */ ++ ++static bool ++s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2, ++ rtx *cc) ++{ ++ enum machine_mode cmp_mode; ++ bool swap_p = false; ++ ++ switch (*code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVFHmode; break; ++ case GE: cmp_mode = CCVFHEmode; break; ++ case UNLE: cmp_mode = CCVFHmode; break; ++ case UNLT: cmp_mode = CCVFHEmode; break; ++ case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break; ++ case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break; ++ case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break; ++ default: return false; ++ } ++ ++ if (swap_p) ++ { ++ rtx tmp = cmp2; ++ cmp2 = cmp1; ++ cmp1 = tmp; ++ } ++ *cc = gen_rtx_REG (cmp_mode, CC_REGNUM); ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (2, ++ gen_rtx_SET (VOIDmode, *cc, ++ gen_rtx_COMPARE (cmp_mode, cmp1, ++ cmp2)), ++ gen_rtx_CLOBBER (VOIDmode, ++ gen_rtx_SCRATCH (V2DImode))))); ++ return true; + } + ++ + /* Emit a compare instruction suitable to implement the comparison + OP0 CODE OP1. Return the correct condition RTL to be placed in + the IF_THEN_ELSE of the conditional branch testing the result. 
*/ +@@ -984,10 +1691,18 @@ s390_emit_compare (enum rtx_code code, r + enum machine_mode mode = s390_select_ccmode (code, op0, op1); + rtx cc; + +- /* Do not output a redundant compare instruction if a compare_and_swap +- pattern already computed the result and the machine modes are compatible. */ +- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) +- { ++ if (TARGET_VX ++ && register_operand (op0, DFmode) ++ && register_operand (op1, DFmode) ++ && s390_expand_vec_compare_scalar (&code, op0, op1, &cc)) ++ { ++ /* Work has been done by s390_expand_vec_compare_scalar already. */ ++ } ++ else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) ++ { ++ /* Do not output a redundant compare instruction if a ++ compare_and_swap pattern already computed the result and the ++ machine modes are compatible. */ + gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode) + == GET_MODE (op0)); + cc = op0; +@@ -1222,6 +1937,93 @@ s390_branch_condition_mask (rtx code) + } + break; + ++ /* Vector comparison modes. */ ++ ++ case CCVEQmode: ++ switch (GET_CODE (code)) ++ { ++ case EQ: return CC0; ++ case NE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVEQANYmode: ++ switch (GET_CODE (code)) ++ { ++ case EQ: return CC0 | CC1; ++ case NE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ /* Integer vector compare modes. */ ++ ++ case CCVHmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0; ++ case LE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVHANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0 | CC1; ++ case LE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ case CCVHUmode: ++ switch (GET_CODE (code)) ++ { ++ case GTU: return CC0; ++ case LEU: return CC3; ++ default: return -1; ++ } ++ ++ case CCVHUANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GTU: return CC0 | CC1; ++ case LEU: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ /* FP vector compare modes. 
*/ ++ ++ case CCVFHmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0; ++ case UNLE: return CC3; ++ default: return -1; ++ } ++ ++ case CCVFHANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GT: return CC0 | CC1; ++ case UNLE: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ case CCVFHEmode: ++ switch (GET_CODE (code)) ++ { ++ case GE: return CC0; ++ case UNLT: return CC3; ++ default: return -1; ++ } ++ ++ case CCVFHEANYmode: ++ switch (GET_CODE (code)) ++ { ++ case GE: return CC0 | CC1; ++ case UNLT: return CC3 | CC1; ++ default: return -1; ++ } ++ ++ + case CCRAWmode: + switch (GET_CODE (code)) + { +@@ -1421,6 +2223,9 @@ s390_contiguous_bitmask_p (unsigned HOST + /* Calculate a mask for all bits beyond the contiguous bits. */ + mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1)); + ++ if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT) ++ mask &= ((unsigned HOST_WIDE_INT) 1 << size) - 1; ++ + if (mask & in) + return false; + +@@ -1436,6 +2241,128 @@ s390_contiguous_bitmask_p (unsigned HOST + return true; + } + ++/* Return true if OP is a constant vector with the same constant in ++ all its elements. */ ++ ++bool ++s390_const_vec_duplicate_p (rtx op) ++{ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ if (GET_MODE_NUNITS (GET_MODE (op)) > 1) ++ { ++ int i; ++ ++ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) ++ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) ++ return false; ++ } ++ return true; ++} ++ ++/* Return true if OP contains the same contiguous bitfield in *all* ++ its elements. START and END can be used to obtain the start and ++ end position of the bitfield. ++ ++ START/STOP give the position of the first/last bit of the bitfield ++ counting from the lowest order bit starting with zero. In order to ++ use these values for S/390 instructions this has to be converted to ++ "bits big endian" style. 
*/ ++ ++bool ++s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end) ++{ ++ unsigned HOST_WIDE_INT mask; ++ int length, size; ++ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ if (GET_MODE_NUNITS (GET_MODE (op)) > 1) ++ { ++ int i; ++ ++ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) ++ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) ++ return false; ++ } ++ ++ size = GET_MODE_UNIT_BITSIZE (GET_MODE (op)); ++ ++ /* We cannot deal with V1TI/V1TF. This would require a vgmq. */ ++ if (size > 64) ++ return false; ++ ++ mask = UINTVAL (XVECEXP (op, 0, 0)); ++ if (s390_contiguous_bitmask_p (mask, size, start, ++ end != NULL ? &length : NULL)) ++ { ++ if (end != NULL) ++ *end = *start + length - 1; ++ return true; ++ } ++ /* 0xff00000f style immediates can be covered by swapping start and ++ end indices in vgm. */ ++ if (s390_contiguous_bitmask_p (~mask, size, start, ++ end != NULL ? &length : NULL)) ++ { ++ if (end != NULL) ++ *end = *start - 1; ++ if (start != NULL) ++ *start = *start + length; ++ return true; ++ } ++ return false; ++} ++ ++/* Return true if C consists only of byte chunks being either 0 or ++ 0xff. If MASK is !=NULL a byte mask is generated which is ++ appropriate for the vector generate byte mask instruction. 
*/ ++ ++bool ++s390_bytemask_vector_p (rtx op, unsigned *mask) ++{ ++ int i; ++ unsigned tmp_mask = 0; ++ int nunit, unit_size; ++ ++ if (!VECTOR_MODE_P (GET_MODE (op)) ++ || GET_CODE (op) != CONST_VECTOR ++ || !CONST_INT_P (XVECEXP (op, 0, 0))) ++ return false; ++ ++ nunit = GET_MODE_NUNITS (GET_MODE (op)); ++ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op)); ++ ++ for (i = 0; i < nunit; i++) ++ { ++ unsigned HOST_WIDE_INT c; ++ int j; ++ ++ if (!CONST_INT_P (XVECEXP (op, 0, i))) ++ return false; ++ ++ c = UINTVAL (XVECEXP (op, 0, i)); ++ for (j = 0; j < unit_size; j++) ++ { ++ if ((c & 0xff) != 0 && (c & 0xff) != 0xff) ++ return false; ++ tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j); ++ c = c >> BITS_PER_UNIT; ++ } ++ } ++ ++ if (mask != NULL) ++ *mask = tmp_mask; ++ ++ return true; ++} ++ + /* Check whether a rotate of ROTL followed by an AND of CONTIG is + equivalent to a shift followed by the AND. In particular, CONTIG + should not overlap the (rotated) bit 0/bit 63 gap. Negative values +@@ -1461,8 +2388,8 @@ s390_extzv_shift_ok (int bitsize, int ro + bool + s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword) + { +- /* Floating point registers cannot be split. */ +- if (FP_REG_P (src) || FP_REG_P (dst)) ++ /* Floating point and vector registers cannot be split. */ ++ if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst)) + return false; + + /* We don't need to split if operands are directly accessible. 
*/ +@@ -1752,6 +2679,22 @@ s390_option_override (void) + if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) + target_flags |= MASK_OPT_HTM; + ++ if (target_flags_explicit & MASK_OPT_VX) ++ { ++ if (TARGET_OPT_VX) ++ { ++ if (!TARGET_CPU_VX) ++ error ("hardware vector support not available on %s", ++ s390_arch_string); ++ if (TARGET_SOFT_FLOAT) ++ error ("hardware vector support not available with -msoft-float"); ++ } ++ } ++ else if (TARGET_CPU_VX) ++ /* Enable vector support if available and not explicitly disabled ++ by user. E.g. with -m31 -march=z13 -mzarch */ ++ target_flags |= MASK_OPT_VX; ++ + if (TARGET_HARD_DFP && !TARGET_DFP) + { + if (target_flags_explicit & MASK_HARD_DFP) +@@ -1791,6 +2734,7 @@ s390_option_override (void) + s390_cost = &z196_cost; + break; + case PROCESSOR_2827_ZEC12: ++ case PROCESSOR_2964_Z13: + s390_cost = &zEC12_cost; + break; + default: +@@ -1818,7 +2762,8 @@ s390_option_override (void) + + if (s390_tune == PROCESSOR_2097_Z10 + || s390_tune == PROCESSOR_2817_Z196 +- || s390_tune == PROCESSOR_2827_ZEC12) ++ || s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + { + maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100, + global_options.x_param_values, +@@ -1882,16 +2827,20 @@ s390_option_override (void) + /* Map for smallest class containing reg regno. 
*/ + + const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] = +-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- FP_REGS, FP_REGS, FP_REGS, FP_REGS, +- ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, +- ACCESS_REGS, ACCESS_REGS ++{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */ ++ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */ ++ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */ ++ ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */ ++ ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */ ++ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */ ++ VEC_REGS, VEC_REGS /* 52 */ + }; + + /* Return attribute type of insn. */ +@@ -2933,6 +3882,19 @@ legitimate_pic_operand_p (rtx op) + static bool + s390_legitimate_constant_p (enum machine_mode mode, rtx op) + { ++ if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) ++ { ++ if (GET_MODE_SIZE (mode) != 16) ++ return 0; ++ ++ if (!satisfies_constraint_j00 (op) ++ && !satisfies_constraint_jm1 (op) ++ && !satisfies_constraint_jKK (op) ++ && !satisfies_constraint_jxx (op) ++ && !satisfies_constraint_jyy (op)) ++ return 0; ++ } ++ + /* Accept all non-symbolic constants. */ + if (!SYMBOLIC_CONST (op)) + return 1; +@@ -2969,6 +3931,7 @@ s390_cannot_force_const_mem (enum machin + { + case CONST_INT: + case CONST_DOUBLE: ++ case CONST_VECTOR: + /* Accept all non-symbolic constants. 
*/ + return false; + +@@ -3101,6 +4064,25 @@ legitimate_reload_fp_constant_p (rtx op) + return false; + } + ++/* Returns true if the constant value OP is a legitimate vector operand ++ during and after reload. ++ This function accepts all constants which can be loaded directly ++ into an VR. */ ++ ++static bool ++legitimate_reload_vector_constant_p (rtx op) ++{ ++ if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16 ++ && (satisfies_constraint_j00 (op) ++ || satisfies_constraint_jm1 (op) ++ || satisfies_constraint_jKK (op) ++ || satisfies_constraint_jxx (op) ++ || satisfies_constraint_jyy (op))) ++ return true; ++ ++ return false; ++} ++ + /* Given an rtx OP being reloaded into a reg required to be in class RCLASS, + return the class of reg to actually use. */ + +@@ -3111,6 +4093,7 @@ s390_preferred_reload_class (rtx op, reg + { + /* Constants we cannot reload into general registers + must be forced into the literal pool. */ ++ case CONST_VECTOR: + case CONST_DOUBLE: + case CONST_INT: + if (reg_class_subset_p (GENERAL_REGS, rclass) +@@ -3122,6 +4105,10 @@ s390_preferred_reload_class (rtx op, reg + else if (reg_class_subset_p (FP_REGS, rclass) + && legitimate_reload_fp_constant_p (op)) + return FP_REGS; ++ else if (reg_class_subset_p (VEC_REGS, rclass) ++ && legitimate_reload_vector_constant_p (op)) ++ return VEC_REGS; ++ + return NO_REGS; + + /* If a symbolic constant or a PLUS is reloaded, +@@ -3245,6 +4232,7 @@ s390_reload_symref_address (rtx reg, rtx + /* Reload might have pulled a constant out of the literal pool. + Force it back in. */ + if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE ++ || GET_CODE (mem) == CONST_VECTOR + || GET_CODE (mem) == CONST) + mem = force_const_mem (GET_MODE (reg), mem); + +@@ -3284,6 +4272,30 @@ s390_secondary_reload (bool in_p, rtx x, + if (reg_classes_intersect_p (CC_REGS, rclass)) + return GENERAL_REGS; + ++ if (TARGET_VX) ++ { ++ /* The vst/vl vector move instructions allow only for short ++ displacements. 
*/ ++ if (MEM_P (x) ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT ++ && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1))) ++ && reg_class_subset_p (rclass, VEC_REGS) ++ && (!reg_class_subset_p (rclass, FP_REGS) ++ || (GET_MODE_SIZE (mode) > 8 ++ && s390_class_max_nregs (FP_REGS, mode) == 1))) ++ { ++ if (in_p) ++ sri->icode = (TARGET_64BIT ? ++ CODE_FOR_reloaddi_la_in : ++ CODE_FOR_reloadsi_la_in); ++ else ++ sri->icode = (TARGET_64BIT ? ++ CODE_FOR_reloaddi_la_out : ++ CODE_FOR_reloadsi_la_out); ++ } ++ } ++ + if (TARGET_Z10) + { + HOST_WIDE_INT offset; +@@ -3299,17 +4311,15 @@ s390_secondary_reload (bool in_p, rtx x, + sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10 + : CODE_FOR_reloadsi_larl_odd_addend_z10); + +- /* On z10 we need a scratch register when moving QI, TI or floating +- point mode values from or to a memory location with a SYMBOL_REF +- or if the symref addend of a SI or DI move is not aligned to the +- width of the access. */ ++ /* Handle all the (mem (symref)) accesses we cannot use the z10 ++ instructions for. 
*/ + if (MEM_P (x) + && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL) +- && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode) +- || (!TARGET_ZARCH && mode == DImode) +- || ((mode == HImode || mode == SImode || mode == DImode) +- && (!s390_check_symref_alignment (XEXP (x, 0), +- GET_MODE_SIZE (mode)))))) ++ && (mode == QImode ++ || !reg_class_subset_p (rclass, GENERAL_REGS) ++ || GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ || !s390_check_symref_alignment (XEXP (x, 0), ++ GET_MODE_SIZE (mode)))) + { + #define __SECONDARY_RELOAD_CASE(M,m) \ + case M##mode: \ +@@ -3334,7 +4344,27 @@ s390_secondary_reload (bool in_p, rtx x, + __SECONDARY_RELOAD_CASE (SD, sd); + __SECONDARY_RELOAD_CASE (DD, dd); + __SECONDARY_RELOAD_CASE (TD, td); +- ++ __SECONDARY_RELOAD_CASE (V1QI, v1qi); ++ __SECONDARY_RELOAD_CASE (V2QI, v2qi); ++ __SECONDARY_RELOAD_CASE (V4QI, v4qi); ++ __SECONDARY_RELOAD_CASE (V8QI, v8qi); ++ __SECONDARY_RELOAD_CASE (V16QI, v16qi); ++ __SECONDARY_RELOAD_CASE (V1HI, v1hi); ++ __SECONDARY_RELOAD_CASE (V2HI, v2hi); ++ __SECONDARY_RELOAD_CASE (V4HI, v4hi); ++ __SECONDARY_RELOAD_CASE (V8HI, v8hi); ++ __SECONDARY_RELOAD_CASE (V1SI, v1si); ++ __SECONDARY_RELOAD_CASE (V2SI, v2si); ++ __SECONDARY_RELOAD_CASE (V4SI, v4si); ++ __SECONDARY_RELOAD_CASE (V1DI, v1di); ++ __SECONDARY_RELOAD_CASE (V2DI, v2di); ++ __SECONDARY_RELOAD_CASE (V1TI, v1ti); ++ __SECONDARY_RELOAD_CASE (V1SF, v1sf); ++ __SECONDARY_RELOAD_CASE (V2SF, v2sf); ++ __SECONDARY_RELOAD_CASE (V4SF, v4sf); ++ __SECONDARY_RELOAD_CASE (V1DF, v1df); ++ __SECONDARY_RELOAD_CASE (V2DF, v2df); ++ __SECONDARY_RELOAD_CASE (V1TF, v1tf); + default: + gcc_unreachable (); + } +@@ -3371,12 +4401,12 @@ s390_secondary_reload (bool in_p, rtx x, + { + if (in_p) + sri->icode = (TARGET_64BIT ? +- CODE_FOR_reloaddi_nonoffmem_in : +- CODE_FOR_reloadsi_nonoffmem_in); ++ CODE_FOR_reloaddi_la_in : ++ CODE_FOR_reloadsi_la_in); + else + sri->icode = (TARGET_64BIT ? 
+- CODE_FOR_reloaddi_nonoffmem_out : +- CODE_FOR_reloadsi_nonoffmem_out); ++ CODE_FOR_reloaddi_la_out : ++ CODE_FOR_reloadsi_la_out); + } + } + +@@ -4610,6 +5640,256 @@ s390_expand_cmpmem (rtx target, rtx op0, + return true; + } + ++/* Emit a conditional jump to LABEL for condition code mask MASK using ++ comparsion operator COMPARISON. Return the emitted jump insn. */ ++ ++static rtx ++s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label) ++{ ++ rtx temp; ++ ++ gcc_assert (comparison == EQ || comparison == NE); ++ gcc_assert (mask > 0 && mask < 15); ++ ++ temp = gen_rtx_fmt_ee (comparison, VOIDmode, ++ gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask)); ++ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, ++ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); ++ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); ++ return emit_jump_insn (temp); ++} ++ ++/* Emit the instructions to implement strlen of STRING and store the ++ result in TARGET. The string has the known ALIGNMENT. This ++ version uses vector instructions and is therefore not appropriate ++ for targets prior to z13. */ ++ ++void ++s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) ++{ ++ int very_unlikely = REG_BR_PROB_BASE / 100 - 1; ++ int very_likely = REG_BR_PROB_BASE - 1; ++ rtx highest_index_to_load_reg = gen_reg_rtx (Pmode); ++ rtx str_reg = gen_reg_rtx (V16QImode); ++ rtx str_addr_base_reg = gen_reg_rtx (Pmode); ++ rtx str_idx_reg = gen_reg_rtx (Pmode); ++ rtx result_reg = gen_reg_rtx (V16QImode); ++ rtx is_aligned_label = gen_label_rtx (); ++ rtx into_loop_label = NULL_RTX; ++ rtx loop_start_label = gen_label_rtx (); ++ rtx temp; ++ rtx len = gen_reg_rtx (QImode); ++ rtx cond; ++ ++ s390_load_address (str_addr_base_reg, XEXP (string, 0)); ++ emit_move_insn (str_idx_reg, const0_rtx); ++ ++ if (INTVAL (alignment) < 16) ++ { ++ /* Check whether the address happens to be aligned properly so ++ jump directly to the aligned loop. 
*/ ++ emit_cmp_and_jump_insns (gen_rtx_AND (Pmode, ++ str_addr_base_reg, GEN_INT (15)), ++ const0_rtx, EQ, NULL_RTX, ++ Pmode, 1, is_aligned_label); ++ ++ temp = gen_reg_rtx (Pmode); ++ temp = expand_binop (Pmode, and_optab, str_addr_base_reg, ++ GEN_INT (15), temp, 1, OPTAB_DIRECT); ++ gcc_assert (REG_P (temp)); ++ highest_index_to_load_reg = ++ expand_binop (Pmode, sub_optab, GEN_INT (15), temp, ++ highest_index_to_load_reg, 1, OPTAB_DIRECT); ++ gcc_assert (REG_P (highest_index_to_load_reg)); ++ emit_insn (gen_vllv16qi (str_reg, ++ convert_to_mode (SImode, highest_index_to_load_reg, 1), ++ gen_rtx_MEM (BLKmode, str_addr_base_reg))); ++ ++ into_loop_label = gen_label_rtx (); ++ s390_emit_jump (into_loop_label, NULL_RTX); ++ emit_barrier (); ++ } ++ ++ emit_label (is_aligned_label); ++ LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1; ++ ++ /* Reaching this point we are only performing 16 bytes aligned ++ loads. */ ++ emit_move_insn (highest_index_to_load_reg, GEN_INT (15)); ++ ++ emit_label (loop_start_label); ++ LABEL_NUSES (loop_start_label) = 1; ++ ++ /* Load 16 bytes of the string into VR. */ ++ emit_move_insn (str_reg, ++ gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, str_idx_reg, ++ str_addr_base_reg))); ++ if (into_loop_label != NULL_RTX) ++ { ++ emit_label (into_loop_label); ++ LABEL_NUSES (into_loop_label) = 1; ++ } ++ ++ /* Increment string index by 16 bytes. */ ++ expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16), ++ str_idx_reg, 1, OPTAB_DIRECT); ++ ++ emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg, ++ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ ++ add_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label), ++ REG_BR_PROB, GEN_INT (very_likely)); ++ emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7))); ++ ++ /* If the string pointer wasn't aligned we have loaded less then 16 ++ bytes and the remaining bytes got filled with zeros (by vll). 
++ Now we have to check whether the resulting index lies within the ++ bytes actually part of the string. */ ++ ++ cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1), ++ highest_index_to_load_reg); ++ s390_load_address (highest_index_to_load_reg, ++ gen_rtx_PLUS (Pmode, highest_index_to_load_reg, ++ const1_rtx)); ++ if (TARGET_64BIT) ++ emit_insn (gen_movdicc (str_idx_reg, cond, ++ highest_index_to_load_reg, str_idx_reg)); ++ else ++ emit_insn (gen_movsicc (str_idx_reg, cond, ++ highest_index_to_load_reg, str_idx_reg)); ++ ++ add_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB, ++ GEN_INT (very_unlikely)); ++ ++ expand_binop (Pmode, add_optab, str_idx_reg, ++ GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT); ++ /* FIXME: len is already zero extended - so avoid the llgcr emitted ++ here. */ ++ temp = expand_binop (Pmode, add_optab, str_idx_reg, ++ convert_to_mode (Pmode, len, 1), ++ target, 1, OPTAB_DIRECT); ++ if (temp != target) ++ emit_move_insn (target, temp); ++} ++ ++void ++s390_expand_vec_movstr (rtx result, rtx dst, rtx src) ++{ ++ int very_unlikely = REG_BR_PROB_BASE / 100 - 1; ++ rtx temp = gen_reg_rtx (Pmode); ++ rtx src_addr = XEXP (src, 0); ++ rtx dst_addr = XEXP (dst, 0); ++ rtx src_addr_reg = gen_reg_rtx (Pmode); ++ rtx dst_addr_reg = gen_reg_rtx (Pmode); ++ rtx offset = gen_reg_rtx (Pmode); ++ rtx vsrc = gen_reg_rtx (V16QImode); ++ rtx vpos = gen_reg_rtx (V16QImode); ++ rtx loadlen = gen_reg_rtx (SImode); ++ rtx gpos_qi = gen_reg_rtx(QImode); ++ rtx gpos = gen_reg_rtx (SImode); ++ rtx done_label = gen_label_rtx (); ++ rtx loop_label = gen_label_rtx (); ++ rtx exit_label = gen_label_rtx (); ++ rtx full_label = gen_label_rtx (); ++ ++ /* Perform a quick check for string ending on the first up to 16 ++ bytes and exit early if successful. 
*/ ++ ++ emit_insn (gen_vlbb (vsrc, src, GEN_INT (6))); ++ emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6))); ++ emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc)); ++ emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7))); ++ emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); ++ /* gpos is the byte index if a zero was found and 16 otherwise. ++ So if it is lower than the loaded bytes we have a hit. */ ++ emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1, ++ full_label); ++ emit_insn (gen_vstlv16qi (vsrc, gpos, dst)); ++ ++ force_expand_binop (Pmode, add_optab, dst_addr, gpos, result, ++ 1, OPTAB_DIRECT); ++ emit_jump (exit_label); ++ emit_barrier (); ++ ++ emit_label (full_label); ++ LABEL_NUSES (full_label) = 1; ++ ++ /* Calculate `offset' so that src + offset points to the last byte ++ before 16 byte alignment. */ ++ ++ /* temp = src_addr & 0xf */ ++ force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp, ++ 1, OPTAB_DIRECT); ++ ++ /* offset = 0xf - temp */ ++ emit_move_insn (offset, GEN_INT (15)); ++ force_expand_binop (Pmode, sub_optab, offset, temp, offset, ++ 1, OPTAB_DIRECT); ++ ++ /* Store `offset' bytes in the destination string. The quick check ++ has loaded at least `offset' bytes into vsrc. */ ++ ++ emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst)); ++ ++ /* Advance to the next byte to be loaded. */ ++ force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset, ++ 1, OPTAB_DIRECT); ++ ++ /* Make sure the addresses are single regs which can be used as a ++ base.
*/ ++ emit_move_insn (src_addr_reg, src_addr); ++ emit_move_insn (dst_addr_reg, dst_addr); ++ ++ /* MAIN LOOP */ ++ ++ emit_label (loop_label); ++ LABEL_NUSES (loop_label) = 1; ++ ++ emit_move_insn (vsrc, ++ gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, src_addr_reg, offset))); ++ ++ emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc, ++ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ add_reg_note (s390_emit_ccraw_jump (8, EQ, done_label), ++ REG_BR_PROB, GEN_INT (very_unlikely)); ++ ++ emit_move_insn (gen_rtx_MEM (V16QImode, ++ gen_rtx_PLUS (Pmode, dst_addr_reg, offset)), ++ vsrc); ++ /* offset += 16 */ ++ force_expand_binop (Pmode, add_optab, offset, GEN_INT (16), ++ offset, 1, OPTAB_DIRECT); ++ ++ emit_jump (loop_label); ++ emit_barrier (); ++ ++ /* REGULAR EXIT */ ++ ++ /* We are done. Add the offset of the zero character to the dst_addr ++ pointer to get the result. */ ++ ++ emit_label (done_label); ++ LABEL_NUSES (done_label) = 1; ++ ++ force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg, ++ 1, OPTAB_DIRECT); ++ ++ emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7))); ++ emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0)); ++ ++ emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg))); ++ ++ force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result, ++ 1, OPTAB_DIRECT); ++ ++ /* EARLY EXIT */ ++ ++ emit_label (exit_label); ++ LABEL_NUSES (exit_label) = 1; ++} ++ + + /* Expand conditional increment or decrement using alc/slb instructions. + Should generate code setting DST to either SRC or SRC + INCREMENT, +@@ -4964,6 +6244,304 @@ s390_expand_mask_and_shift (rtx val, enu + NULL_RTX, 1, OPTAB_DIRECT); + } + ++/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store ++ the result in TARGET. 
*/ ++ ++void ++s390_expand_vec_compare (rtx target, enum rtx_code cond, ++ rtx cmp_op1, rtx cmp_op2) ++{ ++ enum machine_mode mode = GET_MODE (target); ++ bool neg_p = false, swap_p = false; ++ rtx tmp; ++ ++ if (GET_MODE (cmp_op1) == V2DFmode) ++ { ++ switch (cond) ++ { ++ /* NE a != b -> !(a == b) */ ++ case NE: cond = EQ; neg_p = true; break; ++ /* UNGT a u> b -> !(b >= a) */ ++ case UNGT: cond = GE; neg_p = true; swap_p = true; break; ++ /* UNGE a u>= b -> !(b > a) */ ++ case UNGE: cond = GT; neg_p = true; swap_p = true; break; ++ /* LE: a <= b -> b >= a */ ++ case LE: cond = GE; swap_p = true; break; ++ /* UNLE: a u<= b -> !(a > b) */ ++ case UNLE: cond = GT; neg_p = true; break; ++ /* LT: a < b -> b > a */ ++ case LT: cond = GT; swap_p = true; break; ++ /* UNLT: a u< b -> !(a >= b) */ ++ case UNLT: cond = GE; neg_p = true; break; ++ case UNEQ: ++ emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case LTGT: ++ emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case ORDERED: ++ emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2)); ++ return; ++ case UNORDERED: ++ emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2)); ++ return; ++ default: break; ++ } ++ } ++ else ++ { ++ switch (cond) ++ { ++ /* NE: a != b -> !(a == b) */ ++ case NE: cond = EQ; neg_p = true; break; ++ /* GE: a >= b -> !(b > a) */ ++ case GE: cond = GT; neg_p = true; swap_p = true; break; ++ /* GEU: a >= b -> !(b > a) */ ++ case GEU: cond = GTU; neg_p = true; swap_p = true; break; ++ /* LE: a <= b -> !(a > b) */ ++ case LE: cond = GT; neg_p = true; break; ++ /* LEU: a <= b -> !(a > b) */ ++ case LEU: cond = GTU; neg_p = true; break; ++ /* LT: a < b -> b > a */ ++ case LT: cond = GT; swap_p = true; break; ++ /* LTU: a < b -> b > a */ ++ case LTU: cond = GTU; swap_p = true; break; ++ default: break; ++ } ++ } ++ ++ if (swap_p) ++ { ++ tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp; ++ } ++ ++ emit_insn (gen_rtx_SET (VOIDmode, ++ target, 
gen_rtx_fmt_ee (cond, ++ mode, ++ cmp_op1, cmp_op2))); ++ if (neg_p) ++ emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_NOT (mode, target))); ++} ++ ++/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into ++ TARGET if either all (ALL_P is true) or any (ALL_P is false) of the ++ elements in CMP1 and CMP2 fulfill the comparison. */ ++void ++s390_expand_vec_compare_cc (rtx target, enum rtx_code code, ++ rtx cmp1, rtx cmp2, bool all_p) ++{ ++ enum rtx_code new_code = code; ++ enum machine_mode cmp_mode, full_cmp_mode, scratch_mode; ++ rtx tmp_reg = gen_reg_rtx (SImode); ++ bool swap_p = false; ++ ++ if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT) ++ { ++ switch (code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVHmode; break; ++ case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break; ++ case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVHmode; new_code = LE; break; ++ case GTU: cmp_mode = CCVHUmode; break; ++ case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break; ++ case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break; ++ case LEU: cmp_mode = CCVHUmode; new_code = LEU; break; ++ default: gcc_unreachable (); ++ } ++ scratch_mode = GET_MODE (cmp1); ++ } ++ else if (GET_MODE (cmp1) == V2DFmode) ++ { ++ switch (code) ++ { ++ case EQ: cmp_mode = CCVEQmode; break; ++ case NE: cmp_mode = CCVEQmode; break; ++ case GT: cmp_mode = CCVFHmode; break; ++ case GE: cmp_mode = CCVFHEmode; break; ++ case UNLE: cmp_mode = CCVFHmode; break; ++ case UNLT: cmp_mode = CCVFHEmode; break; ++ case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break; ++ case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break; ++ default: gcc_unreachable (); ++ } ++ scratch_mode = V2DImode; ++ } ++ else ++ gcc_unreachable (); ++ ++ if (!all_p) ++ switch (cmp_mode) ++ { ++ case CCVEQmode: full_cmp_mode = CCVEQANYmode; 
break; ++ case CCVHmode: full_cmp_mode = CCVHANYmode; break; ++ case CCVHUmode: full_cmp_mode = CCVHUANYmode; break; ++ case CCVFHmode: full_cmp_mode = CCVFHANYmode; break; ++ case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break; ++ default: gcc_unreachable (); ++ } ++ else ++ /* The modes without ANY match the ALL modes. */ ++ full_cmp_mode = cmp_mode; ++ ++ if (swap_p) ++ { ++ rtx tmp = cmp2; ++ cmp2 = cmp1; ++ cmp1 = tmp; ++ } ++ ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, ++ gen_rtvec (2, gen_rtx_SET ( ++ VOIDmode, ++ gen_rtx_REG (cmp_mode, CC_REGNUM), ++ gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)), ++ gen_rtx_CLOBBER (VOIDmode, ++ gen_rtx_SCRATCH (scratch_mode))))); ++ emit_move_insn (target, const0_rtx); ++ emit_move_insn (tmp_reg, const1_rtx); ++ ++ emit_move_insn (target, ++ gen_rtx_IF_THEN_ELSE (SImode, ++ gen_rtx_fmt_ee (new_code, VOIDmode, ++ gen_rtx_REG (full_cmp_mode, CC_REGNUM), ++ const0_rtx), ++ target, tmp_reg)); ++} ++ ++/* Generate a vector comparison expression loading either elements of ++ THEN or ELS into TARGET depending on the comparison COND of CMP_OP1 ++ and CMP_OP2. */ ++ ++void ++s390_expand_vcond (rtx target, rtx then, rtx els, ++ enum rtx_code cond, rtx cmp_op1, rtx cmp_op2) ++{ ++ rtx tmp; ++ enum machine_mode result_mode; ++ rtx result_target; ++ ++ /* We always use an integral type vector to hold the comparison ++ result. */ ++ result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1); ++ result_target = gen_reg_rtx (result_mode); ++ ++ /* Alternatively this could be done by reload by lowering the cmp* ++ predicates. But it appears to be better for scheduling etc. to ++ have that in early. 
*/ ++ if (!REG_P (cmp_op1)) ++ cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1); ++ ++ if (!REG_P (cmp_op2)) ++ cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2); ++ ++ s390_expand_vec_compare (result_target, cond, ++ cmp_op1, cmp_op2); ++ ++ /* If the results are supposed to be either -1 or 0 we are done ++ since this is what our compare instructions generate anyway. */ ++ if (all_ones_operand (then, GET_MODE (then)) ++ && const0_operand (els, GET_MODE (els))) ++ { ++ emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target), ++ result_target, 0)); ++ return; ++ } ++ ++ /* Otherwise we will do a vsel afterwards. */ ++ /* This gets triggered e.g. ++ with gcc.c-torture/compile/pr53410-1.c */ ++ if (!REG_P (then)) ++ then = force_reg (GET_MODE (target), then); ++ ++ if (!REG_P (els)) ++ els = force_reg (GET_MODE (target), els); ++ ++ tmp = gen_rtx_fmt_ee (EQ, VOIDmode, ++ result_target, ++ CONST0_RTX (result_mode)); ++ ++ /* We compared the result against zero above so we have to swap then ++ and els here. */ ++ tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then); ++ ++ gcc_assert (GET_MODE (target) == GET_MODE (then)); ++ emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); ++} ++ ++/* Emit the RTX necessary to initialize the vector TARGET with values ++ in VALS. */ ++void ++s390_expand_vec_init (rtx target, rtx vals) ++{ ++ enum machine_mode mode = GET_MODE (target); ++ enum machine_mode inner_mode = GET_MODE_INNER (mode); ++ int n_elts = GET_MODE_NUNITS (mode); ++ bool all_same = true, all_regs = true, all_const_int = true; ++ rtx x; ++ int i; ++ ++ for (i = 0; i < n_elts; ++i) ++ { ++ x = XVECEXP (vals, 0, i); ++ ++ if (!CONST_INT_P (x)) ++ all_const_int = false; ++ ++ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) ++ all_same = false; ++ ++ if (!REG_P (x)) ++ all_regs = false; ++ } ++ ++ /* Use vector gen mask or vector gen byte mask if possible. 
*/ ++ if (all_same && all_const_int ++ && (XVECEXP (vals, 0, 0) == const0_rtx ++ || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), ++ NULL, NULL) ++ || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))); ++ return; ++ } ++ ++ if (all_same) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_VEC_DUPLICATE (mode, ++ XVECEXP (vals, 0, 0)))); ++ return; ++ } ++ ++ if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode) ++ { ++ /* Use vector load pair. */ ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_VEC_CONCAT (mode, ++ XVECEXP (vals, 0, 0), ++ XVECEXP (vals, 0, 1)))); ++ return; ++ } ++ ++ /* We are about to set the vector elements one by one. Zero out the ++ full register first in order to help the data flow framework to ++ detect it as full VR set. */ ++ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); ++ ++ /* Unfortunately the vec_init expander is not allowed to fail. So ++ we have to implement the fallback ourselves. */ ++ for (i = 0; i < n_elts; i++) ++ emit_insn (gen_rtx_SET (VOIDmode, target, ++ gen_rtx_UNSPEC (mode, ++ gen_rtvec (3, XVECEXP (vals, 0, i), ++ GEN_INT (i), target), ++ UNSPEC_VEC_SET))); ++} ++ + /* Structure to hold the initial parameters for a compare_and_swap operation + in HImode and QImode. */ + +@@ -5259,12 +6837,37 @@ s390_output_dwarf_dtprel (FILE *file, in + fputs ("@DTPOFF", file); + } + ++/* Return the proper mode for REGNO being represented in the dwarf ++ unwind table. */ ++enum machine_mode ++s390_dwarf_frame_reg_mode (int regno) ++{ ++ enum machine_mode save_mode = default_dwarf_frame_reg_mode (regno); ++ ++ /* The rightmost 64 bits of vector registers are call-clobbered. */ ++ if (GET_MODE_SIZE (save_mode) > 8) ++ save_mode = DImode; ++ ++ return save_mode; ++} ++ + #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + /* Implement TARGET_MANGLE_TYPE. 
*/ + + static const char * + s390_mangle_type (const_tree type) + { ++ type = TYPE_MAIN_VARIANT (type); ++ ++ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE ++ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) ++ return NULL; ++ ++ if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc"; ++ if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools"; ++ if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli"; ++ if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll"; ++ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; +@@ -5627,24 +7230,26 @@ print_operand_address (FILE *file, rtx a + 'J': print tls_load/tls_gdcall/tls_ldcall suffix + 'M': print the second word of a TImode operand. + 'N': print the second word of a DImode operand. +- 'O': print only the displacement of a memory reference. +- 'R': print only the base register of a memory reference. ++ 'O': print only the displacement of a memory reference or address. ++ 'R': print only the base register of a memory reference or address. + 'S': print S-type memory reference (base+displacement). + 'Y': print shift count operand. + + 'b': print integer X as if it's an unsigned byte. + 'c': print integer X as if it's an signed byte. +- 'e': "end" of DImode contiguous bitmask X. +- 'f': "end" of SImode contiguous bitmask X. ++ 'e': "end" contiguous bitmask X in either DImode or vector inner mode. ++ 'f': "end" contiguous bitmask X in SImode. + 'h': print integer X as if it's a signed halfword. + 'i': print the first nonzero HImode part of X. + 'j': print the first HImode part unequal to -1 of X. + 'k': print the first nonzero SImode part of X. + 'm': print the first SImode part unequal to -1 of X. + 'o': print integer X as if it's an unsigned 32bit word. +- 's': "start" of DImode contiguous bitmask X. +- 't': "start" of SImode contiguous bitmask X. 
++ 's': "start" of contiguous bitmask X in either DImode or vector inner mode. ++ 't': CONST_INT: "start" of contiguous bitmask X in SImode. ++ CONST_VECTOR: Generate a bitmask for vgbm instruction. + 'x': print integer X as if it's an unsigned halfword. ++ 'v': print register number as vector register (v1 instead of f1). + */ + + void +@@ -5701,14 +7306,7 @@ print_operand (FILE *file, rtx x, int co + struct s390_address ad; + int ret; + +- if (!MEM_P (x)) +- { +- output_operand_lossage ("memory reference expected for " +- "'O' output modifier"); +- return; +- } +- +- ret = s390_decompose_address (XEXP (x, 0), &ad); ++ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); + + if (!ret + || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) +@@ -5730,14 +7328,7 @@ print_operand (FILE *file, rtx x, int co + struct s390_address ad; + int ret; + +- if (!MEM_P (x)) +- { +- output_operand_lossage ("memory reference expected for " +- "'R' output modifier"); +- return; +- } +- +- ret = s390_decompose_address (XEXP (x, 0), &ad); ++ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); + + if (!ret + || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base))) +@@ -5815,7 +7406,17 @@ print_operand (FILE *file, rtx x, int co + switch (GET_CODE (x)) + { + case REG: +- fprintf (file, "%s", reg_names[REGNO (x)]); ++ /* Print FP regs as fx instead of vx when they are accessed ++ through non-vector mode. 
*/ ++ if (code == 'v' ++ || VECTOR_NOFP_REG_P (x) ++ || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x))) ++ || (VECTOR_REG_P (x) ++ && (GET_MODE_SIZE (GET_MODE (x)) / ++ s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8)) ++ fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2); ++ else ++ fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: +@@ -5902,6 +7503,44 @@ print_operand (FILE *file, rtx x, int co + code); + } + break; ++ case CONST_VECTOR: ++ switch (code) ++ { ++ case 'h': ++ gcc_assert (s390_const_vec_duplicate_p (x)); ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ++ ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000); ++ break; ++ case 'e': ++ case 's': ++ { ++ int start, stop, inner_len; ++ bool ok; ++ ++ inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x)); ++ ok = s390_contiguous_bitmask_vector_p (x, &start, &stop); ++ gcc_assert (ok); ++ if (code == 's' || code == 't') ++ ival = inner_len - stop - 1; ++ else ++ ival = inner_len - start - 1; ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); ++ } ++ break; ++ case 't': ++ { ++ unsigned mask; ++ bool ok = s390_bytemask_vector_p (x, &mask); ++ gcc_assert (ok); ++ fprintf (file, "%u", mask); ++ } ++ break; ++ ++ default: ++ output_operand_lossage ("invalid constant vector for output " ++ "modifier '%c'", code); ++ } ++ break; + + default: + if (code == 0) +@@ -6051,7 +7690,8 @@ s390_adjust_priority (rtx insn ATTRIBUTE + && s390_tune != PROCESSOR_2094_Z9_109 + && s390_tune != PROCESSOR_2097_Z10 + && s390_tune != PROCESSOR_2817_Z196 +- && s390_tune != PROCESSOR_2827_ZEC12) ++ && s390_tune != PROCESSOR_2827_ZEC12 ++ && s390_tune != PROCESSOR_2964_Z13) + return priority; + + switch (s390_safe_attr_type (insn)) +@@ -6459,14 +8099,20 @@ replace_ltrel_base (rtx *x) + /* We keep a list of constants which we have to add to internal + constant tables in the middle of large functions. 
*/ + +-#define NR_C_MODES 11 ++#define NR_C_MODES 32 + enum machine_mode constant_modes[NR_C_MODES] = + { + TFmode, TImode, TDmode, ++ V16QImode, V8HImode, V4SImode, V2DImode, V1TImode, ++ V4SFmode, V2DFmode, V1TFmode, + DFmode, DImode, DDmode, ++ V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode, + SFmode, SImode, SDmode, ++ V4QImode, V2HImode, V1SImode, V1SFmode, + HImode, +- QImode ++ V2QImode, V1HImode, ++ QImode, ++ V1QImode + }; + + struct constant +@@ -7490,6 +9136,23 @@ s390_output_pool_entry (rtx exp, enum ma + mark_symbol_refs_as_used (exp); + break; + ++ case MODE_VECTOR_INT: ++ case MODE_VECTOR_FLOAT: ++ { ++ int i; ++ enum machine_mode inner_mode; ++ gcc_assert (GET_CODE (exp) == CONST_VECTOR); ++ ++ inner_mode = GET_MODE_INNER (GET_MODE (exp)); ++ for (i = 0; i < XVECLEN (exp, 0); i++) ++ s390_output_pool_entry (XVECEXP (exp, 0, i), ++ inner_mode, ++ i == 0 ++ ? align ++ : GET_MODE_BITSIZE (inner_mode)); ++ } ++ break; ++ + default: + gcc_unreachable (); + } +@@ -8205,9 +9868,25 @@ s390_update_frame_layout (void) + bool + s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) + { ++ if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno)) ++ return false; ++ + switch (REGNO_REG_CLASS (regno)) + { ++ case VEC_REGS: ++ return ((GET_MODE_CLASS (mode) == MODE_INT ++ && s390_class_max_nregs (VEC_REGS, mode) == 1) ++ || mode == DFmode ++ || s390_vector_mode_supported_p (mode)); ++ break; + case FP_REGS: ++ if (TARGET_VX ++ && ((GET_MODE_CLASS (mode) == MODE_INT ++ && s390_class_max_nregs (FP_REGS, mode) == 1) ++ || mode == DFmode ++ || s390_vector_mode_supported_p (mode))) ++ return true; ++ + if (REGNO_PAIR_OK (regno, mode)) + { + if (mode == SImode || mode == DImode) +@@ -8269,19 +9948,86 @@ s390_hard_regno_rename_ok (unsigned int + int + s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode) + { ++ int reg_size; ++ bool reg_pair_required_p = false; ++ + switch (rclass) + { + case FP_REGS: ++ case VEC_REGS: ++ reg_size = 
TARGET_VX ? 16 : 8; ++ ++ /* TF and TD modes would fit into a VR but we put them into a ++ register pair since we do not have 128bit FP instructions on ++ full VRs. */ ++ if (TARGET_VX ++ && SCALAR_FLOAT_MODE_P (mode) ++ && GET_MODE_SIZE (mode) >= 16) ++ reg_pair_required_p = true; ++ ++ /* Even if complex types would fit into a single FPR/VR we force ++ them into a register pair to deal with the parts more easily. ++ (FIXME: What about complex ints?) */ + if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) +- return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8); +- else +- return (GET_MODE_SIZE (mode) + 8 - 1) / 8; ++ reg_pair_required_p = true; ++ break; + case ACCESS_REGS: +- return (GET_MODE_SIZE (mode) + 4 - 1) / 4; ++ reg_size = 4; ++ break; + default: ++ reg_size = UNITS_PER_WORD; + break; + } +- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; ++ ++ if (reg_pair_required_p) ++ return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size); ++ ++ return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; ++} ++ ++/* Return TRUE if changing mode from FROM_MODE to TO_MODE should not be ++ allowed for register class RCLASS. */ ++ ++int ++s390_cannot_change_mode_class (enum machine_mode from_mode, ++ enum machine_mode to_mode, ++ enum reg_class rclass) ++{ ++ enum machine_mode small_mode; ++ enum machine_mode big_mode; ++ ++ if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode)) ++ return 0; ++ ++ if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode)) ++ { ++ small_mode = from_mode; ++ big_mode = to_mode; ++ } ++ else ++ { ++ small_mode = to_mode; ++ big_mode = from_mode; ++ } ++ ++ /* Values residing in VRs are little-endian style. All modes are ++ placed left-aligned in a VR. This means that we cannot allow ++ switching between modes with differing sizes. Also if the vector ++ facility is available we still place TFmode values in VR register ++ pairs, since the only instructions we have operating on TFmodes ++ only deal with register pairs.
Therefore we have to allow DFmode ++ subregs of TFmodes to enable the TFmode splitters. */ ++ if (reg_classes_intersect_p (VEC_REGS, rclass) ++ && (GET_MODE_SIZE (small_mode) < 8 ++ || s390_class_max_nregs (VEC_REGS, big_mode) == 1)) ++ return 1; ++ ++ /* Likewise for access registers, since they have only half the ++ word size on 64-bit. */ ++ if (reg_classes_intersect_p (ACCESS_REGS, rclass)) ++ return 1; ++ ++ return 0; + } + + /* Return true if register FROM can be eliminated via register TO. */ +@@ -9112,6 +10858,23 @@ s390_emit_epilogue (bool sibcall) + } + + ++/* The VX ABI differs for vararg functions. Therefore we need the ++ prototype of the callee to be available when passing vector type ++ values. */ ++static const char * ++s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) ++{ ++ return ((TARGET_VX_ABI ++ && typelist == 0 ++ && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE ++ && (funcdecl == NULL_TREE ++ || (TREE_CODE (funcdecl) == FUNCTION_DECL ++ && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) ++ ? N_("Vector argument passed to unprototyped function") ++ : NULL); ++} ++ ++ + /* Return the size in bytes of a function argument of + type TYPE and/or mode MODE. At least one of TYPE or + MODE must be specified. */ +@@ -9126,8 +10889,57 @@ s390_function_arg_size (enum machine_mod + if (mode != BLKmode) + return GET_MODE_SIZE (mode); + +- /* If we have neither type nor mode, abort */ +- gcc_unreachable (); ++ /* If we have neither type nor mode, abort */ ++ gcc_unreachable (); ++} ++ ++/* Return true if a function argument of type TYPE and mode MODE ++ is to be passed in a vector register, if available. */ ++ ++bool ++s390_function_arg_vector (enum machine_mode mode, const_tree type) ++{ ++ if (!TARGET_VX_ABI) ++ return false; ++ ++ if (s390_function_arg_size (mode, type) > 16) ++ return false; ++ ++ /* No type info available for some library calls ... 
*/ ++ if (!type) ++ return VECTOR_MODE_P (mode); ++ ++ /* The ABI says that record types with a single member are treated ++ just like that member would be. */ ++ while (TREE_CODE (type) == RECORD_TYPE) ++ { ++ tree field, single = NULL_TREE; ++ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) != FIELD_DECL) ++ continue; ++ ++ if (single == NULL_TREE) ++ single = TREE_TYPE (field); ++ else ++ return false; ++ } ++ ++ if (single == NULL_TREE) ++ return false; ++ else ++ { ++ /* If the field declaration adds extra byte due to ++ e.g. padding this is not accepted as vector type. */ ++ if (int_size_in_bytes (single) <= 0 ++ || int_size_in_bytes (single) != int_size_in_bytes (type)) ++ return false; ++ type = single; ++ } ++ } ++ ++ return TREE_CODE (type) == VECTOR_TYPE; + } + + /* Return true if a function argument of type TYPE and mode MODE +@@ -9136,8 +10948,7 @@ s390_function_arg_size (enum machine_mod + static bool + s390_function_arg_float (enum machine_mode mode, const_tree type) + { +- int size = s390_function_arg_size (mode, type); +- if (size > 8) ++ if (s390_function_arg_size (mode, type) > 8) + return false; + + /* Soft-float changes the ABI: no floating-point registers are used. 
*/ +@@ -9220,20 +11031,24 @@ s390_pass_by_reference (cumulative_args_ + bool named ATTRIBUTE_UNUSED) + { + int size = s390_function_arg_size (mode, type); ++ ++ if (s390_function_arg_vector (mode, type)) ++ return false; ++ + if (size > 8) + return true; + + if (type) + { + if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0) +- return 1; ++ return true; + + if (TREE_CODE (type) == COMPLEX_TYPE + || TREE_CODE (type) == VECTOR_TYPE) +- return 1; ++ return true; + } + +- return 0; ++ return false; + } + + /* Update the data in CUM to advance over an argument of mode MODE and +@@ -9244,11 +11059,21 @@ s390_pass_by_reference (cumulative_args_ + + static void + s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, +- const_tree type, bool named ATTRIBUTE_UNUSED) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- if (s390_function_arg_float (mode, type)) ++ if (s390_function_arg_vector (mode, type)) ++ { ++ /* We are called for unnamed vector stdarg arguments which are ++ passed on the stack. In this case this hook does not have to ++ do anything since stack arguments are tracked by common ++ code. */ ++ if (!named) ++ return; ++ cum->vrs += 1; ++ } ++ else if (s390_function_arg_float (mode, type)) + { + cum->fprs += 1; + } +@@ -9282,14 +11107,26 @@ s390_function_arg_advance (cumulative_ar + + static rtx + s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode, +- const_tree type, bool named ATTRIBUTE_UNUSED) ++ const_tree type, bool named) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- if (s390_function_arg_float (mode, type)) ++ if (!named) ++ s390_check_type_for_vector_abi (type, true, false); ++ ++ if (s390_function_arg_vector (mode, type)) ++ { ++ /* Vector arguments being part of the ellipsis are passed on the ++ stack. 
*/ ++ if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG)) ++ return NULL_RTX; ++ ++ return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO); ++ } ++ else if (s390_function_arg_float (mode, type)) + { + if (cum->fprs + 1 > FP_ARG_NUM_REG) +- return 0; ++ return NULL_RTX; + else + return gen_rtx_REG (mode, cum->fprs + 16); + } +@@ -9299,7 +11136,7 @@ s390_function_arg (cumulative_args_t cum + int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; + + if (cum->gprs + n_gprs > GP_ARG_NUM_REG) +- return 0; ++ return NULL_RTX; + else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG) + return gen_rtx_REG (mode, cum->gprs + 2); + else if (n_gprs == 2) +@@ -9342,6 +11179,12 @@ s390_return_in_memory (const_tree type, + || TREE_CODE (type) == REAL_TYPE) + return int_size_in_bytes (type) > 8; + ++ /* vector types which fit into a VR. */ ++ if (TARGET_VX_ABI ++ && TREE_CODE (type) == VECTOR_TYPE ++ && int_size_in_bytes (type) <= 16) ++ return false; ++ + /* Aggregates and similar constructs are always returned + in memory. */ + if (AGGREGATE_TYPE_P (type) +@@ -9384,6 +11227,12 @@ s390_function_and_libcall_value (enum ma + const_tree fntype_or_decl, + bool outgoing ATTRIBUTE_UNUSED) + { ++ /* For vector return types it is important to use the RET_TYPE ++ argument whenever available since the middle-end might have ++ changed the mode to a scalar mode. */ ++ bool vector_ret_type_p = ((ret_type && TREE_CODE (ret_type) == VECTOR_TYPE) ++ || (!ret_type && VECTOR_MODE_P (mode))); ++ + /* For normal functions perform the promotion as + promote_function_mode would do. 
*/ + if (ret_type) +@@ -9393,10 +11242,14 @@ s390_function_and_libcall_value (enum ma + fntype_or_decl, 1); + } + +- gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode)); +- gcc_assert (GET_MODE_SIZE (mode) <= 8); +- +- if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT ++ || SCALAR_FLOAT_MODE_P (mode) ++ || (TARGET_VX_ABI && vector_ret_type_p)); ++ gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8)); ++ ++ if (TARGET_VX_ABI && vector_ret_type_p) ++ return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO); ++ else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode)) + return gen_rtx_REG (mode, 16); + else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG + || UNITS_PER_LONG == UNITS_PER_WORD) +@@ -9560,9 +11413,13 @@ s390_va_start (tree valist, rtx nextarg + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + +- /* Find the overflow area. */ ++ /* Find the overflow area. ++ FIXME: This currently is too pessimistic when the vector ABI is ++ enabled. In that case we *always* set up the overflow area ++ pointer. 
*/ + if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG +- || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG) ++ || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG ++ || TARGET_VX_ABI) + { + t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); + +@@ -9604,6 +11461,9 @@ s390_va_start (tree valist, rtx nextarg + ret = args.reg_save_area[args.gpr+8] + else + ret = *args.overflow_arg_area++; ++ } else if (vector value) { ++ ret = *args.overflow_arg_area; ++ args.overflow_arg_area += size / 8; + } else if (float value) { + if (args.fgpr < 2) + ret = args.reg_save_area[args.fpr+64] +@@ -9623,7 +11483,10 @@ s390_gimplify_va_arg (tree valist, tree + tree f_gpr, f_fpr, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, reg, t, u; + int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg; +- tree lab_false, lab_over, addr; ++ tree lab_false, lab_over; ++ tree addr = create_tmp_var (ptr_type_node, "addr"); ++ bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within ++ a stack slot. */ + + f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); +@@ -9642,6 +11505,8 @@ s390_gimplify_va_arg (tree valist, tree + + size = int_size_in_bytes (type); + ++ s390_check_type_for_vector_abi (type, true, false); ++ + if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) + { + if (TARGET_DEBUG_ARG) +@@ -9662,6 +11527,23 @@ s390_gimplify_va_arg (tree valist, tree + sav_scale = UNITS_PER_LONG; + size = UNITS_PER_LONG; + max_reg = GP_ARG_NUM_REG - n_reg; ++ left_align_p = false; ++ } ++ else if (s390_function_arg_vector (TYPE_MODE (type), type)) ++ { ++ if (TARGET_DEBUG_ARG) ++ { ++ fprintf (stderr, "va_arg: vector type"); ++ debug_tree (type); ++ } ++ ++ indirect_p = 0; ++ reg = NULL_TREE; ++ n_reg = 0; ++ sav_ofs = 0; ++ sav_scale = 8; ++ max_reg = 0; ++ left_align_p = true; + } + else if (s390_function_arg_float (TYPE_MODE (type), type)) + { +@@ -9678,6 +11560,7 @@ s390_gimplify_va_arg (tree valist, tree + sav_ofs = 16 * UNITS_PER_LONG; + 
sav_scale = 8; + max_reg = FP_ARG_NUM_REG - n_reg; ++ left_align_p = false; + } + else + { +@@ -9702,53 +11585,74 @@ s390_gimplify_va_arg (tree valist, tree + + sav_scale = UNITS_PER_LONG; + max_reg = GP_ARG_NUM_REG - n_reg; ++ left_align_p = false; + } + + /* Pull the value out of the saved registers ... */ + +- lab_false = create_artificial_label (UNKNOWN_LOCATION); +- lab_over = create_artificial_label (UNKNOWN_LOCATION); +- addr = create_tmp_var (ptr_type_node, "addr"); +- +- t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); +- t = build2 (GT_EXPR, boolean_type_node, reg, t); +- u = build1 (GOTO_EXPR, void_type_node, lab_false); +- t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); +- gimplify_and_add (t, pre_p); +- +- t = fold_build_pointer_plus_hwi (sav, sav_ofs); +- u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, +- fold_convert (TREE_TYPE (reg), size_int (sav_scale))); +- t = fold_build_pointer_plus (t, u); ++ if (reg != NULL_TREE) ++ { ++ /* ++ if (reg > ((typeof (reg))max_reg)) ++ goto lab_false; + +- gimplify_assign (addr, t, pre_p); ++ addr = sav + sav_ofs + reg * sav_scale; ++ ++ goto lab_over; ++ ++ lab_false: ++ */ + +- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); ++ lab_false = create_artificial_label (UNKNOWN_LOCATION); ++ lab_over = create_artificial_label (UNKNOWN_LOCATION); + +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); ++ t = fold_convert (TREE_TYPE (reg), size_int (max_reg)); ++ t = build2 (GT_EXPR, boolean_type_node, reg, t); ++ u = build1 (GOTO_EXPR, void_type_node, lab_false); ++ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE); ++ gimplify_and_add (t, pre_p); + ++ t = fold_build_pointer_plus_hwi (sav, sav_ofs); ++ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg, ++ fold_convert (TREE_TYPE (reg), size_int (sav_scale))); ++ t = fold_build_pointer_plus (t, u); ++ ++ gimplify_assign (addr, t, pre_p); ++ ++ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); ++ ++ gimple_seq_add_stmt (pre_p,
gimple_build_label (lab_false)); ++ } + + /* ... Otherwise out of the overflow area. */ + + t = ovf; +- if (size < UNITS_PER_LONG) ++ if (size < UNITS_PER_LONG && !left_align_p) + t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size); + + gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); + + gimplify_assign (addr, t, pre_p); + +- t = fold_build_pointer_plus_hwi (t, size); ++ if (size < UNITS_PER_LONG && left_align_p) ++ t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG); ++ else ++ t = fold_build_pointer_plus_hwi (t, size); ++ + gimplify_assign (ovf, t, pre_p); + +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); ++ if (reg != NULL_TREE) ++ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); + + + /* Increment register save count. */ + +- u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, +- fold_convert (TREE_TYPE (reg), size_int (n_reg))); +- gimplify_and_add (u, pre_p); ++ if (n_reg > 0) ++ { ++ u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg, ++ fold_convert (TREE_TYPE (reg), size_int (n_reg))); ++ gimplify_and_add (u, pre_p); ++ } + + if (indirect_p) + { +@@ -9793,7 +11697,14 @@ s390_expand_tbegin (rtx dest, rtx tdb, r + } + + if (clobber_fprs_p) +- emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb)); ++ { ++ if (TARGET_VX) ++ emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), ++ tdb)); ++ else ++ emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), ++ tdb)); ++ } + else + emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), + tdb)); +@@ -9831,210 +11742,6 @@ s390_expand_tbegin (rtx dest, rtx tdb, r + } + } + +-/* Builtins. 
*/ +- +-enum s390_builtin +-{ +- S390_BUILTIN_TBEGIN, +- S390_BUILTIN_TBEGIN_NOFLOAT, +- S390_BUILTIN_TBEGIN_RETRY, +- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, +- S390_BUILTIN_TBEGINC, +- S390_BUILTIN_TEND, +- S390_BUILTIN_TABORT, +- S390_BUILTIN_NON_TX_STORE, +- S390_BUILTIN_TX_NESTING_DEPTH, +- S390_BUILTIN_TX_ASSIST, +- +- S390_BUILTIN_max +-}; +- +-static enum insn_code const code_for_builtin[S390_BUILTIN_max] = { +- CODE_FOR_tbegin, +- CODE_FOR_tbegin_nofloat, +- CODE_FOR_tbegin_retry, +- CODE_FOR_tbegin_retry_nofloat, +- CODE_FOR_tbeginc, +- CODE_FOR_tend, +- CODE_FOR_tabort, +- CODE_FOR_ntstg, +- CODE_FOR_etnd, +- CODE_FOR_tx_assist +-}; +- +-static void +-s390_init_builtins (void) +-{ +- tree ftype, uint64_type; +- tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"), +- NULL, NULL); +- tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); +- +- /* void foo (void) */ +- ftype = build_function_type_list (void_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, +- BUILT_IN_MD, NULL, NULL_TREE); +- +- /* void foo (int) */ +- ftype = build_function_type_list (void_type_node, integer_type_node, +- NULL_TREE); +- add_builtin_function ("__builtin_tabort", ftype, +- S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr); +- add_builtin_function ("__builtin_tx_assist", ftype, +- S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); +- +- /* int foo (void *) */ +- ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, +- BUILT_IN_MD, NULL, returns_twice_attr); +- add_builtin_function ("__builtin_tbegin_nofloat", ftype, +- S390_BUILTIN_TBEGIN_NOFLOAT, +- BUILT_IN_MD, NULL, returns_twice_attr); +- +- /* int foo (void *, int) */ +- ftype = build_function_type_list (integer_type_node, ptr_type_node, +- integer_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tbegin_retry", ftype, +- 
S390_BUILTIN_TBEGIN_RETRY, +- BUILT_IN_MD, +- NULL, returns_twice_attr); +- add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, +- S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, +- BUILT_IN_MD, +- NULL, returns_twice_attr); +- +- /* int foo (void) */ +- ftype = build_function_type_list (integer_type_node, NULL_TREE); +- add_builtin_function ("__builtin_tx_nesting_depth", ftype, +- S390_BUILTIN_TX_NESTING_DEPTH, +- BUILT_IN_MD, NULL, NULL_TREE); +- add_builtin_function ("__builtin_tend", ftype, +- S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); +- +- /* void foo (uint64_t *, uint64_t) */ +- if (TARGET_64BIT) +- uint64_type = long_unsigned_type_node; +- else +- uint64_type = long_long_unsigned_type_node; +- +- ftype = build_function_type_list (void_type_node, +- build_pointer_type (uint64_type), +- uint64_type, NULL_TREE); +- add_builtin_function ("__builtin_non_tx_store", ftype, +- S390_BUILTIN_NON_TX_STORE, +- BUILT_IN_MD, NULL, NULL_TREE); +-} +- +-/* Expand an expression EXP that calls a built-in function, +- with result going to TARGET if that's convenient +- (and in mode MODE if that's convenient). +- SUBTARGET may be used as the target for computing one of EXP's operands. +- IGNORE is nonzero if the value is to be ignored. 
*/ +- +-static rtx +-s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, +- enum machine_mode mode ATTRIBUTE_UNUSED, +- int ignore ATTRIBUTE_UNUSED) +-{ +-#define MAX_ARGS 2 +- +- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); +- unsigned int fcode = DECL_FUNCTION_CODE (fndecl); +- enum insn_code icode; +- rtx op[MAX_ARGS], pat; +- int arity; +- bool nonvoid; +- tree arg; +- call_expr_arg_iterator iter; +- +- if (fcode >= S390_BUILTIN_max) +- internal_error ("bad builtin fcode"); +- icode = code_for_builtin[fcode]; +- if (icode == 0) +- internal_error ("bad builtin fcode"); +- +- if (!TARGET_HTM) +- error ("Transactional execution builtins not enabled (-mhtm)\n"); +- +- /* Set a flag in the machine specific cfun part in order to support +- saving/restoring of FPRs. */ +- if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) +- cfun->machine->tbegin_p = true; +- +- nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; +- +- arity = 0; +- FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) +- { +- const struct insn_operand_data *insn_op; +- +- if (arg == error_mark_node) +- return NULL_RTX; +- if (arity >= MAX_ARGS) +- return NULL_RTX; +- +- insn_op = &insn_data[icode].operand[arity + nonvoid]; +- +- op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); +- +- if (!(*insn_op->predicate) (op[arity], insn_op->mode)) +- { +- if (insn_op->predicate == memory_operand) +- { +- /* Don't move a NULL pointer into a register. Otherwise +- we have to rely on combine being able to move it back +- in order to get an immediate 0 in the instruction. 
*/ +- if (op[arity] != const0_rtx) +- op[arity] = copy_to_mode_reg (Pmode, op[arity]); +- op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); +- } +- else +- op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); +- } +- +- arity++; +- } +- +- if (nonvoid) +- { +- enum machine_mode tmode = insn_data[icode].operand[0].mode; +- if (!target +- || GET_MODE (target) != tmode +- || !(*insn_data[icode].operand[0].predicate) (target, tmode)) +- target = gen_reg_rtx (tmode); +- } +- +- switch (arity) +- { +- case 0: +- pat = GEN_FCN (icode) (target); +- break; +- case 1: +- if (nonvoid) +- pat = GEN_FCN (icode) (target, op[0]); +- else +- pat = GEN_FCN (icode) (op[0]); +- break; +- case 2: +- if (nonvoid) +- pat = GEN_FCN (icode) (target, op[0], op[1]); +- else +- pat = GEN_FCN (icode) (op[0], op[1]); +- break; +- default: +- gcc_unreachable (); +- } +- if (!pat) +- return NULL_RTX; +- emit_insn (pat); +- +- if (nonvoid) +- return target; +- else +- return const0_rtx; +-} +- +- + /* Output assembly code for the trampoline template to + stdio stream FILE. + +@@ -10496,15 +12203,18 @@ s390_call_saved_register_used (tree call + mode = TYPE_MODE (type); + gcc_assert (mode); + ++ /* We assume that in the target function all parameters are ++ named. This only has an impact on vector argument register ++ usage none of which is call-saved. */ + if (pass_by_reference (&cum_v, mode, type, true)) + { + mode = Pmode; + type = build_pointer_type (type); + } + +- parm_rtx = s390_function_arg (cum, mode, type, 0); ++ parm_rtx = s390_function_arg (cum, mode, type, true); + +- s390_function_arg_advance (cum, mode, type, 0); ++ s390_function_arg_advance (cum, mode, type, true); + + if (!parm_rtx) + continue; +@@ -10711,6 +12421,13 @@ s390_conditional_register_usage (void) + for (i = 16; i < 32; i++) + call_used_regs[i] = fixed_regs[i] = 1; + } ++ ++ /* Disable v16 - v31 for non-vector target. 
*/ ++ if (!TARGET_VX) ++ { ++ for (i = VR16_REGNUM; i <= VR31_REGNUM; i++) ++ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; ++ } + } + + /* Corresponding function to eh_return expander. */ +@@ -11232,7 +12949,8 @@ s390_reorg (void) + /* Walk over the insns and do some >=z10 specific changes. */ + if (s390_tune == PROCESSOR_2097_Z10 + || s390_tune == PROCESSOR_2817_Z196 +- || s390_tune == PROCESSOR_2827_ZEC12) ++ || s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + { + rtx insn; + bool insn_added_p = false; +@@ -11392,27 +13110,66 @@ s390_z10_prevent_earlyload_conflicts (rt + + static int s390_sched_state; + +-#define S390_OOO_SCHED_STATE_NORMAL 3 +-#define S390_OOO_SCHED_STATE_CRACKED 4 ++#define S390_SCHED_STATE_NORMAL 3 ++#define S390_SCHED_STATE_CRACKED 4 + +-#define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1 +-#define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2 +-#define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4 +-#define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8 ++#define S390_SCHED_ATTR_MASK_CRACKED 0x1 ++#define S390_SCHED_ATTR_MASK_EXPANDED 0x2 ++#define S390_SCHED_ATTR_MASK_ENDGROUP 0x4 ++#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8 + + static unsigned int + s390_get_sched_attrmask (rtx insn) + { + unsigned int mask = 0; + +- if (get_attr_ooo_cracked (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED; +- if (get_attr_ooo_expanded (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED; +- if (get_attr_ooo_endgroup (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP; +- if (get_attr_ooo_groupalone (insn)) +- mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE; ++ switch (s390_tune) ++ { ++ case PROCESSOR_2827_ZEC12: ++ if (get_attr_zEC12_cracked (insn)) ++ mask |= S390_SCHED_ATTR_MASK_CRACKED; ++ if (get_attr_zEC12_expanded (insn)) ++ mask |= S390_SCHED_ATTR_MASK_EXPANDED; ++ if (get_attr_zEC12_endgroup (insn)) ++ mask |= S390_SCHED_ATTR_MASK_ENDGROUP; ++ if (get_attr_zEC12_groupalone (insn)) ++ mask |= S390_SCHED_ATTR_MASK_GROUPALONE; ++ 
break; ++ case PROCESSOR_2964_Z13: ++ if (get_attr_z13_cracked (insn)) ++ mask |= S390_SCHED_ATTR_MASK_CRACKED; ++ if (get_attr_z13_expanded (insn)) ++ mask |= S390_SCHED_ATTR_MASK_EXPANDED; ++ if (get_attr_z13_endgroup (insn)) ++ mask |= S390_SCHED_ATTR_MASK_ENDGROUP; ++ if (get_attr_z13_groupalone (insn)) ++ mask |= S390_SCHED_ATTR_MASK_GROUPALONE; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ return mask; ++} ++ ++static unsigned int ++s390_get_unit_mask (rtx insn, int *units) ++{ ++ unsigned int mask = 0; ++ ++ switch (s390_tune) ++ { ++ case PROCESSOR_2964_Z13: ++ *units = 3; ++ if (get_attr_z13_unit_lsu (insn)) ++ mask |= 1 << 0; ++ if (get_attr_z13_unit_fxu (insn)) ++ mask |= 1 << 1; ++ if (get_attr_z13_unit_vfu (insn)) ++ mask |= 1 << 2; ++ break; ++ default: ++ gcc_unreachable (); ++ } + return mask; + } + +@@ -11430,48 +13187,66 @@ s390_sched_score (rtx insn) + case 0: + /* Try to put insns into the first slot which would otherwise + break a group. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) + score += 5; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) + score += 10; + case 1: + /* Prefer not cracked insns while trying to put together a + group. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) + score += 10; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0) + score += 5; + break; + case 2: + /* Prefer not cracked insns while trying to put together a + group. 
*/ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0) + score += 10; + /* Prefer endgroup insns in the last slot. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0) + score += 10; + break; +- case S390_OOO_SCHED_STATE_NORMAL: ++ case S390_SCHED_STATE_NORMAL: + /* Prefer not cracked insns if the last was not cracked. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0 +- && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0 ++ && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0) + score += 5; +- if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) + score += 10; + break; +- case S390_OOO_SCHED_STATE_CRACKED: ++ case S390_SCHED_STATE_CRACKED: + /* Try to keep cracked insns together to prevent them from + interrupting groups. */ +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) + score += 5; + break; + } ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, i; ++ unsigned unit_mask, m = 1; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ gcc_assert (units <= MAX_SCHED_UNITS); ++ ++ /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long ++ ago the last insn of this unit type got scheduled. This is ++ supposed to help providing a proper instruction mix to the ++ CPU. 
*/ ++ for (i = 0; i < units; i++, m <<= 1) ++ if (m & unit_mask) ++ score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE / ++ MAX_SCHED_MIX_DISTANCE); ++ } + return score; + } + +@@ -11487,7 +13262,8 @@ s390_sched_reorder (FILE *file, int verb + if (reload_completed && *nreadyp > 1) + s390_z10_prevent_earlyload_conflicts (ready, nreadyp); + +- if (s390_tune == PROCESSOR_2827_ZEC12 ++ if ((s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + && reload_completed + && *nreadyp > 1) + { +@@ -11526,12 +13302,12 @@ s390_sched_reorder (FILE *file, int verb + + if (verbose > 5) + fprintf (file, +- "move insn %d to the top of list\n", ++ ";;\t\tBACKEND: move insn %d to the top of list\n", + INSN_UID (ready[last_index])); + } + else if (verbose > 5) + fprintf (file, +- "best insn %d already on top\n", ++ ";;\t\tBACKEND: best insn %d already on top\n", + INSN_UID (ready[last_index])); + } + +@@ -11542,16 +13318,35 @@ s390_sched_reorder (FILE *file, int verb + + for (i = last_index; i >= 0; i--) + { +- if (recog_memoized (ready[i]) < 0) ++ unsigned int sched_mask; ++ rtx insn = ready[i]; ++ ++ if (recog_memoized (insn) < 0) + continue; +- fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]), +- s390_sched_score (ready[i])); +-#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR); +- PRINT_OOO_ATTR (ooo_cracked); +- PRINT_OOO_ATTR (ooo_expanded); +- PRINT_OOO_ATTR (ooo_endgroup); +- PRINT_OOO_ATTR (ooo_groupalone); +-#undef PRINT_OOO_ATTR ++ ++ sched_mask = s390_get_sched_attrmask (insn); ++ fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ", ++ INSN_UID (insn), ++ s390_sched_score (insn)); ++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\ ++ ((M) & sched_mask) ? 
#ATTR : ""); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); ++#undef PRINT_SCHED_ATTR ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ unsigned int unit_mask, m = 1; ++ int units, j; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ fprintf (file, "(units:"); ++ for (j = 0; j < units; j++, m <<= 1) ++ if (m & unit_mask) ++ fprintf (file, " u%d", j); ++ fprintf (file, ")"); ++ } + fprintf (file, "\n"); + } + } +@@ -11570,18 +13365,19 @@ s390_sched_variable_issue (FILE *file, i + { + last_scheduled_insn = insn; + +- if (s390_tune == PROCESSOR_2827_ZEC12 ++ if ((s390_tune == PROCESSOR_2827_ZEC12 ++ || s390_tune == PROCESSOR_2964_Z13) + && reload_completed + && recog_memoized (insn) >= 0) + { + unsigned int mask = s390_get_sched_attrmask (insn); + +- if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0) +- s390_sched_state = S390_OOO_SCHED_STATE_CRACKED; +- else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0 +- || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0) +- s390_sched_state = S390_OOO_SCHED_STATE_NORMAL; ++ if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0) ++ s390_sched_state = S390_SCHED_STATE_CRACKED; ++ else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0 ++ || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0) ++ s390_sched_state = S390_SCHED_STATE_NORMAL; + else + { + /* Only normal insns are left (mask == 0). 
*/ +@@ -11590,30 +13386,73 @@ s390_sched_variable_issue (FILE *file, i + case 0: + case 1: + case 2: +- case S390_OOO_SCHED_STATE_NORMAL: +- if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL) ++ case S390_SCHED_STATE_NORMAL: ++ if (s390_sched_state == S390_SCHED_STATE_NORMAL) + s390_sched_state = 1; + else + s390_sched_state++; + + break; +- case S390_OOO_SCHED_STATE_CRACKED: +- s390_sched_state = S390_OOO_SCHED_STATE_NORMAL; ++ case S390_SCHED_STATE_CRACKED: ++ s390_sched_state = S390_SCHED_STATE_NORMAL; + break; + } + } ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, i; ++ unsigned unit_mask, m = 1; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ gcc_assert (units <= MAX_SCHED_UNITS); ++ ++ for (i = 0; i < units; i++, m <<= 1) ++ if (m & unit_mask) ++ last_scheduled_unit_distance[i] = 0; ++ else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE) ++ last_scheduled_unit_distance[i]++; ++ } ++ + if (verbose > 5) + { +- fprintf (file, "insn %d: ", INSN_UID (insn)); +-#define PRINT_OOO_ATTR(ATTR) \ +- fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : ""); +- PRINT_OOO_ATTR (ooo_cracked); +- PRINT_OOO_ATTR (ooo_expanded); +- PRINT_OOO_ATTR (ooo_endgroup); +- PRINT_OOO_ATTR (ooo_groupalone); +-#undef PRINT_OOO_ATTR +- fprintf (file, "\n"); +- fprintf (file, "sched state: %d\n", s390_sched_state); ++ unsigned int sched_mask; ++ ++ sched_mask = s390_get_sched_attrmask (insn); ++ ++ fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn)); ++#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? 
#ATTR : ""); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup); ++ PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone); ++#undef PRINT_SCHED_ATTR ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ unsigned int unit_mask, m = 1; ++ int units, j; ++ ++ unit_mask = s390_get_unit_mask (insn, &units); ++ fprintf (file, "(units:"); ++ for (j = 0; j < units; j++, m <<= 1) ++ if (m & unit_mask) ++ fprintf (file, " %d", j); ++ fprintf (file, ")"); ++ } ++ fprintf (file, " sched state: %d\n", s390_sched_state); ++ ++ if (s390_tune == PROCESSOR_2964_Z13) ++ { ++ int units, j; ++ ++ s390_get_unit_mask (insn, &units); ++ ++ fprintf (file, ";;\t\tBACKEND: units unused for: "); ++ for (j = 0; j < units; j++) ++ fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]); ++ fprintf (file, "\n"); ++ } + } + } + +@@ -11630,6 +13469,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UN + int max_ready ATTRIBUTE_UNUSED) + { + last_scheduled_insn = NULL_RTX; ++ memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int)); + s390_sched_state = 0; + } + +@@ -11663,7 +13503,8 @@ s390_loop_unroll_adjust (unsigned nunrol + + if (s390_tune != PROCESSOR_2097_Z10 + && s390_tune != PROCESSOR_2817_Z196 +- && s390_tune != PROCESSOR_2827_ZEC12) ++ && s390_tune != PROCESSOR_2827_ZEC12 ++ && s390_tune != PROCESSOR_2964_Z13) + return nunroll; + + /* Count the number of memory references within the loop body. */ +@@ -11691,6 +13532,84 @@ s390_loop_unroll_adjust (unsigned nunrol + } + } + ++/* Return the vector mode to be used for inner mode MODE when doing ++ vectorization. 
*/ ++static enum machine_mode ++s390_preferred_simd_mode (enum machine_mode mode) ++{ ++ if (TARGET_VX) ++ switch (mode) ++ { ++ case DFmode: ++ return V2DFmode; ++ case DImode: ++ return V2DImode; ++ case SImode: ++ return V4SImode; ++ case HImode: ++ return V8HImode; ++ case QImode: ++ return V16QImode; ++ default:; ++ } ++ return word_mode; ++} ++ ++/* Our hardware does not require vectors to be strictly aligned. */ ++static bool ++s390_support_vector_misalignment (enum machine_mode mode ATTRIBUTE_UNUSED, ++ const_tree type ATTRIBUTE_UNUSED, ++ int misalignment ATTRIBUTE_UNUSED, ++ bool is_packed ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_VX) ++ return true; ++ ++ return default_builtin_support_vector_misalignment (mode, type, misalignment, ++ is_packed); ++} ++ ++/* The vector ABI requires vector types to be aligned on an 8 byte ++ boundary (our stack alignment). However, we allow this to be ++ overridden by the user, while this definitely breaks the ABI. */ ++static HOST_WIDE_INT ++s390_vector_alignment (const_tree type) ++{ ++ if (!TARGET_VX_ABI) ++ return default_vector_alignment (type); ++ ++ if (TYPE_USER_ALIGN (type)) ++ return TYPE_ALIGN (type); ++ ++ return MIN (64, tree_low_cst (TYPE_SIZE (type), 0)); ++} ++ ++/* Implement TARGET_ASM_FILE_END. */ ++static void ++s390_asm_file_end (void) ++{ ++#ifdef HAVE_AS_GNU_ATTRIBUTE ++ varpool_node *vnode; ++ cgraph_node *cnode; ++ ++ FOR_EACH_VARIABLE (vnode) ++ if (TREE_PUBLIC (vnode->symbol.decl)) ++ s390_check_type_for_vector_abi (TREE_TYPE (vnode->symbol.decl), ++ false, false); ++ ++ FOR_EACH_FUNCTION (cnode) ++ if (TREE_PUBLIC (cnode->symbol.decl)) ++ s390_check_type_for_vector_abi (TREE_TYPE (cnode->symbol.decl), ++ false, false); ++ ++ ++ if (s390_vector_abi != 0) ++ fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", ++ s390_vector_abi); ++#endif ++ file_end_indicate_exec_stack (); ++} ++ + /* Initialize GCC target structure. 
*/ + + #undef TARGET_ASM_ALIGNED_HI_OP +@@ -11797,6 +13716,8 @@ s390_loop_unroll_adjust (unsigned nunrol + #define TARGET_FUNCTION_VALUE s390_function_value + #undef TARGET_LIBCALL_VALUE + #define TARGET_LIBCALL_VALUE s390_libcall_value ++#undef TARGET_STRICT_ARGUMENT_NAMING ++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + + #undef TARGET_FIXED_CONDITION_CODE_REGS + #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs +@@ -11812,6 +13733,9 @@ s390_loop_unroll_adjust (unsigned nunrol + #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel + #endif + ++#undef TARGET_DWARF_FRAME_REG_MODE ++#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode ++ + #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE s390_mangle_type +@@ -11820,6 +13744,9 @@ s390_loop_unroll_adjust (unsigned nunrol + #undef TARGET_SCALAR_MODE_SUPPORTED_P + #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p + ++#undef TARGET_VECTOR_MODE_SUPPORTED_P ++#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p ++ + #undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class + +@@ -11864,6 +13791,21 @@ s390_loop_unroll_adjust (unsigned nunrol + #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P + #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true + ++#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN ++#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn ++ ++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE ++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode ++ ++#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT ++#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment ++ ++#undef TARGET_VECTOR_ALIGNMENT ++#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment ++ ++#undef TARGET_ASM_FILE_END ++#define TARGET_ASM_FILE_END s390_asm_file_end ++ + 
struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-s390.h" +--- gcc/config/s390/s390-c.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/s390-c.c 2016-05-11 19:20:42.792826040 +0200 +@@ -0,0 +1,903 @@ ++/* Language specific subroutines used for code generation on IBM S/390 ++ and zSeries ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com). ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . ++ ++ Based on gcc/config/rs6000/rs6000-c.c. ++ ++ In GCC terms this file belongs to the frontend. It will be ++ compiled with -DIN_GCC_FRONTEND. With that rtl.h cannot be ++ included anymore - a mechanism supposed to avoid adding frontend - ++ backend dependencies. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "cpplib.h" ++#include "machmode.h" ++#include "vec.h" ++#include "double-int.h" ++#include "input.h" ++#include "alias.h" ++#include "symtab.h" ++#include "tree.h" ++#include "c-family/c-common.h" ++#include "c-family/c-pragma.h" ++#include "diagnostic-core.h" ++#include "tm_p.h" ++#include "target.h" ++#include "langhooks.h" ++#include "tree-pretty-print.h" ++#include "c/c-tree.h" ++ ++#include "s390-builtins.h" ++ ++static GTY(()) tree __vector_keyword; ++static GTY(()) tree vector_keyword; ++static GTY(()) tree __bool_keyword; ++static GTY(()) tree bool_keyword; ++static GTY(()) tree _Bool_keyword; ++ ++ ++/* Generate an array holding all the descriptions of variants of ++ overloaded builtins defined with OB_DEF_VAR in ++ s390-builtins.def. */ ++static enum s390_builtin_ov_type_index ++type_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FNTYPE, ++#include "s390-builtins.def" ++ BT_OV_MAX ++ }; ++ ++ ++/* Generate an array indexed by an overloaded builtin index returning ++ the first index in desc_for_overloaded_builtin_var where the ++ variants for the builtin can be found. */ ++static enum s390_overloaded_builtin_vars ++desc_start_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME,...) \ ++ S390_OVERLOADED_BUILTIN_VAR_##FIRST_VAR_NAME, ++#define OB_DEF_VAR(...) ++ #include "s390-builtins.def" ++ S390_OVERLOADED_BUILTIN_VAR_MAX ++ }; ++ ++/* Generate an array indexed by an overloaded builtin index returning ++ the last index in desc_for_overloaded_builtin_var where the ++ variants for the builtin can be found. 
*/ ++static enum s390_overloaded_builtin_vars ++desc_end_for_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] = ++ { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME,...) \ ++ S390_OVERLOADED_BUILTIN_VAR_##LAST_VAR_NAME, ++#define OB_DEF_VAR(...) ++ #include "s390-builtins.def" ++ S390_OVERLOADED_BUILTIN_VAR_MAX ++ }; ++ ++static enum s390_builtin_type_index ++s390_builtin_ov_types[BT_OV_MAX][MAX_OV_OPERANDS] = ++ { ++#undef DEF_TYPE ++#undef DEF_POINTER_TYPE ++#undef DEF_DISTINCT_TYPE ++#undef DEF_VECTOR_TYPE ++#undef DEF_OPAQUE_VECTOR_TYPE ++#undef DEF_FN_TYPE ++#undef DEF_OV_TYPE ++#define DEF_TYPE(...) ++#define DEF_POINTER_TYPE(...) ++#define DEF_DISTINCT_TYPE(...) ++#define DEF_VECTOR_TYPE(...) ++#define DEF_OPAQUE_VECTOR_TYPE(...) ++#define DEF_FN_TYPE(...) ++#define DEF_OV_TYPE(INDEX, args...) { args }, ++#include "s390-builtin-types.def" ++ }; ++ ++static const enum s390_builtins ++bt_for_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX] = { ++#undef B_DEF ++#undef OB_DEF ++#undef OB_DEF_VAR ++#define B_DEF(...) ++#define OB_DEF(...) ++#define OB_DEF_VAR(NAME, BT, ...) S390_BUILTIN_##BT, ++ ++#include "s390-builtins.def" ++ }; ++ ++/* In addition to calling fold_convert for EXPR of type TYPE, also ++ call c_fully_fold to remove any C_MAYBE_CONST_EXPRs that could be ++ hiding there (PR47197). */ ++tree ++fully_fold_convert (tree type, tree expr) ++{ ++ tree result = fold_convert (type, expr); ++ bool maybe_const = true; ++ ++ if (!c_dialect_cxx ()) ++ result = c_fully_fold (result, false, &maybe_const); ++ ++ return result; ++} ++ ++/* Unify the different variants to the same nodes in order to keep the ++ code working with it simple. 
*/ ++static cpp_hashnode * ++s390_categorize_keyword (const cpp_token *tok) ++{ ++ if (tok->type == CPP_NAME) ++ { ++ cpp_hashnode *ident = tok->val.node.node; ++ ++ if (ident == C_CPP_HASHNODE (vector_keyword)) ++ return C_CPP_HASHNODE (__vector_keyword); ++ ++ if (ident == C_CPP_HASHNODE (bool_keyword)) ++ return C_CPP_HASHNODE (__bool_keyword); ++ ++ if (ident == C_CPP_HASHNODE (_Bool_keyword)) ++ return C_CPP_HASHNODE (__bool_keyword); ++ return ident; ++ } ++ ++ return 0; ++} ++ ++ ++/* Called to decide whether a conditional macro should be expanded. ++ Since we have exactly one such macro (i.e, 'vector'), we do not ++ need to examine the 'tok' parameter. */ ++ ++static cpp_hashnode * ++s390_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) ++{ ++ cpp_hashnode *expand_this = tok->val.node.node; ++ cpp_hashnode *ident; ++ static bool expand_bool_p = false; ++ int idx = 0; ++ enum rid rid_code; ++ ++ /* The vector keyword is only expanded if the machine actually ++ provides hardware support. */ ++ if (!TARGET_ZVECTOR) ++ return NULL; ++ ++ ident = s390_categorize_keyword (tok); ++ ++ /* Triggered when we picked a different variant in ++ s390_categorize_keyword. */ ++ if (ident != expand_this) ++ expand_this = NULL; ++ ++ /* The vector keyword has been found already and we remembered to ++ expand the next bool. */ ++ if (expand_bool_p && ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = false; ++ return ident; ++ } ++ ++ if (ident != C_CPP_HASHNODE (__vector_keyword)) ++ return expand_this; ++ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (!ident) ++ return expand_this; ++ ++ /* vector bool - remember to expand the next bool. */ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = true; ++ return C_CPP_HASHNODE (__vector_keyword); ++ } ++ ++ /* The boost libraries have code with Iterator::vector vector in it. 
++ If we allow the normal handling, this module will be called ++ recursively, and the vector will be skipped. */ ++ if (ident == C_CPP_HASHNODE (__vector_keyword)) ++ return expand_this; ++ ++ rid_code = (enum rid)(ident->rid_code); ++ ++ if (ident->type == NT_MACRO) ++ { ++ /* Now actually fetch the tokens we "peeked" before and do a ++ lookahead for the next. */ ++ do ++ (void) cpp_get_token (pfile); ++ while (--idx > 0); ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ { ++ expand_bool_p = true; ++ return C_CPP_HASHNODE (__vector_keyword); ++ } ++ else if (ident) ++ rid_code = (enum rid)(ident->rid_code); ++ } ++ ++ /* vector keyword followed by type identifier: vector unsigned, ++ vector long, ... ++ Types consisting of more than one identifier are not supported by ++ zvector e.g. long long, long double, unsigned long int. */ ++ if (rid_code == RID_UNSIGNED || rid_code == RID_LONG ++ || rid_code == RID_SHORT || rid_code == RID_SIGNED ++ || rid_code == RID_INT || rid_code == RID_CHAR ++ || rid_code == RID_DOUBLE) ++ { ++ expand_this = C_CPP_HASHNODE (__vector_keyword); ++ /* If the next keyword is bool, it will need to be expanded as ++ well. */ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ /* __vector long __bool a; */ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ expand_bool_p = true; ++ else ++ { ++ /* Triggered with: __vector long long __bool a; */ ++ do ++ tok = cpp_peek_token (pfile, idx++); ++ while (tok->type == CPP_PADDING); ++ ident = s390_categorize_keyword (tok); ++ ++ if (ident == C_CPP_HASHNODE (__bool_keyword)) ++ expand_bool_p = true; ++ } ++ } ++ ++ return expand_this; ++} ++ ++/* Define platform dependent macros. 
*/ ++void ++s390_cpu_cpp_builtins (cpp_reader *pfile) ++{ ++ cpp_assert (pfile, "cpu=s390"); ++ cpp_assert (pfile, "machine=s390"); ++ cpp_define (pfile, "__s390__"); ++ if (TARGET_ZARCH) ++ cpp_define (pfile, "__zarch__"); ++ if (TARGET_64BIT) ++ cpp_define (pfile, "__s390x__"); ++ if (TARGET_LONG_DOUBLE_128) ++ cpp_define (pfile, "__LONG_DOUBLE_128__"); ++ if (TARGET_HTM) ++ cpp_define (pfile, "__HTM__"); ++ if (TARGET_ZVECTOR) ++ { ++ cpp_define (pfile, "__VEC__=10301"); ++ cpp_define (pfile, "__vector=__attribute__((vector_size(16)))"); ++ cpp_define (pfile, "__bool=__attribute__((s390_vector_bool)) unsigned"); ++ ++ if (!flag_iso) ++ { ++ cpp_define (pfile, "__VECTOR_KEYWORD_SUPPORTED__"); ++ cpp_define (pfile, "vector=vector"); ++ cpp_define (pfile, "bool=bool"); ++ ++ __vector_keyword = get_identifier ("__vector"); ++ C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL; ++ ++ vector_keyword = get_identifier ("vector"); ++ C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL; ++ ++ __bool_keyword = get_identifier ("__bool"); ++ C_CPP_HASHNODE (__bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ bool_keyword = get_identifier ("bool"); ++ C_CPP_HASHNODE (bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ _Bool_keyword = get_identifier ("_Bool"); ++ C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL; ++ ++ /* Enable context-sensitive macros. */ ++ cpp_get_callbacks (pfile)->macro_to_expand = s390_macro_to_expand; ++ } ++ } ++} ++ ++/* Expand builtins which can directly be mapped to tree expressions. 
++ LOC - location information ++ FCODE - function code of the builtin ++ ARGLIST - value supposed to be passed as arguments ++ RETURN-TYPE - expected return type of the builtin */ ++static tree ++s390_expand_overloaded_builtin (location_t loc, ++ unsigned fcode, ++ vec *arglist, ++ tree return_type) ++{ ++ switch (fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_step: ++ if (TREE_CODE (TREE_TYPE ((*arglist)[0])) != VECTOR_TYPE) ++ { ++ error_at (loc, "Builtin vec_step can only be used on vector types."); ++ return error_mark_node; ++ } ++ return build_int_cst (NULL_TREE, ++ TYPE_VECTOR_SUBPARTS (TREE_TYPE ((*arglist)[0]))); ++ case S390_OVERLOADED_BUILTIN_s390_vec_xld2: ++ case S390_OVERLOADED_BUILTIN_s390_vec_xlw4: ++ return build2 (MEM_REF, return_type, ++ fold_build_pointer_plus ((*arglist)[1], (*arglist)[0]), ++ build_int_cst (TREE_TYPE ((*arglist)[1]), 0)); ++ case S390_OVERLOADED_BUILTIN_s390_vec_xstd2: ++ case S390_OVERLOADED_BUILTIN_s390_vec_xstw4: ++ return build2 (MODIFY_EXPR, TREE_TYPE((*arglist)[0]), ++ build1 (INDIRECT_REF, TREE_TYPE((*arglist)[0]), ++ fold_build_pointer_plus ((*arglist)[2], (*arglist)[1])), ++ (*arglist)[0]); ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_pair: ++ { ++ vec *v; ++ constructor_elt elt1 = { NULL_TREE , (*arglist)[0] }; ++ constructor_elt elt2 = { NULL_TREE , (*arglist)[1] }; ++ ++ vec_alloc (v, 2); ++ v->quick_push (elt1); ++ v->quick_push (elt2); ++ return build_constructor (return_type, v); ++ } ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* invert result */ ++#define __VSTRING_FLAG_IN 8 ++/* result type */ ++#define __VSTRING_FLAG_RT 4 ++/* zero search */ ++#define __VSTRING_FLAG_ZS 2 ++/* set condition code */ ++#define __VSTRING_FLAG_CS 1 ++ ++/* Return the flags value to be used for string low-level builtins ++ when expanded from overloaded builtin OB_FCODE. 
*/ ++static unsigned int ++s390_get_vstring_flags (int ob_fcode) ++{ ++ unsigned int flags = 0; ++ ++ switch (ob_fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_IN; ++ break; ++ default: ++ break; ++ } ++ switch (ob_fcode) ++ { ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_RT; ++ break; ++ default: ++ break; ++ } ++ switch (ob_fcode) ++ { ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ flags |= __VSTRING_FLAG_ZS; ++ break; ++ default: ++ break; ++ } ++ 
switch (ob_fcode) ++ { ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ flags |= __VSTRING_FLAG_CS; ++ break; ++ default: ++ break; ++ } ++ return flags; ++} ++#undef __VSTRING_FLAG_IN ++#undef __VSTRING_FLAG_RT ++#undef __VSTRING_FLAG_ZS ++#undef __VSTRING_FLAG_CS ++ ++/* For several overloaded builtins the argument lists do not match ++ exactly the signature of a low-level builtin. This function ++ adjusts the argument list ARGLIST for the overloaded builtin ++ OB_FCODE to the signature of the low-level builtin given by ++ DECL. */ ++static void ++s390_adjust_builtin_arglist (unsigned int ob_fcode, tree decl, ++ vec **arglist) ++{ ++ tree arg_chain; ++ int src_arg_index, dest_arg_index; ++ vec *folded_args = NULL; ++ ++ /* We at most add one more operand to the list. */ ++ vec_alloc (folded_args, (*arglist)->allocated () + 1); ++ for (arg_chain = TYPE_ARG_TYPES (TREE_TYPE (decl)), ++ src_arg_index = 0, dest_arg_index = 0; ++ !VOID_TYPE_P (TREE_VALUE (arg_chain)); ++ arg_chain = TREE_CHAIN (arg_chain), dest_arg_index++) ++ { ++ bool arg_assigned_p = false; ++ switch (ob_fcode) ++ { ++ /* For all these the low level builtin needs an additional flags parameter. 
*/ ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_eq_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_find_any_ne_cc: ++ if (dest_arg_index == 2) ++ { ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ s390_get_vstring_flags (ob_fcode))); ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_or_0_idx_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmprg_cc: ++ case S390_OVERLOADED_BUILTIN_s390_vec_cmpnrg_cc: ++ if (dest_arg_index == 3) ++ { ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ s390_get_vstring_flags (ob_fcode))); ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_sel: ++ case S390_OVERLOADED_BUILTIN_s390_vec_insert: ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_len: ++ /* Swap the first two arguments.
It is better to do it here ++ instead of the header file to avoid operand checking ++ throwing error messages for a weird operand index. */ ++ if (dest_arg_index < 2) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[1 - dest_arg_index])); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_store_len: ++ if (dest_arg_index == 1 || dest_arg_index == 2) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[3 - dest_arg_index])); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ break; ++ ++ case S390_OVERLOADED_BUILTIN_s390_vec_load_bndry: ++ { ++ int code; ++ ++ if (dest_arg_index == 1) ++ { ++ switch (tree_low_cst ((**arglist)[src_arg_index], 1)) ++ { ++ case 64: code = 0; break; ++ case 128: code = 1; break; ++ case 256: code = 2; break; ++ case 512: code = 3; break; ++ case 1024: code = 4; break; ++ case 2048: code = 5; break; ++ case 4096: code = 6; break; ++ default: ++ error ("valid values for builtin %qF argument %d are 64, " ++ "128, 256, 512, 1024, 2048, and 4096", decl, ++ src_arg_index + 1); ++ return; ++ } ++ folded_args->quick_push (build_int_cst (integer_type_node, ++ code)); ++ src_arg_index++; ++ arg_assigned_p = true; ++ } ++ } ++ break; ++ case S390_OVERLOADED_BUILTIN_s390_vec_rl_mask: ++ /* Duplicate the first src arg. */ ++ if (dest_arg_index == 0) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[src_arg_index])); ++ arg_assigned_p = true; ++ } ++ break; ++ default: ++ break; ++ } ++ if (!arg_assigned_p) ++ { ++ folded_args->quick_push (fully_fold_convert (TREE_VALUE (arg_chain), ++ (**arglist)[src_arg_index])); ++ src_arg_index++; ++ } ++ } ++ *arglist = folded_args; ++} ++ ++/* Check whether the arguments in ARGLIST match the function type ++ DEF_TYPE. Return the number of argument types which required ++ conversion/promotion in order to make it match. 
++ 0 stands for a perfect match - all operand types match without changes ++ INT_MAX stands for a mismatch. */ ++static int ++s390_fn_types_compatible (enum s390_builtin_ov_type_index typeindex, ++ vec *arglist) ++{ ++ unsigned int i; ++ int match_type = 0; ++ ++ for (i = 0; i < vec_safe_length (arglist); i++) ++ { ++ tree b_arg_type = s390_builtin_types[s390_builtin_ov_types[typeindex][i + 1]]; ++ tree in_arg = (*arglist)[i]; ++ tree in_type = TREE_TYPE (in_arg); ++ ++ if (TREE_CODE (b_arg_type) == VECTOR_TYPE) ++ { ++ /* Vector types have to match precisely. */ ++ if (b_arg_type != in_type ++ && TYPE_MAIN_VARIANT (b_arg_type) != TYPE_MAIN_VARIANT (in_type)) ++ goto mismatch; ++ } ++ ++ if (lang_hooks.types_compatible_p (in_type, b_arg_type)) ++ continue; ++ ++ if (lang_hooks.types_compatible_p ( ++ lang_hooks.types.type_promotes_to (in_type), ++ lang_hooks.types.type_promotes_to (b_arg_type))) ++ { ++ match_type++; ++ continue; ++ } ++ ++ /* In this stage the C++ frontend would go ahead trying to find ++ implicit conversion chains for the argument to match the ++ target type. We will mimic this here only for our limited ++ subset of argument types. */ ++ if (TREE_CODE (b_arg_type) == INTEGER_TYPE ++ && TREE_CODE (in_type) == INTEGER_TYPE) ++ { ++ match_type++; ++ continue; ++ } ++ ++ /* If the incoming pointer argument has more qualifiers than the ++ argument type it can still be an imperfect match. 
*/ ++ if (POINTER_TYPE_P (b_arg_type) && POINTER_TYPE_P (in_type) ++ && !(TYPE_QUALS (TREE_TYPE (in_type)) ++ & ~TYPE_QUALS (TREE_TYPE (b_arg_type))) ++ && (TYPE_QUALS (TREE_TYPE (b_arg_type)) ++ & ~TYPE_QUALS (TREE_TYPE (in_type)))) ++ { ++ tree qual_in_type = ++ build_qualified_type (TREE_TYPE (in_type), ++ TYPE_QUALS (TREE_TYPE (b_arg_type))); ++ ++ if (lang_hooks.types_compatible_p (qual_in_type, ++ TREE_TYPE (b_arg_type))) ++ { ++ match_type++; ++ continue; ++ } ++ } ++ ++ mismatch: ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, " mismatch in operand: %d\n", i + 1); ++ return INT_MAX; ++ } ++ ++ return match_type; ++} ++ ++/* Return the number of elements in the vector arguments of FNDECL in ++ case it is the same for all vector arguments, -1 otherwise. */ ++static int ++s390_vec_n_elem (tree fndecl) ++{ ++ tree b_arg_chain; ++ int n_elem = -1; ++ ++ if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) == VECTOR_TYPE) ++ n_elem = TYPE_VECTOR_SUBPARTS (TREE_TYPE (TREE_TYPE ((fndecl)))); ++ ++ for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); ++ !VOID_TYPE_P (TREE_VALUE (b_arg_chain)); ++ b_arg_chain = TREE_CHAIN (b_arg_chain)) ++ { ++ int tmp_n_elem; ++ if (TREE_CODE (TREE_VALUE (b_arg_chain)) != VECTOR_TYPE) ++ continue; ++ tmp_n_elem = TYPE_VECTOR_SUBPARTS (TREE_VALUE (b_arg_chain)); ++ if (n_elem != -1 && n_elem != tmp_n_elem) ++ return -1; ++ n_elem = tmp_n_elem; ++ } ++ return n_elem; ++} ++ ++ ++/* Return a tree expression for a call to the overloaded builtin ++ function OB_FNDECL at LOC with arguments PASSED_ARGLIST.
*/ ++tree ++s390_resolve_overloaded_builtin (location_t loc, ++ tree ob_fndecl, ++ void *passed_arglist) ++{ ++ vec *arglist = static_cast *> (passed_arglist); ++ unsigned int in_args_num = vec_safe_length (arglist); ++ unsigned int ob_args_num = 0; ++ unsigned int ob_fcode = DECL_FUNCTION_CODE (ob_fndecl); ++ enum s390_overloaded_builtin_vars bindex; ++ unsigned int i; ++ int last_match_type = INT_MAX; ++ int last_match_index = -1; ++ unsigned int all_op_flags; ++ int num_matches = 0; ++ tree target_builtin_decl, b_arg_chain, return_type; ++ enum s390_builtin_ov_type_index last_match_fntype_index; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, ++ "s390_resolve_overloaded_builtin, code = %4d, %s - %s overloaded\n", ++ (int)ob_fcode, IDENTIFIER_POINTER (DECL_NAME (ob_fndecl)), ++ ob_fcode < S390_BUILTIN_MAX ? "not" : ""); ++ ++ /* 0...S390_BUILTIN_MAX-1 is for non-overloaded builtins. */ ++ if (ob_fcode < S390_BUILTIN_MAX) ++ { ++ if (bflags_for_builtin(ob_fcode) & B_INT) ++ { ++ error_at (loc, ++ "Builtin %qF is for GCC internal use only.", ++ ob_fndecl); ++ return error_mark_node; ++ } ++ return NULL_TREE; ++ } ++ ++ ob_fcode -= S390_BUILTIN_MAX; ++ ++ for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (ob_fndecl)); ++ !VOID_TYPE_P (TREE_VALUE (b_arg_chain)); ++ b_arg_chain = TREE_CHAIN (b_arg_chain)) ++ ob_args_num++; ++ ++ if (ob_args_num != in_args_num) ++ { ++ error_at (loc, ++ "Mismatch in number of arguments for builtin %qF. " ++ "Expected: %d got %d", ob_fndecl, ++ ob_args_num, in_args_num); ++ return error_mark_node; ++ } ++ ++ for (i = 0; i < in_args_num; i++) ++ if ((*arglist)[i] == error_mark_node) ++ return error_mark_node; ++ ++ /* Overloaded builtins without any variants are directly expanded here. 
*/ ++ if (desc_start_for_overloaded_builtin[ob_fcode] == ++ S390_OVERLOADED_BUILTIN_VAR_MAX) ++ return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, NULL_TREE); ++ ++ for (bindex = desc_start_for_overloaded_builtin[ob_fcode]; ++ bindex <= desc_end_for_overloaded_builtin[ob_fcode]; ++ bindex = (enum s390_overloaded_builtin_vars)((int)bindex + 1)) ++ { ++ int match_type; ++ enum s390_builtin_ov_type_index type_index = ++ type_for_overloaded_builtin_var[bindex]; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, "checking variant number: %d", (int)bindex); ++ ++ match_type = s390_fn_types_compatible (type_index, arglist); ++ ++ if (match_type == INT_MAX) ++ continue; ++ ++ if (TARGET_DEBUG_ARG) ++ fprintf (stderr, ++ " %s match score: %d\n", match_type == 0 ? "perfect" : "imperfect", ++ match_type); ++ ++ if (match_type < last_match_type) ++ { ++ num_matches = 1; ++ last_match_type = match_type; ++ last_match_fntype_index = type_index; ++ last_match_index = bindex; ++ } ++ else if (match_type == last_match_type) ++ num_matches++; ++ } ++ ++ if (last_match_type == INT_MAX) ++ { ++ error_at (loc, "invalid parameter combination for intrinsic %qs", ++ IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); ++ return error_mark_node; ++ } ++ else if (num_matches > 1) ++ { ++ error_at (loc, "ambiguous overload for intrinsic %qs", ++ IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); ++ return error_mark_node; ++ } ++ ++ if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX) ++ target_builtin_decl = ob_fndecl; ++ else ++ target_builtin_decl = s390_builtin_decls[bt_for_overloaded_builtin_var[last_match_index]]; ++ ++ all_op_flags = opflags_overloaded_builtin_var[last_match_index]; ++ return_type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][0]]; ++ ++ /* Check for the operand flags in the overloaded builtin variant. 
*/ ++ for (i = 0; i < ob_args_num; i++) ++ { ++ unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1); ++ tree arg = (*arglist)[i]; ++ tree type = s390_builtin_types[s390_builtin_ov_types[last_match_fntype_index][i + 1]]; ++ ++ all_op_flags = all_op_flags >> O_SHIFT; ++ ++ if (op_flags == O_ELEM) ++ { ++ int n_elem = s390_vec_n_elem (target_builtin_decl); ++ gcc_assert (n_elem > 0); ++ gcc_assert (type == integer_type_node); ++ (*arglist)[i] = build2 (BIT_AND_EXPR, integer_type_node, ++ fold_convert (integer_type_node, arg), ++ build_int_cst (NULL_TREE, n_elem - 1)); ++ } ++ ++ if (TREE_CODE (arg) != INTEGER_CST || !O_IMM_P (op_flags)) ++ continue; ++ ++ if ((TYPE_UNSIGNED (type) ++ && !int_fits_type_p (arg, c_common_unsigned_type (type))) ++ || (!TYPE_UNSIGNED (type) ++ && !int_fits_type_p (arg, c_common_signed_type (type)))) ++ { ++ error("constant argument %d for builtin %qF is out " ++ "of range for target type", ++ i + 1, target_builtin_decl); ++ return error_mark_node; ++ } ++ ++ if (TREE_CODE (arg) == INTEGER_CST ++ && !s390_const_operand_ok (arg, i + 1, op_flags, target_builtin_decl)) ++ return error_mark_node; ++ } ++ ++ /* Handle builtins we expand directly - without mapping it to a low ++ level builtin. */ ++ if (bt_for_overloaded_builtin_var[last_match_index] == S390_BUILTIN_MAX) ++ return s390_expand_overloaded_builtin (loc, ob_fcode, arglist, return_type); ++ ++ s390_adjust_builtin_arglist (ob_fcode, target_builtin_decl, &arglist); ++ ++ if (VOID_TYPE_P (return_type)) ++ return build_function_call_vec (loc, target_builtin_decl, ++ arglist, NULL); ++ else ++ return fully_fold_convert (return_type, ++ build_function_call_vec (loc, target_builtin_decl, ++ arglist, NULL)); ++} ++ ++/* This is used to define the REGISTER_TARGET_PRAGMAS macro in s390.h. 
*/ ++void ++s390_register_target_pragmas (void) ++{ ++ targetm.resolve_overloaded_builtin = s390_resolve_overloaded_builtin; ++} +--- gcc/config/s390/s390.h 2016-05-11 14:46:08.219982746 +0200 ++++ gcc/config/s390/s390.h 2016-05-11 17:12:39.000000000 +0200 +@@ -35,7 +35,9 @@ enum processor_flags + PF_Z10 = 32, + PF_Z196 = 64, + PF_ZEC12 = 128, +- PF_TX = 256 ++ PF_TX = 256, ++ PF_Z13 = 512, ++ PF_VX = 1024 + }; + + /* This is necessary to avoid a warning about comparing different enum +@@ -64,6 +66,10 @@ enum processor_flags + (s390_arch_flags & PF_ZEC12) + #define TARGET_CPU_HTM \ + (s390_arch_flags & PF_TX) ++#define TARGET_CPU_Z13 \ ++ (s390_arch_flags & PF_Z13) ++#define TARGET_CPU_VX \ ++ (s390_arch_flags & PF_VX) + + /* These flags indicate that the generated code should run on a cpu + providing the respective hardware facility when run in +@@ -82,7 +88,15 @@ enum processor_flags + #define TARGET_ZEC12 \ + (TARGET_ZARCH && TARGET_CPU_ZEC12) + #define TARGET_HTM (TARGET_OPT_HTM) +- ++#define TARGET_Z13 \ ++ (TARGET_ZARCH && TARGET_CPU_Z13) ++#define TARGET_VX \ ++ (TARGET_ZARCH && TARGET_CPU_VX && TARGET_OPT_VX && TARGET_HARD_FLOAT) ++ ++/* Use the ABI introduced with IBM z13: ++ - pass vector arguments <= 16 bytes in VRs ++ - align *all* vector types to 8 bytes */ ++#define TARGET_VX_ABI TARGET_VX + + #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196) + +@@ -97,25 +111,11 @@ enum processor_flags + #define TARGET_TPF 0 + + /* Target CPU builtins. 
*/ +-#define TARGET_CPU_CPP_BUILTINS() \ +- do \ +- { \ +- builtin_assert ("cpu=s390"); \ +- builtin_assert ("machine=s390"); \ +- builtin_define ("__s390__"); \ +- if (TARGET_ZARCH) \ +- builtin_define ("__zarch__"); \ +- if (TARGET_64BIT) \ +- builtin_define ("__s390x__"); \ +- if (TARGET_LONG_DOUBLE_128) \ +- builtin_define ("__LONG_DOUBLE_128__"); \ +- if (TARGET_HTM) \ +- builtin_define ("__HTM__"); \ +- } \ +- while (0) ++#define TARGET_CPU_CPP_BUILTINS() s390_cpu_cpp_builtins (pfile) + + #ifdef DEFAULT_TARGET_64BIT +-#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM) ++#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP \ ++ | MASK_OPT_HTM | MASK_OPT_VX) + #else + #define TARGET_DEFAULT 0 + #endif +@@ -184,6 +184,13 @@ enum processor_flags + + #define STACK_SIZE_MODE (Pmode) + ++/* Vector arguments are left-justified when placed on the stack during ++ parameter passing. */ ++#define FUNCTION_ARG_PADDING(MODE, TYPE) \ ++ (s390_function_arg_vector ((MODE), (TYPE)) \ ++ ? upward \ ++ : DEFAULT_FUNCTION_ARG_PADDING ((MODE), (TYPE))) ++ + #ifndef IN_LIBGCC2 + + /* Width of a word, in units (bytes). */ +@@ -289,9 +296,11 @@ enum processor_flags + Reg 35: Return address pointer + + Registers 36 and 37 are mapped to access registers +- 0 and 1, used to implement thread-local storage. */ ++ 0 and 1, used to implement thread-local storage. ++ ++ Reg 38-53: Vector registers v16-v31 */ + +-#define FIRST_PSEUDO_REGISTER 38 ++#define FIRST_PSEUDO_REGISTER 54 + + /* Standard register usage. 
*/ + #define GENERAL_REGNO_P(N) ((int)(N) >= 0 && (N) < 16) +@@ -300,6 +309,8 @@ enum processor_flags + #define CC_REGNO_P(N) ((N) == 33) + #define FRAME_REGNO_P(N) ((N) == 32 || (N) == 34 || (N) == 35) + #define ACCESS_REGNO_P(N) ((N) == 36 || (N) == 37) ++#define VECTOR_NOFP_REGNO_P(N) ((N) >= 38 && (N) <= 53) ++#define VECTOR_REGNO_P(N) (FP_REGNO_P (N) || VECTOR_NOFP_REGNO_P (N)) + + #define GENERAL_REG_P(X) (REG_P (X) && GENERAL_REGNO_P (REGNO (X))) + #define ADDR_REG_P(X) (REG_P (X) && ADDR_REGNO_P (REGNO (X))) +@@ -307,6 +318,8 @@ enum processor_flags + #define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X))) + #define FRAME_REG_P(X) (REG_P (X) && FRAME_REGNO_P (REGNO (X))) + #define ACCESS_REG_P(X) (REG_P (X) && ACCESS_REGNO_P (REGNO (X))) ++#define VECTOR_NOFP_REG_P(X) (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X))) ++#define VECTOR_REG_P(X) (REG_P (X) && VECTOR_REGNO_P (REGNO (X))) + + /* Set up fixed registers and calling convention: + +@@ -321,7 +334,9 @@ enum processor_flags + + On 31-bit, FPRs 18-19 are call-clobbered; + on 64-bit, FPRs 24-31 are call-clobbered. +- The remaining FPRs are call-saved. */ ++ The remaining FPRs are call-saved. ++ ++ All non-FP vector registers are call-clobbered v16-v31. 
*/ + + #define FIXED_REGISTERS \ + { 0, 0, 0, 0, \ +@@ -333,7 +348,11 @@ enum processor_flags + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ + 1, 1, 1, 1, \ +- 1, 1 } ++ 1, 1, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0, \ ++ 0, 0, 0, 0 } + + #define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, \ +@@ -345,26 +364,35 @@ enum processor_flags + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ +- 1, 1 } ++ 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1, \ ++ 1, 1, 1, 1 } + + #define CALL_REALLY_USED_REGISTERS \ +-{ 1, 1, 1, 1, \ ++{ 1, 1, 1, 1, /* r0 - r15 */ \ + 1, 1, 0, 0, \ + 0, 0, 0, 0, \ + 0, 0, 0, 0, \ ++ 1, 1, 1, 1, /* f0 (16) - f15 (31) */ \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ + 1, 1, 1, 1, \ +- 1, 1, 1, 1, \ ++ 1, 1, 1, 1, /* arg, cc, fp, ret addr */ \ ++ 0, 0, /* a0 (36), a1 (37) */ \ ++ 1, 1, 1, 1, /* v16 (38) - v23 (45) */ \ + 1, 1, 1, 1, \ +- 0, 0 } ++ 1, 1, 1, 1, /* v24 (46) - v31 (53) */ \ ++ 1, 1, 1, 1 } + + /* Preferred register allocation order. */ +-#define REG_ALLOC_ORDER \ +-{ 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \ +- 16, 17, 18, 19, 20, 21, 22, 23, \ +- 24, 25, 26, 27, 28, 29, 30, 31, \ +- 15, 32, 33, 34, 35, 36, 37 } ++#define REG_ALLOC_ORDER \ ++ { 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \ ++ 16, 17, 18, 19, 20, 21, 22, 23, \ ++ 24, 25, 26, 27, 28, 29, 30, 31, \ ++ 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, \ ++ 15, 32, 33, 34, 35, 36, 37 } + + + /* Fitting values into registers. */ +@@ -404,26 +432,22 @@ enum processor_flags + but conforms to the 31-bit ABI, GPRs can hold 8 bytes; + the ABI guarantees only that the lower 4 bytes are + saved across calls, however. 
*/ +-#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ +- (!TARGET_64BIT && TARGET_ZARCH \ +- && GET_MODE_SIZE (MODE) > 4 \ +- && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32)) ++#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ ++ ((!TARGET_64BIT && TARGET_ZARCH \ ++ && GET_MODE_SIZE (MODE) > 4 \ ++ && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32)) \ ++ || (TARGET_VX \ ++ && GET_MODE_SIZE (MODE) > 8 \ ++ && (((TARGET_64BIT && (REGNO) >= 24 && (REGNO) <= 31)) \ ++ || (!TARGET_64BIT && ((REGNO) == 18 || (REGNO) == 19))))) + + /* Maximum number of registers to represent a value of mode MODE + in a register of class CLASS. */ + #define CLASS_MAX_NREGS(CLASS, MODE) \ + s390_class_max_nregs ((CLASS), (MODE)) + +-/* If a 4-byte value is loaded into a FPR, it is placed into the +- *upper* half of the register, not the lower. Therefore, we +- cannot use SUBREGs to switch between modes in FP registers. +- Likewise for access registers, since they have only half the +- word size on 64-bit. */ + #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ +- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ +- ? ((reg_classes_intersect_p (FP_REGS, CLASS) \ +- && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \ +- || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0) ++ s390_cannot_change_mode_class ((FROM), (TO), (CLASS)) + + /* Register classes. 
*/ + +@@ -451,6 +475,7 @@ enum reg_class + NO_REGS, CC_REGS, ADDR_REGS, GENERAL_REGS, ACCESS_REGS, + ADDR_CC_REGS, GENERAL_CC_REGS, + FP_REGS, ADDR_FP_REGS, GENERAL_FP_REGS, ++ VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS, + ALL_REGS, LIM_REG_CLASSES + }; + #define N_REG_CLASSES (int) LIM_REG_CLASSES +@@ -458,11 +483,13 @@ enum reg_class + #define REG_CLASS_NAMES \ + { "NO_REGS", "CC_REGS", "ADDR_REGS", "GENERAL_REGS", "ACCESS_REGS", \ + "ADDR_CC_REGS", "GENERAL_CC_REGS", \ +- "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", "ALL_REGS" } ++ "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", \ ++ "VEC_REGS", "ADDR_VEC_REGS", "GENERAL_VEC_REGS", \ ++ "ALL_REGS" } + + /* Class -> register mapping. */ +-#define REG_CLASS_CONTENTS \ +-{ \ ++#define REG_CLASS_CONTENTS \ ++{ \ + { 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000000, 0x00000002 }, /* CC_REGS */ \ + { 0x0000fffe, 0x0000000d }, /* ADDR_REGS */ \ +@@ -473,7 +500,10 @@ enum reg_class + { 0xffff0000, 0x00000000 }, /* FP_REGS */ \ + { 0xfffffffe, 0x0000000d }, /* ADDR_FP_REGS */ \ + { 0xffffffff, 0x0000000d }, /* GENERAL_FP_REGS */ \ +- { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \ ++ { 0xffff0000, 0x003fffc0 }, /* VEC_REGS */ \ ++ { 0xfffffffe, 0x003fffcd }, /* ADDR_VEC_REGS */ \ ++ { 0xffffffff, 0x003fffcd }, /* GENERAL_VEC_REGS */ \ ++ { 0xffffffff, 0x003fffff }, /* ALL_REGS */ \ + } + + /* In some case register allocation order is not enough for IRA to +@@ -504,14 +534,27 @@ extern const enum reg_class regclass_map + #define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO) + + +-/* We need secondary memory to move data between GPRs and FPRs. With +- DFP the ldgr lgdr instructions are available. But these +- instructions do not handle GPR pairs so it is not possible for 31 +- bit. 
*/ +-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ +- ((CLASS1) != (CLASS2) \ +- && ((CLASS1) == FP_REGS || (CLASS2) == FP_REGS) \ +- && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8)) ++/* We need secondary memory to move data between GPRs and FPRs. ++ ++ - With DFP the ldgr lgdr instructions are available. Due to the ++ different alignment we cannot use them for SFmode. For 31 bit a ++ 64 bit value in GPR would be a register pair so here we still ++ need to go via memory. ++ ++ - With z13 we can do the SF/SImode moves with vlgvf. Due to the ++ overlapping of FPRs and VRs we still disallow TF/TD modes to be ++ in full VRs so as before also on z13 we do these moves via ++ memory. ++ ++ FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */ ++#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ ++ (((reg_classes_intersect_p (CLASS1, VEC_REGS) \ ++ && reg_classes_intersect_p (CLASS2, GENERAL_REGS)) \ ++ || (reg_classes_intersect_p (CLASS1, GENERAL_REGS) \ ++ && reg_classes_intersect_p (CLASS2, VEC_REGS))) \ ++ && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8) \ ++ && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (MODE) \ ++ && GET_MODE_SIZE (MODE) > 8))) + + /* Get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit + because the movsi and movsf patterns don't handle r/f moves. */ +@@ -605,6 +648,11 @@ extern const enum reg_class regclass_map + /* Let the assembler generate debug line info. */ + #define DWARF2_ASM_LINE_DEBUG_INFO 1 + ++/* Define the dwarf register mapping. ++ v16-v31 -> 68-83 ++ rX -> X otherwise */ ++#define DBX_REGISTER_NUMBER(regno) \ ++ ((regno >= 38 && regno <= 53) ? regno + 30 : regno) + + /* Frame registers. 
*/ + +@@ -652,21 +700,29 @@ typedef struct s390_arg_structure + { + int gprs; /* gpr so far */ + int fprs; /* fpr so far */ ++ int vrs; /* vr so far */ + } + CUMULATIVE_ARGS; + + #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, NN, N_NAMED_ARGS) \ +- ((CUM).gprs=0, (CUM).fprs=0) ++ ((CUM).gprs=0, (CUM).fprs=0, (CUM).vrs=0) ++ ++#define FIRST_VEC_ARG_REGNO 46 ++#define LAST_VEC_ARG_REGNO 53 + + /* Arguments can be placed in general registers 2 to 6, or in floating + point registers 0 and 2 for 31 bit and fprs 0, 2, 4 and 6 for 64 + bit. */ +-#define FUNCTION_ARG_REGNO_P(N) (((N) >=2 && (N) <7) || \ +- (N) == 16 || (N) == 17 || (TARGET_64BIT && ((N) == 18 || (N) == 19))) ++#define FUNCTION_ARG_REGNO_P(N) \ ++ (((N) >=2 && (N) < 7) || (N) == 16 || (N) == 17 \ ++ || (TARGET_64BIT && ((N) == 18 || (N) == 19)) \ ++ || (TARGET_VX && ((N) >= FIRST_VEC_ARG_REGNO && (N) <= LAST_VEC_ARG_REGNO))) + + +-/* Only gpr 2 and fpr 0 are ever used as return registers. */ +-#define FUNCTION_VALUE_REGNO_P(N) ((N) == 2 || (N) == 16) ++/* Only gpr 2, fpr 0, and v24 are ever used as return registers. */ ++#define FUNCTION_VALUE_REGNO_P(N) \ ++ ((N) == 2 || (N) == 16 \ ++ || (TARGET_VX && (N) == FIRST_VEC_ARG_REGNO)) + + + /* Function entry and exit. */ +@@ -844,12 +900,20 @@ do { \ + /* How to refer to registers in assembler output. This sequence is + indexed by compiler's hard-register-number (see above). 
*/ + #define REGISTER_NAMES \ +-{ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ +- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ +- "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \ +- "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \ +- "%ap", "%cc", "%fp", "%rp", "%a0", "%a1" \ +-} ++ { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \ ++ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \ ++ "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \ ++ "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \ ++ "%ap", "%cc", "%fp", "%rp", "%a0", "%a1", \ ++ "%v16", "%v18", "%v20", "%v22", "%v17", "%v19", "%v21", "%v23", \ ++ "%v24", "%v26", "%v28", "%v30", "%v25", "%v27", "%v29", "%v31" \ ++ } ++ ++#define ADDITIONAL_REGISTER_NAMES \ ++ { { "v0", 16 }, { "v2", 17 }, { "v4", 18 }, { "v6", 19 }, \ ++ { "v1", 20 }, { "v3", 21 }, { "v5", 22 }, { "v7", 23 }, \ ++ { "v8", 24 }, { "v10", 25 }, { "v12", 26 }, { "v14", 27 }, \ ++ { "v9", 28 }, { "v11", 29 }, { "v13", 30 }, { "v15", 31 } }; + + /* Print operand X (an rtx) in assembler syntax to file FILE. */ + #define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) +@@ -915,13 +979,31 @@ do { \ + #define SYMBOL_REF_NOT_NATURALLY_ALIGNED_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_NOT_NATURALLY_ALIGNED)) + ++/* Check whether integer displacement is in range for a short displacement. */ ++#define SHORT_DISP_IN_RANGE(d) ((d) >= 0 && (d) <= 4095) ++ + /* Check whether integer displacement is in range. */ + #define DISP_IN_RANGE(d) \ + (TARGET_LONG_DISPLACEMENT? ((d) >= -524288 && (d) <= 524287) \ +- : ((d) >= 0 && (d) <= 4095)) ++ : SHORT_DISP_IN_RANGE(d)) + + /* Reads can reuse write prefetches, used by tree-ssa-prefetch-loops.c. */ + #define READ_CAN_USE_WRITE_PREFETCH 1 + + extern const int processor_flags_table[]; +-#endif ++ ++/* The truth element value for vector comparisons. Our instructions ++ always generate -1 in that case. 
*/ ++#define VECTOR_STORE_FLAG_VALUE(MODE) CONSTM1_RTX (GET_MODE_INNER (MODE)) ++ ++/* Target pragma. */ ++ ++/* resolve_overloaded_builtin can not be defined the normal way since ++ it is defined in code which technically belongs to the ++ front-end. */ ++#define REGISTER_TARGET_PRAGMAS() \ ++ do { \ ++ s390_register_target_pragmas (); \ ++ } while (0) ++ ++#endif /* S390_H */ +--- gcc/config/s390/s390intrin.h 2013-08-14 13:55:12.000000000 +0200 ++++ gcc/config/s390/s390intrin.h 2016-05-11 17:12:39.000000000 +0200 +@@ -29,5 +29,8 @@ along with GCC; see the file COPYING3. + #include + #endif + ++#ifdef __VEC__ ++#include ++#endif + + #endif /* _S390INTRIN_H*/ +--- gcc/config/s390/s390.md 2015-06-18 16:33:04.000000000 +0200 ++++ gcc/config/s390/s390.md 2016-05-11 19:22:59.245881189 +0200 +@@ -125,7 +125,109 @@ + UNSPEC_FPINT_CEIL + UNSPEC_FPINT_NEARBYINT + UNSPEC_FPINT_RINT +- ]) ++ ++ UNSPEC_LCBB ++ ++ ; Vector ++ UNSPEC_VEC_SMULT_HI ++ UNSPEC_VEC_UMULT_HI ++ UNSPEC_VEC_SMULT_LO ++ UNSPEC_VEC_SMULT_EVEN ++ UNSPEC_VEC_UMULT_EVEN ++ UNSPEC_VEC_SMULT_ODD ++ UNSPEC_VEC_UMULT_ODD ++ ++ UNSPEC_VEC_VMAL ++ UNSPEC_VEC_VMAH ++ UNSPEC_VEC_VMALH ++ UNSPEC_VEC_VMAE ++ UNSPEC_VEC_VMALE ++ UNSPEC_VEC_VMAO ++ UNSPEC_VEC_VMALO ++ ++ UNSPEC_VEC_GATHER ++ UNSPEC_VEC_EXTRACT ++ UNSPEC_VEC_INSERT_AND_ZERO ++ UNSPEC_VEC_LOAD_BNDRY ++ UNSPEC_VEC_LOAD_LEN ++ UNSPEC_VEC_MERGEH ++ UNSPEC_VEC_MERGEL ++ UNSPEC_VEC_PACK ++ UNSPEC_VEC_PACK_SATURATE ++ UNSPEC_VEC_PACK_SATURATE_CC ++ UNSPEC_VEC_PACK_SATURATE_GENCC ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC ++ UNSPEC_VEC_PERM ++ UNSPEC_VEC_PERMI ++ UNSPEC_VEC_EXTEND ++ UNSPEC_VEC_STORE_LEN ++ UNSPEC_VEC_UNPACKH ++ UNSPEC_VEC_UNPACKH_L ++ UNSPEC_VEC_UNPACKL ++ UNSPEC_VEC_UNPACKL_L ++ UNSPEC_VEC_ADDC ++ UNSPEC_VEC_ADDC_U128 ++ UNSPEC_VEC_ADDE_U128 ++ UNSPEC_VEC_ADDEC_U128 ++ UNSPEC_VEC_AVG ++ UNSPEC_VEC_AVGU ++ UNSPEC_VEC_CHECKSUM ++ UNSPEC_VEC_GFMSUM ++ 
UNSPEC_VEC_GFMSUM_128 ++ UNSPEC_VEC_GFMSUM_ACCUM ++ UNSPEC_VEC_GFMSUM_ACCUM_128 ++ UNSPEC_VEC_SET ++ ++ UNSPEC_VEC_VSUMG ++ UNSPEC_VEC_VSUMQ ++ UNSPEC_VEC_VSUM ++ UNSPEC_VEC_RL_MASK ++ UNSPEC_VEC_SLL ++ UNSPEC_VEC_SLB ++ UNSPEC_VEC_SLDB ++ UNSPEC_VEC_SRAL ++ UNSPEC_VEC_SRAB ++ UNSPEC_VEC_SRL ++ UNSPEC_VEC_SRLB ++ ++ UNSPEC_VEC_SUB_U128 ++ UNSPEC_VEC_SUBC ++ UNSPEC_VEC_SUBC_U128 ++ UNSPEC_VEC_SUBE_U128 ++ UNSPEC_VEC_SUBEC_U128 ++ ++ UNSPEC_VEC_TEST_MASK ++ ++ UNSPEC_VEC_VFAE ++ UNSPEC_VEC_VFAECC ++ ++ UNSPEC_VEC_VFEE ++ UNSPEC_VEC_VFEECC ++ UNSPEC_VEC_VFENE ++ UNSPEC_VEC_VFENECC ++ ++ UNSPEC_VEC_VISTR ++ UNSPEC_VEC_VISTRCC ++ ++ UNSPEC_VEC_VSTRC ++ UNSPEC_VEC_VSTRCCC ++ ++ UNSPEC_VEC_VCDGB ++ UNSPEC_VEC_VCDLGB ++ ++ UNSPEC_VEC_VCGDB ++ UNSPEC_VEC_VCLGDB ++ ++ UNSPEC_VEC_VFIDB ++ ++ UNSPEC_VEC_VLDEB ++ UNSPEC_VEC_VLEDB ++ ++ UNSPEC_VEC_VFTCIDB ++ UNSPEC_VEC_VFTCIDBCC ++]) + + ;; + ;; UNSPEC_VOLATILE usage +@@ -167,6 +269,10 @@ + UNSPECV_ETND + UNSPECV_NTSTG + UNSPECV_PPA ++ ++ ; Set and get floating point control register ++ UNSPECV_SFPC ++ UNSPECV_EFPC + ]) + + ;; +@@ -198,6 +304,11 @@ + ; Floating point registers. + (FPR0_REGNUM 16) + (FPR2_REGNUM 18) ++ (VR0_REGNUM 16) ++ (VR16_REGNUM 38) ++ (VR23_REGNUM 45) ++ (VR24_REGNUM 46) ++ (VR31_REGNUM 53) + ]) + + ;; +@@ -228,7 +339,7 @@ + ;; Used to determine defaults for length and other attribute values. + + (define_attr "op_type" +- "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS" ++ "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS,VRI,VRR,VRS,VRV,VRX" + (const_string "NN")) + + ;; Instruction type attribute used for scheduling. +@@ -306,10 +417,11 @@ + ;; distinguish between g5 and g6, but there are differences between the two + ;; CPUs could in theory be modeled. 
+ +-(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12" ++(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13" + (const (symbol_ref "s390_tune_attr"))) + +-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12" ++(define_attr "cpu_facility" ++ "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12,vec" + (const_string "standard")) + + (define_attr "enabled" "" +@@ -346,6 +458,10 @@ + + (and (eq_attr "cpu_facility" "zEC12") + (match_test "TARGET_ZEC12")) ++ (const_int 1) ++ ++ (and (eq_attr "cpu_facility" "vec") ++ (match_test "TARGET_VX")) + (const_int 1)] + (const_int 0))) + +@@ -365,6 +481,9 @@ + ;; Pipeline description for zEC12 + (include "2827.md") + ++;; Pipeline description for z13 ++(include "2964.md") ++ + ;; Predicates + (include "predicates.md") + +@@ -376,12 +495,13 @@ + + ;; Iterators + ++(define_mode_iterator ALL [TI DI SI HI QI TF DF SF TD DD SD V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1TI V1DF V2DF V1TF]) ++ + ;; These mode iterators allow floating point patterns to be generated from the + ;; same template. + (define_mode_iterator FP_ALL [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP") + (SD "TARGET_HARD_DFP")]) + (define_mode_iterator FP [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")]) +-(define_mode_iterator FPALL [TF DF SF TD DD SD]) + (define_mode_iterator BFP [TF DF SF]) + (define_mode_iterator DFP [TD DD]) + (define_mode_iterator DFP_ALL [TD DD SD]) +@@ -417,7 +537,6 @@ + ;; This mode iterator allows the integer patterns to be defined from the + ;; same template. + (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI]) +-(define_mode_iterator INTALL [TI DI SI HI QI]) + (define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI]) + + ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from +@@ -476,6 +595,14 @@ + ;; first and the second operand match for bfp modes. 
+ (define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (DD "f")]) + ++;; This attribute is used to merge the scalar vector instructions into ++;; the FP patterns. For non-supported modes (all but DF) it expands ++;; to constraints which are supposed to be matched by an earlier ++;; variant. ++(define_mode_attr v0 [(TF "0") (DF "v") (SF "0") (TD "0") (DD "0") (DD "0") (TI "0") (DI "v") (SI "0")]) ++(define_mode_attr vf [(TF "f") (DF "v") (SF "f") (TD "f") (DD "f") (DD "f") (TI "f") (DI "v") (SI "f")]) ++(define_mode_attr vd [(TF "d") (DF "v") (SF "d") (TD "d") (DD "d") (DD "d") (TI "d") (DI "v") (SI "d")]) ++ + ;; This attribute is used in the operand list of the instruction to have an + ;; additional operand for the dfp instructions. + (define_mode_attr op1 [(TF "") (DF "") (SF "") +@@ -584,6 +711,19 @@ + ;; In place of GET_MODE_BITSIZE (mode) + (define_mode_attr bitsize [(DI "64") (SI "32") (HI "16") (QI "8")]) + ++ ++ ++; Condition code modes generated by vector fp comparisons. These will ++; be used also in single element mode. ++(define_mode_iterator VFCMP [CCVEQ CCVFH CCVFHE]) ++; Used with VFCMP to expand part of the mnemonic ++; For fp we have a mismatch: eq in the insn name - e in asm ++(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")]) ++(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVH "h") (CCVHU "hl") (CCVFH "h") (CCVFHE "he")]) ++ ++ ++(include "vector.md") ++ + ;; + ;;- Compare instructions. + ;; +@@ -1091,6 +1231,15 @@ + [(set_attr "op_type" "RRE,RXE") + (set_attr "type" "fsimp")]) + ++; wfcedbs, wfchdbs, wfchedbs ++(define_insn "*vec_cmpdf_cconly" ++ [(set (reg:VFCMP CC_REGNUM) ++ (compare:VFCMP (match_operand:DF 0 "register_operand" "v") ++ (match_operand:DF 1 "register_operand" "v"))) ++ (clobber (match_scratch:V2DI 2 "=v"))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "wfcdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) + + ; Compare and Branch instructions + +@@ -1216,17 +1365,27 @@ + ; movti instruction pattern(s). 
+ ; + ++; FIXME: More constants are possible by enabling jxx, jyy constraints ++; for TImode (use double-int for the calculations) + (define_insn "movti" +- [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,d,o") +- (match_operand:TI 1 "general_operand" "QS,d,dPRT,d"))] ++ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,v, v, v,v,d, v,QR, d,o") ++ (match_operand:TI 1 "general_operand" "QS, d,v,j00,jm1,d,v,QR, v,dPRT,d"))] + "TARGET_ZARCH" + "@ + lmg\t%0,%N0,%S1 + stmg\t%1,%N1,%S0 ++ vlr\t%v0,%v1 ++ vzero\t%v0 ++ vone\t%v0 ++ vlvgp\t%v0,%1,%N1 ++ # ++ vl\t%v0,%1 ++ vst\t%v1,%0 + # + #" +- [(set_attr "op_type" "RSY,RSY,*,*") +- (set_attr "type" "lm,stm,*,*")]) ++ [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*") ++ (set_attr "type" "lm,stm,*,*,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,vec,vec,vec,vec,vec,vec,vec,*,*")]) + + (define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") +@@ -1256,10 +1415,14 @@ + operands[5] = operand_subword (operands[1], 0, 0, TImode); + }) + ++; Use part of the TImode target reg to perform the address ++; calculation. If the TImode value is supposed to be copied into a VR ++; this splitter is not necessary. + (define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "memory_operand" ""))] + "TARGET_ZARCH && reload_completed ++ && !VECTOR_REG_P (operands[0]) + && !s_operand (operands[1], VOIDmode)" + [(set (match_dup 0) (match_dup 1))] + { +@@ -1270,6 +1433,25 @@ + }) + + ++; Split a VR -> GPR TImode move into 2 vector load GR from VR element. ++; For the higher order bits we do simply a DImode move while the ++; second part is done via vec extract. Both will end up as vlgvg. 
++(define_split ++ [(set (match_operand:TI 0 "register_operand" "") ++ (match_operand:TI 1 "register_operand" ""))] ++ "TARGET_VX && reload_completed ++ && GENERAL_REG_P (operands[0]) ++ && VECTOR_REG_P (operands[1])" ++ [(set (match_dup 2) (match_dup 4)) ++ (set (match_dup 3) (unspec:DI [(match_dup 5) (const_int 1)] ++ UNSPEC_VEC_EXTRACT))] ++{ ++ operands[2] = operand_subword (operands[0], 0, 0, TImode); ++ operands[3] = operand_subword (operands[0], 1, 0, TImode); ++ operands[4] = gen_rtx_REG (DImode, REGNO (operands[1])); ++ operands[5] = gen_rtx_REG (V2DImode, REGNO (operands[1])); ++}) ++ + ; + ; Patterns used for secondary reloads + ; +@@ -1278,40 +1460,20 @@ + ; Unfortunately there is no such variant for QI, TI and FP mode moves. + ; These patterns are also used for unaligned SI and DI accesses. + +-(define_expand "reload_tomem_z10" +- [(parallel [(match_operand:INTALL 0 "memory_operand" "") +- (match_operand:INTALL 1 "register_operand" "=d") +- (match_operand:P 2 "register_operand" "=&a")])] +- "TARGET_Z10" +-{ +- s390_reload_symref_address (operands[1], operands[0], operands[2], 1); +- DONE; +-}) +- +-(define_expand "reload_toreg_z10" +- [(parallel [(match_operand:INTALL 0 "register_operand" "=d") +- (match_operand:INTALL 1 "memory_operand" "") +- (match_operand:P 2 "register_operand" "=a")])] +- "TARGET_Z10" +-{ +- s390_reload_symref_address (operands[0], operands[1], operands[2], 0); +- DONE; +-}) +- +-(define_expand "reload_tomem_z10" +- [(parallel [(match_operand:FPALL 0 "memory_operand" "") +- (match_operand:FPALL 1 "register_operand" "=d") +- (match_operand:P 2 "register_operand" "=&a")])] ++(define_expand "reload_tomem_z10" ++ [(parallel [(match_operand:ALL 0 "memory_operand" "") ++ (match_operand:ALL 1 "register_operand" "=d") ++ (match_operand:P 2 "register_operand" "=&a")])] + "TARGET_Z10" + { + s390_reload_symref_address (operands[1], operands[0], operands[2], 1); + DONE; + }) + +-(define_expand "reload_toreg_z10" +- [(parallel 
[(match_operand:FPALL 0 "register_operand" "=d") +- (match_operand:FPALL 1 "memory_operand" "") +- (match_operand:P 2 "register_operand" "=a")])] ++(define_expand "reload_toreg_z10" ++ [(parallel [(match_operand:ALL 0 "register_operand" "=d") ++ (match_operand:ALL 1 "memory_operand" "") ++ (match_operand:P 2 "register_operand" "=a")])] + "TARGET_Z10" + { + s390_reload_symref_address (operands[0], operands[1], operands[2], 0); +@@ -1340,9 +1502,16 @@ + DONE; + }) + +-; Handles assessing a non-offsetable memory address ++; Not all the indirect memory access instructions support the full ++; format (long disp + index + base). So whenever a move from/to such ++; an address is required and the instruction cannot deal with it we do ++; a load address into a scratch register first and use this as the new ++; base register. ++; This in particular is used for: ++; - non-offsetable memory accesses for multiword moves ++; - full vector reg moves with long displacements + +-(define_expand "reload_nonoffmem_in" ++(define_expand "reload_la_in" + [(parallel [(match_operand 0 "register_operand" "") + (match_operand 1 "" "") + (match_operand:P 2 "register_operand" "=&a")])] +@@ -1355,7 +1524,7 @@ + DONE; + }) + +-(define_expand "reload_nonoffmem_out" ++(define_expand "reload_la_out" + [(parallel [(match_operand 0 "" "") + (match_operand 1 "register_operand" "") + (match_operand:P 2 "register_operand" "=&a")])] +@@ -1408,11 +1577,9 @@ + + (define_insn "*movdi_64" + [(set (match_operand:DI 0 "nonimmediate_operand" +- "=d,d,d,d,d,d,d,d,f,d,d,d,d,d, +- RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t") ++ "=d, d, d, d, d, d, d, d,f,d,d,d,d, d,RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t,v,v,v,d, v,QR") + (match_operand:DI 1 "general_operand" +- "K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, +- d,*f,R,T,*f,*f,d,K,t,d,t,Q"))] ++ " K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, d, *f, R, T,*f,*f,d,K,t,d,t,Q,K,v,d,v,QR, v"))] + "TARGET_ZARCH" + "@ + lghi\t%0,%h1 +@@ -1440,15 +1607,21 @@ + # + # + 
stam\t%1,%N1,%S0 +- lam\t%0,%N0,%S1" ++ lam\t%0,%N0,%S1 ++ vleig\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0" + [(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RXY,RIL,RRE,RXY, +- RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS") ++ RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS,VRI,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "*,*,*,*,*,*,*,*,floaddf,floaddf,la,larl,lr,load,store, +- floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*, +- *,*") ++ floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,*, ++ *,*,*,*,*,*,*") + (set_attr "cpu_facility" "*,*,*,*,*,extimm,extimm,extimm,dfp,dfp,longdisp, + z10,*,*,*,*,*,longdisp,*,longdisp, +- z10,z10,*,*,*,*") ++ z10,z10,*,*,*,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fwd_A1, + z10_fwd_E1, + z10_fwd_E1, +@@ -1474,7 +1647,7 @@ + *, + *, + *, +- *") ++ *,*,*,*,*,*,*") + ]) + + (define_split +@@ -1666,9 +1839,9 @@ + + (define_insn "*movsi_zarch" + [(set (match_operand:SI 0 "nonimmediate_operand" +- "=d,d,d,d,d,d,d,d,d,R,T,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t") ++ "=d, d, d, d,d,d,d,d,d,R,T,!*f,!*f,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t,v,v,v,d, v,QR") + (match_operand:SI 1 "general_operand" +- "K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d,*f,R,T,*f,*f,t,d,t,d,K,Q"))] ++ " K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d, *f, *f, R, R, T,*f,*f,t,d,t,d,K,Q,K,v,d,v,QR, v"))] + "TARGET_ZARCH" + "@ + lhi\t%0,%h1 +@@ -1682,7 +1855,9 @@ + ly\t%0,%1 + st\t%1,%0 + sty\t%1,%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ley\t%0,%1 + ste\t%1,%0 +@@ -1692,9 +1867,15 @@ + stam\t%1,%1,%S0 + strl\t%1,%0 + mvhi\t%0,%1 +- lam\t%0,%0,%S1" ++ lam\t%0,%0,%S1 ++ vleif\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vlef\t%v0,%1,0 ++ vstef\t%v1,%0,0" + [(set_attr "op_type" "RI,RI,RI,RIL,RXY,RIL,RR,RX,RXY,RX,RXY, +- RR,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS") ++ RRE,RR,RXE,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS,VRI,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "*, + *, + *, +@@ -1709,6 +1890,8 @@ + floadsf, + 
floadsf, + floadsf, ++ floadsf, ++ floadsf, + fstoresf, + fstoresf, + *, +@@ -1716,9 +1899,9 @@ + *, + larl, + *, +- *") ++ *,*,*,*,*,*,*") + (set_attr "cpu_facility" "*,*,*,extimm,longdisp,z10,*,*,longdisp,*,longdisp, +- *,*,longdisp,*,longdisp,*,*,*,z10,z10,*") ++ vec,*,vec,*,longdisp,*,longdisp,*,*,*,z10,z10,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fwd_A1, + z10_fwd_E1, + z10_fwd_E1, +@@ -1735,42 +1918,38 @@ + *, + *, + *, ++ *, ++ *, + z10_super_E1, + z10_super, + *, + z10_rec, + z10_super, +- *")]) ++ *,*,*,*,*,*,*")]) + + (define_insn "*movsi_esa" +- [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!R,d,t,Q,t") +- (match_operand:SI 1 "general_operand" "K,d,R,d,*f,R,*f,t,d,t,Q"))] ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!*f,!*f,!R,d,t,Q,t") ++ (match_operand:SI 1 "general_operand" "K,d,R,d, *f, *f, R, R,*f,t,d,t,Q"))] + "!TARGET_ZARCH" + "@ + lhi\t%0,%h1 + lr\t%0,%1 + l\t%0,%1 + st\t%1,%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ste\t%1,%0 + ear\t%0,%1 + sar\t%0,%1 + stam\t%1,%1,%S0 + lam\t%0,%0,%S1" +- [(set_attr "op_type" "RI,RR,RX,RX,RR,RX,RX,RRE,RRE,RS,RS") +- (set_attr "type" "*,lr,load,store,floadsf,floadsf,fstoresf,*,*,*,*") +- (set_attr "z10prop" "z10_fwd_A1, +- z10_fr_E1, +- z10_fwd_A3, +- z10_rec, +- *, +- *, +- *, +- z10_super_E1, +- z10_super, +- *, +- *") ++ [(set_attr "op_type" "RI,RR,RX,RX,RRE,RR,RXE,RX,RX,RRE,RRE,RS,RS") ++ (set_attr "type" "*,lr,load,store,floadsf,floadsf,floadsf,floadsf,fstoresf,*,*,*,*") ++ (set_attr "z10prop" "z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec,*,*,*,*,*,z10_super_E1, ++ z10_super,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,vec,*,vec,*,*,*,*,*,*") + ]) + + (define_peephole2 +@@ -1880,8 +2059,8 @@ + }) + + (define_insn "*movhi" +- [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q") +- (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K"))] ++ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q,v,v,v,d, 
v,QR") ++ (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K,K,v,d,v,QR, v"))] + "" + "@ + lr\t%0,%1 +@@ -1892,10 +2071,16 @@ + sth\t%1,%0 + sthy\t%1,%0 + sthrl\t%1,%0 +- mvhhi\t%0,%1" +- [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL") +- (set_attr "type" "lr,*,*,*,larl,store,store,store,*") +- (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10") ++ mvhhi\t%0,%1 ++ vleih\t%v0,%h1,0 ++ vlr\t%v0,%v1 ++ vlvgh\t%v0,%1,0 ++ vlgvh\t%0,%v1,0 ++ vleh\t%v0,%1,0 ++ vsteh\t%v1,%0,0" ++ [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL,VRI,VRR,VRS,VRS,VRX,VRX") ++ (set_attr "type" "lr,*,*,*,larl,store,store,store,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fr_E1, + z10_fwd_A1, + z10_super_E1, +@@ -1904,7 +2089,7 @@ + z10_rec, + z10_rec, + z10_rec, +- z10_super")]) ++ z10_super,*,*,*,*,*,*")]) + + (define_peephole2 + [(set (match_operand:HI 0 "register_operand" "") +@@ -1939,8 +2124,8 @@ + }) + + (define_insn "*movqi" +- [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q") +- (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q"))] ++ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q,v,v,v,d, v,QR") ++ (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q,K,v,d,v,QR, v"))] + "" + "@ + lr\t%0,%1 +@@ -1951,9 +2136,16 @@ + stcy\t%1,%0 + mvi\t%S0,%b1 + mviy\t%S0,%b1 +- #" +- [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS") +- (set_attr "type" "lr,*,*,*,store,store,store,store,*") ++ # ++ vleib\t%v0,%b1,0 ++ vlr\t%v0,%v1 ++ vlvgb\t%v0,%1,0 ++ vlgvb\t%0,%v1,0 ++ vleb\t%v0,%1,0 ++ vsteb\t%v1,%0,0" ++ [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS,VRI,VRR,VRS,VRS,VRX,VRX") ++ (set_attr "type" "lr,*,*,*,store,store,store,store,*,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "*,*,*,*,*,*,*,*,*,vec,vec,vec,vec,vec,vec") + (set_attr "z10prop" "z10_fr_E1, + z10_fwd_A1, + z10_super_E1, +@@ -1962,7 +2154,7 @@ + z10_rec, + z10_super, + 
z10_super, +- *")]) ++ *,*,*,*,*,*,*")]) + + (define_peephole2 + [(set (match_operand:QI 0 "nonimmediate_operand" "") +@@ -2094,7 +2286,7 @@ + [(set (match_operand:TD_TF 0 "register_operand" "") + (match_operand:TD_TF 1 "memory_operand" ""))] + "TARGET_ZARCH && reload_completed +- && !FP_REG_P (operands[0]) ++ && GENERAL_REG_P (operands[0]) + && !s_operand (operands[1], VOIDmode)" + [(set (match_dup 0) (match_dup 1))] + { +@@ -2150,9 +2342,9 @@ + + (define_insn "*mov_64dfp" + [(set (match_operand:DD_DF 0 "nonimmediate_operand" +- "=f,f,f,d,f,f,R,T,d,d, d,RT") ++ "=f,f,f,d,f,f,R,T,d,d,d, d,b,RT,v,v,d,v,QR") + (match_operand:DD_DF 1 "general_operand" +- " G,f,d,f,R,T,f,f,G,d,RT, d"))] ++ " G,f,d,f,R,T,f,f,G,d,b,RT,d, d,v,d,v,QR,v"))] + "TARGET_DFP" + "@ + lzdr\t%0 +@@ -2165,17 +2357,24 @@ + stdy\t%1,%0 + lghi\t%0,0 + lgr\t%0,%1 ++ lgrl\t%0,%1 + lg\t%0,%1 +- stg\t%1,%0" +- [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RXY,RXY") ++ stgrl\t%1,%0 ++ stg\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%0,%1,0 ++ vsteg\t%1,%0,0" ++ [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRS,VRS,VRX,VRX") + (set_attr "type" "fsimpdf,floaddf,floaddf,floaddf,floaddf,floaddf, +- fstoredf,fstoredf,*,lr,load,store") +- (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")]) ++ fstoredf,fstoredf,*,lr,load,load,store,store,*,*,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*,*,*") ++ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec,vec,vec")]) + + (define_insn "*mov_64" +- [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d, d,RT") +- (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,RT, d"))] ++ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d,d, d,b,RT,v,v,QR") ++ (match_operand:DD_DF 1 
"general_operand" " G,f,R,T,f,f,G,d,b,RT,d, d,v,QR,v"))] + "TARGET_ZARCH" + "@ + lzdr\t%0 +@@ -2186,13 +2385,18 @@ + stdy\t%1,%0 + lghi\t%0,0 + lgr\t%0,%1 ++ lgrl\t%0,%1 + lg\t%0,%1 +- stg\t%1,%0" +- [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RXY,RXY") ++ stgrl\t%1,%0 ++ stg\t%1,%0 ++ vlr\t%v0,%v1 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0" ++ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRX,VRX") + (set_attr "type" "fsimpdf,fload,fload,fload, +- fstore,fstore,*,lr,load,store") +- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*")]) ++ fstore,fstore,*,lr,load,load,store,store,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*") ++ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec")]) + + (define_insn "*mov_31" + [(set (match_operand:DD_DF 0 "nonimmediate_operand" +@@ -2265,28 +2469,38 @@ + + (define_insn "mov" + [(set (match_operand:SD_SF 0 "nonimmediate_operand" +- "=f,f,f,f,R,T,d,d,d,d,R,T") ++ "=f,f,f,f,f,f,R,T,d,d,d,d,d,b,R,T,v,v,v,d,v,QR") + (match_operand:SD_SF 1 "general_operand" +- " G,f,R,T,f,f,G,d,R,T,d,d"))] ++ " G,f,f,R,R,T,f,f,G,d,b,R,T,d,d,d,v,G,d,v,QR,v"))] + "" + "@ + lzer\t%0 ++ lder\t%0,%1 + ler\t%0,%1 ++ lde\t%0,%1 + le\t%0,%1 + ley\t%0,%1 + ste\t%1,%0 + stey\t%1,%0 + lhi\t%0,0 + lr\t%0,%1 ++ lrl\t%0,%1 + l\t%0,%1 + ly\t%0,%1 ++ strl\t%1,%0 + st\t%1,%0 +- sty\t%1,%0" +- [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RR,RX,RXY,RX,RXY") +- (set_attr "type" "fsimpsf,fload,fload,fload, +- fstore,fstore,*,lr,load,load,store,store") +- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec") +- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")]) ++ sty\t%1,%0 ++ vlr\t%v0,%v1 ++ vleif\t%v0,0 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vleg\t%0,%1,0 ++ vsteg\t%1,%0,0" ++ [(set_attr "op_type" 
"RRE,RRE,RR,RXE,RX,RXY,RX,RXY,RI,RR,RIL,RX,RXY,RIL,RX,RXY,VRR,VRI,VRS,VRS,VRX,VRX") ++ (set_attr "type" "fsimpsf,fsimpsf,fload,fload,fload,fload, ++ fstore,fstore,*,lr,load,load,load,store,store,store,*,*,*,*,load,store") ++ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,z10_rec,*,*,*,*,*,*") ++ (set_attr "cpu_facility" "z196,vec,*,vec,*,*,*,*,*,*,z10,*,*,z10,*,*,vec,vec,vec,vec,vec,vec")]) + + ; + ; movcc instruction pattern +@@ -2577,6 +2791,22 @@ + ; + + (define_expand "strlen" ++ [(match_operand:P 0 "register_operand" "") ; result ++ (match_operand:BLK 1 "memory_operand" "") ; input string ++ (match_operand:SI 2 "immediate_operand" "") ; search character ++ (match_operand:SI 3 "immediate_operand" "")] ; known alignment ++ "" ++{ ++ if (!TARGET_VX || operands[2] != const0_rtx) ++ emit_insn (gen_strlen_srst (operands[0], operands[1], ++ operands[2], operands[3])); ++ else ++ s390_expand_vec_strlen (operands[0], operands[1], operands[3]); ++ ++ DONE; ++}) ++ ++(define_expand "strlen_srst" + [(set (reg:SI 0) (match_operand:SI 2 "immediate_operand" "")) + (parallel + [(set (match_dup 4) +@@ -2674,8 +2904,16 @@ + (clobber (reg:CC CC_REGNUM))])] + "" + { +- rtx addr1 = gen_reg_rtx (Pmode); +- rtx addr2 = gen_reg_rtx (Pmode); ++ rtx addr1, addr2; ++ ++ if (TARGET_VX && optimize_function_for_speed_p (cfun)) ++ { ++ s390_expand_vec_movstr (operands[0], operands[1], operands[2]); ++ DONE; ++ } ++ ++ addr1 = gen_reg_rtx (Pmode); ++ addr2 = gen_reg_rtx (Pmode); + + emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX)); + emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX)); +@@ -2886,8 +3124,12 @@ + operands[2] = GEN_INT (S390_TDC_INFINITY); + }) + ++; This extracts CC into a GPR properly shifted. The actual IPM ++; instruction will be issued by reload. The constraint of operand 1 ++; forces reload to use a GPR. 
So reload will issue a movcc insn for ++; copying CC into a GPR first. + (define_insn_and_split "*cc_to_int" +- [(set (match_operand:SI 0 "register_operand" "=d") ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=d") + (unspec:SI [(match_operand 1 "register_operand" "0")] + UNSPEC_CC_TO_INT))] + "operands != NULL" +@@ -4223,14 +4465,27 @@ + + ; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns. + ++(define_insn "*fixuns_truncdfdi2_z13" ++ [(set (match_operand:DI 0 "register_operand" "=d,v") ++ (unsigned_fix:DI (match_operand:DF 1 "register_operand" "f,v"))) ++ (unspec:DI [(match_operand:DI 2 "immediate_operand" "K,K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ clgdbr\t%0,%h2,%1,0 ++ wclgdb\t%v0,%v1,0,%h2" ++ [(set_attr "op_type" "RRF,VRR") ++ (set_attr "type" "ftoi")]) ++ + ; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr + ; clfdtr, clfxtr, clgdtr, clgxtr + (define_insn "*fixuns_trunc2_z196" +- [(set (match_operand:GPR 0 "register_operand" "=r") +- (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f"))) +- (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_Z196" ++ "TARGET_Z196 && TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DImode || mode != DFmode)" + "clr\t%0,%h2,%1,0" + [(set_attr "op_type" "RRF") + (set_attr "type" "ftoi")]) +@@ -4245,18 +4500,37 @@ + DONE; + }) + ++(define_insn "*fix_truncdfdi2_bfp_z13" ++ [(set (match_operand:DI 0 "register_operand" "=d,v") ++ (fix:DI (match_operand:DF 1 "register_operand" "f,v"))) ++ (unspec:DI [(match_operand:DI 2 "immediate_operand" "K,K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ cgdbr\t%0,%h2,%1 ++ wcgdb\t%v0,%v1,0,%h2" ++ [(set_attr 
"op_type" "RRE,VRR") ++ (set_attr "type" "ftoi")]) ++ + ; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr +-(define_insn "fix_trunc2_bfp" +- [(set (match_operand:GPR 0 "register_operand" "=d") +- (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) +- (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++(define_insn "*fix_trunc2_bfp" ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) + (clobber (reg:CC CC_REGNUM))] +- "TARGET_HARD_FLOAT" ++ "TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DImode || mode != DFmode)" + "cbr\t%0,%h2,%1" + [(set_attr "op_type" "RRE") + (set_attr "type" "ftoi")]) + +- ++(define_expand "fix_trunc2_bfp" ++ [(parallel ++ [(set (match_operand:GPR 0 "register_operand" "=d") ++ (fix:GPR (match_operand:BFP 1 "register_operand" "f"))) ++ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_HARD_FLOAT") + ; + ; fix_trunc(td|dd)di2 instruction pattern(s). + ; +@@ -4303,12 +4577,15 @@ + + ; cxgbr, cdgbr, cegbr, cxgtr, cdgtr + (define_insn "floatdi2" +- [(set (match_operand:FP 0 "register_operand" "=f") +- (float:FP (match_operand:DI 1 "register_operand" "d")))] ++ [(set (match_operand:FP 0 "register_operand" "=f,") ++ (float:FP (match_operand:DI 1 "register_operand" "d,")))] + "TARGET_ZARCH && TARGET_HARD_FLOAT" +- "cgr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "itof" )]) ++ "@ ++ cgr\t%0,%1 ++ wcdgb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "itof" ) ++ (set_attr "cpu_facility" "*,vec")]) + + ; cxfbr, cdfbr, cefbr + (define_insn "floatsi2" +@@ -4332,27 +4609,47 @@ + ; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s). 
+ ; + ++(define_insn "*floatunsdidf2_z13" ++ [(set (match_operand:DF 0 "register_operand" "=f,v") ++ (unsigned_float:DF (match_operand:DI 1 "register_operand" "d,v")))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ cdlgbr\t%0,0,%1,0 ++ wcdlgb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "itofdf")]) ++ + ; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr + ; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr +-(define_insn "floatuns2" +- [(set (match_operand:FP 0 "register_operand" "=f") +- (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))] +- "TARGET_Z196 && TARGET_HARD_FLOAT" ++(define_insn "*floatuns2" ++ [(set (match_operand:FP 0 "register_operand" "=f") ++ (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))] ++ "TARGET_Z196 && TARGET_HARD_FLOAT ++ && (!TARGET_VX || mode != DFmode || mode != DImode)" + "clr\t%0,0,%1,0" + [(set_attr "op_type" "RRE") +- (set_attr "type" "itof" )]) ++ (set_attr "type" "itof")]) ++ ++(define_expand "floatuns2" ++ [(set (match_operand:FP 0 "register_operand" "") ++ (unsigned_float:FP (match_operand:GPR 1 "register_operand" "")))] ++ "TARGET_Z196 && TARGET_HARD_FLOAT") + + ; + ; truncdfsf2 instruction pattern(s). + ; + + (define_insn "truncdfsf2" +- [(set (match_operand:SF 0 "register_operand" "=f") +- (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] ++ [(set (match_operand:SF 0 "register_operand" "=f,v") ++ (float_truncate:SF (match_operand:DF 1 "register_operand" "f,v")))] + "TARGET_HARD_FLOAT" +- "ledbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "ftruncdf")]) ++ "@ ++ ledbr\t%0,%1 ++ wledb\t%v0,%v1,0,0" ; IEEE inexact exception not suppressed ++ ; According to BFP rounding mode ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "type" "ftruncdf") ++ (set_attr "cpu_facility" "*,vec")]) + + ; + ; trunctf(df|sf)2 instruction pattern(s). +@@ -4393,17 +4690,35 @@ + ; extend(sf|df)(df|tf)2 instruction pattern(s). 
+ ; + ++(define_insn "*extendsfdf2_z13" ++ [(set (match_operand:DF 0 "register_operand" "=f,f,v") ++ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,R,v")))] ++ "TARGET_VX && TARGET_HARD_FLOAT" ++ "@ ++ ldebr\t%0,%1 ++ ldeb\t%0,%1 ++ wldeb\t%v0,%v1" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fsimpdf, floaddf,fsimpdf")]) ++ + ; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb +-(define_insn "extend2" +- [(set (match_operand:BFP 0 "register_operand" "=f,f") ++(define_insn "*extend2" ++ [(set (match_operand:BFP 0 "register_operand" "=f,f") + (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "f,R")))] + "TARGET_HARD_FLOAT +- && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode)" ++ && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode) ++ && (!TARGET_VX || mode != DFmode || mode != SFmode)" + "@ + lbr\t%0,%1 + lb\t%0,%1" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fsimp, fload")]) ++ [(set_attr "op_type" "RRE,RXE") ++ (set_attr "type" "fsimp, fload")]) ++ ++(define_expand "extend2" ++ [(set (match_operand:BFP 0 "register_operand" "") ++ (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "")))] ++ "TARGET_HARD_FLOAT ++ && GET_MODE_SIZE (mode) > GET_MODE_SIZE (mode)") + + ; + ; extendddtd2 and extendsddd2 instruction pattern(s). +@@ -4616,10 +4931,29 @@ + ; addti3 instruction pattern(s). + ; + +-(define_insn_and_split "addti3" +- [(set (match_operand:TI 0 "register_operand" "=&d") ++(define_expand "addti3" ++ [(parallel ++ [(set (match_operand:TI 0 "register_operand" "") ++ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") ++ (match_operand:TI 2 "general_operand" "") ) ) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_ZARCH" ++{ ++ /* For z13 we have vaq which doesn't set CC. 
*/ ++ if (TARGET_VX) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_PLUS (TImode, ++ copy_to_mode_reg (TImode, operands[1]), ++ copy_to_mode_reg (TImode, operands[2])))); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "*addti3" ++ [(set (match_operand:TI 0 "register_operand" "=&d") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +- (match_operand:TI 2 "general_operand" "do") ) ) ++ (match_operand:TI 2 "general_operand" "do") ) ) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH" + "#" +@@ -4639,7 +4973,9 @@ + operands[5] = operand_subword (operands[2], 0, 0, TImode); + operands[6] = operand_subword (operands[0], 1, 0, TImode); + operands[7] = operand_subword (operands[1], 1, 0, TImode); +- operands[8] = operand_subword (operands[2], 1, 0, TImode);") ++ operands[8] = operand_subword (operands[2], 1, 0, TImode);" ++ [(set_attr "op_type" "*") ++ (set_attr "cpu_facility" "*")]) + + ; + ; adddi3 instruction pattern(s). +@@ -4976,17 +5312,20 @@ + ; + + ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr ++; FIXME: wfadb does not clobber cc + (define_insn "add3" +- [(set (match_operand:FP 0 "register_operand" "=f, f") +- (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%,0") +- (match_operand:FP 2 "general_operand" " f,"))) ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%, 0,") ++ (match_operand:FP 2 "general_operand" "f,,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" + "@ + ar\t%0,%2 +- ab\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fsimp")]) ++ ab\t%0,%2 ++ wfadb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fsimp") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr + (define_insn "*add3_cc" +@@ -5026,10 +5365,29 @@ + ; subti3 instruction pattern(s). 
+ ; + +-(define_insn_and_split "subti3" +- [(set (match_operand:TI 0 "register_operand" "=&d") +- (minus:TI (match_operand:TI 1 "register_operand" "0") +- (match_operand:TI 2 "general_operand" "do") ) ) ++(define_expand "subti3" ++ [(parallel ++ [(set (match_operand:TI 0 "register_operand" "") ++ (minus:TI (match_operand:TI 1 "register_operand" "") ++ (match_operand:TI 2 "general_operand" "") ) ) ++ (clobber (reg:CC CC_REGNUM))])] ++ "TARGET_ZARCH" ++{ ++ /* For z13 we have vsq which doesn't set CC. */ ++ if (TARGET_VX) ++ { ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_MINUS (TImode, ++ operands[1], ++ copy_to_mode_reg (TImode, operands[2])))); ++ DONE; ++ } ++}) ++ ++(define_insn_and_split "*subti3" ++ [(set (match_operand:TI 0 "register_operand" "=&d") ++ (minus:TI (match_operand:TI 1 "register_operand" "0") ++ (match_operand:TI 2 "general_operand" "do") ) ) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH" + "#" +@@ -5048,7 +5406,9 @@ + operands[5] = operand_subword (operands[2], 0, 0, TImode); + operands[6] = operand_subword (operands[0], 1, 0, TImode); + operands[7] = operand_subword (operands[1], 1, 0, TImode); +- operands[8] = operand_subword (operands[2], 1, 0, TImode);") ++ operands[8] = operand_subword (operands[2], 1, 0, TImode);" ++ [(set_attr "op_type" "*") ++ (set_attr "cpu_facility" "*")]) + + ; + ; subdi3 instruction pattern(s).
+@@ -5327,16 +5687,18 @@ + + ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr + (define_insn "sub3" +- [(set (match_operand:FP 0 "register_operand" "=f, f") +- (minus:FP (match_operand:FP 1 "register_operand" ",0") +- (match_operand:FP 2 "general_operand" "f,"))) ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (minus:FP (match_operand:FP 1 "register_operand" ", 0,") ++ (match_operand:FP 2 "general_operand" "f,,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" + "@ + sr\t%0,%2 +- sb\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fsimp")]) ++ sb\t%0,%2 ++ wfsdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fsimp") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr + (define_insn "*sub3_cc" +@@ -5742,41 +6104,47 @@ + + ; mxbr, mdbr, meebr, mxb, mxb, meeb, mdtr, mxtr + (define_insn "mul3" +- [(set (match_operand:FP 0 "register_operand" "=f,f") +- (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%,0") +- (match_operand:FP 2 "general_operand" "f,")))] ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%, 0,") ++ (match_operand:FP 2 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + mr\t%0,%2 +- mb\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fmul")]) ++ mb\t%0,%2 ++ wfmdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fmul") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; madbr, maebr, maxb, madb, maeb + (define_insn "fma4" +- [(set (match_operand:DSF 0 "register_operand" "=f,f") +- (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f") +- (match_operand:DSF 2 "nonimmediate_operand" "f,R") +- (match_operand:DSF 3 "register_operand" "0,0")))] ++ [(set (match_operand:DSF 0 "register_operand" "=f,f,") ++ (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,") ++ (match_operand:DSF 2 "nonimmediate_operand" "f,R,") ++ (match_operand:DSF 3 "register_operand" "0,0,")))] 
+ "TARGET_HARD_FLOAT" + "@ + mabr\t%0,%1,%2 +- mab\t%0,%1,%2" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fmadd")]) ++ mab\t%0,%1,%2 ++ wfmadb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fmadd") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ; msxbr, msdbr, msebr, msxb, msdb, mseb + (define_insn "fms4" +- [(set (match_operand:DSF 0 "register_operand" "=f,f") +- (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f") +- (match_operand:DSF 2 "nonimmediate_operand" "f,R") +- (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))] ++ [(set (match_operand:DSF 0 "register_operand" "=f,f,") ++ (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,") ++ (match_operand:DSF 2 "nonimmediate_operand" "f,R,") ++ (neg:DSF (match_operand:DSF 3 "register_operand" "0,0,"))))] + "TARGET_HARD_FLOAT" + "@ + msbr\t%0,%1,%2 +- msb\t%0,%1,%2" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fmadd")]) ++ msb\t%0,%1,%2 ++ wfmsdb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fmadd") ++ (set_attr "cpu_facility" "*,*,vec")]) + + ;; + ;;- Divide and modulo instructions. 
+@@ -6202,15 +6570,17 @@ + + ; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr + (define_insn "div3" +- [(set (match_operand:FP 0 "register_operand" "=f,f") +- (div:FP (match_operand:FP 1 "register_operand" ",0") +- (match_operand:FP 2 "general_operand" "f,")))] ++ [(set (match_operand:FP 0 "register_operand" "=f, f,") ++ (div:FP (match_operand:FP 1 "register_operand" ", 0,") ++ (match_operand:FP 2 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + dr\t%0,%2 +- db\t%0,%2" +- [(set_attr "op_type" ",RXE") +- (set_attr "type" "fdiv")]) ++ db\t%0,%2 ++ wfddb\t%v0,%v1,%v2" ++ [(set_attr "op_type" ",RXE,VRR") ++ (set_attr "type" "fdiv") ++ (set_attr "cpu_facility" "*,*,vec")]) + + + ;; +@@ -7356,14 +7726,18 @@ + (set_attr "type" "fsimp")]) + + ; lcxbr, lcdbr, lcebr ++; FIXME: wflcdb does not clobber cc + (define_insn "*neg2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (neg:BFP (match_operand:BFP 1 "register_operand" "f"))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (neg:BFP (match_operand:BFP 1 "register_operand" "f,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lcbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lcbr\t%0,%1 ++ wflcdb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + + + ;; +@@ -7474,14 +7848,18 @@ + (set_attr "type" "fsimp")]) + + ; lpxbr, lpdbr, lpebr ++; FIXME: wflpdb does not clobber cc + (define_insn "*abs2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (abs:BFP (match_operand:BFP 1 "register_operand" "f"))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (abs:BFP (match_operand:BFP 1 "register_operand" "f,"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lpbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lpbr\t%0,%1 ++ wflpdb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + 
+ + ;; +@@ -7585,14 +7963,18 @@ + (set_attr "type" "fsimp")]) + + ; lnxbr, lndbr, lnebr ++; FIXME: wflndb does not clobber cc + (define_insn "*negabs2" +- [(set (match_operand:BFP 0 "register_operand" "=f") +- (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f")))) ++ [(set (match_operand:BFP 0 "register_operand" "=f,") ++ (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f,")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_HARD_FLOAT" +- "lnbr\t%0,%1" +- [(set_attr "op_type" "RRE") +- (set_attr "type" "fsimp")]) ++ "@ ++ lnbr\t%0,%1 ++ wflndb\t%0,%1" ++ [(set_attr "op_type" "RRE,VRR") ++ (set_attr "cpu_facility" "*,vec") ++ (set_attr "type" "fsimp,*")]) + + ;; + ;;- Square root instructions. +@@ -7604,14 +7986,16 @@ + + ; sqxbr, sqdbr, sqebr, sqdb, sqeb + (define_insn "sqrt2" +- [(set (match_operand:BFP 0 "register_operand" "=f,f") +- (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,")))] ++ [(set (match_operand:BFP 0 "register_operand" "=f, f,") ++ (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,,")))] + "TARGET_HARD_FLOAT" + "@ + sqbr\t%0,%1 +- sqb\t%0,%1" +- [(set_attr "op_type" "RRE,RXE") +- (set_attr "type" "fsqrt")]) ++ sqb\t%0,%1 ++ wfsqdb\t%v0,%v1" ++ [(set_attr "op_type" "RRE,RXE,VRR") ++ (set_attr "type" "fsqrt") ++ (set_attr "cpu_facility" "*,*,vec")]) + + + ;; +@@ -10006,6 +10390,35 @@ + DONE; + }) + ++; Clobber VRs since they don't get restored ++(define_insn "tbegin_1_z13" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] ++ UNSPECV_TBEGIN)) ++ (set (match_operand:BLK 1 "memory_operand" "=Q") ++ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB)) ++ (clobber (reg:TI 16)) (clobber (reg:TI 38)) ++ (clobber (reg:TI 17)) (clobber (reg:TI 39)) ++ (clobber (reg:TI 18)) (clobber (reg:TI 40)) ++ (clobber (reg:TI 19)) (clobber (reg:TI 41)) ++ (clobber (reg:TI 20)) (clobber (reg:TI 42)) ++ (clobber (reg:TI 21)) (clobber (reg:TI 43)) ++ (clobber (reg:TI 22)) (clobber (reg:TI 44)) ++ 
(clobber (reg:TI 23)) (clobber (reg:TI 45)) ++ (clobber (reg:TI 24)) (clobber (reg:TI 46)) ++ (clobber (reg:TI 25)) (clobber (reg:TI 47)) ++ (clobber (reg:TI 26)) (clobber (reg:TI 48)) ++ (clobber (reg:TI 27)) (clobber (reg:TI 49)) ++ (clobber (reg:TI 28)) (clobber (reg:TI 50)) ++ (clobber (reg:TI 29)) (clobber (reg:TI 51)) ++ (clobber (reg:TI 30)) (clobber (reg:TI 52)) ++ (clobber (reg:TI 31)) (clobber (reg:TI 53))] ++; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is ++; not supposed to be used for immediates (see genpreds.c). ++ "TARGET_VX && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" ++ "tbegin\t%1,%x0" ++ [(set_attr "op_type" "SIL")]) ++ + (define_insn "tbegin_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")] +@@ -10141,3 +10554,30 @@ + "TARGET_HTM && INTVAL (operands[2]) < 16" + "ppa\t%0,%1,%2" + [(set_attr "op_type" "RRF")]) ++ ++ ++; Set and get floating point control register ++ ++(define_insn "sfpc" ++ [(unspec_volatile [(match_operand:SI 0 "register_operand" "d")] ++ UNSPECV_SFPC)] ++ "TARGET_HARD_FLOAT" ++ "sfpc\t%0") ++ ++(define_insn "efpc" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec_volatile:SI [(const_int 0)] UNSPECV_EFPC))] ++ "TARGET_HARD_FLOAT" ++ "efpc\t%0") ++ ++ ++; Load count to block boundary ++ ++(define_insn "lcbb" ++ [(set (match_operand:SI 0 "register_operand" "=d") ++ (unspec:SI [(match_operand 1 "address_operand" "ZQZR") ++ (match_operand:SI 2 "immediate_operand" "C")] UNSPEC_LCBB)) ++ (clobber (reg:CC CC_REGNUM))] ++ "TARGET_Z13" ++ "lcbb\t%0,%a1,%b2" ++ [(set_attr "op_type" "VRX")]) +--- gcc/config/s390/s390-modes.def 2013-08-14 13:55:12.000000000 +0200 ++++ gcc/config/s390/s390-modes.def 2016-05-11 17:12:39.000000000 +0200 +@@ -84,6 +84,23 @@ Requested mode -> Destination + CCS, CCU, CCT, CCSR, CCUR -> CCZ + CCA -> CCAP, CCAN + ++Vector comparison modes ++ ++CCVEQ EQ - - NE (VCEQ) ++CCVEQANY EQ EQ - NE (VCEQ) ++ ++CCVH GT - 
- LE (VCH) ++CCVHANY GT GT - LE (VCH) ++CCVHU GTU - - LEU (VCHL) ++CCVHUANY GTU GTU - LEU (VCHL) ++ ++CCVFH GT - - UNLE (VFCH) ++CCVFHANY GT GT - UNLE (VFCH) ++CCVFHE GE - - UNLT (VFCHE) ++CCVFHEANY GE GE - UNLT (VFCHE) ++ ++ ++ + + *** Comments *** + +@@ -152,6 +169,15 @@ The compare and swap instructions sets t + operands were equal/unequal. The CCZ1 mode ensures the result can be + effectively placed into a register. + ++ ++CCV* ++ ++The variants with and without ANY are generated by the same ++instructions and therefore are holding the same information. However, ++when generating a condition code mask they require checking different ++bits of CC. In that case the variants without ANY represent the ++results for *all* elements. ++ + CCRAW + + The cc mode generated by a non-compare instruction. The condition +@@ -181,3 +207,38 @@ CC_MODE (CCT1); + CC_MODE (CCT2); + CC_MODE (CCT3); + CC_MODE (CCRAW); ++ ++CC_MODE (CCVEQ); ++CC_MODE (CCVEQANY); ++ ++CC_MODE (CCVH); ++CC_MODE (CCVHANY); ++CC_MODE (CCVHU); ++CC_MODE (CCVHUANY); ++ ++CC_MODE (CCVFH); ++CC_MODE (CCVFHANY); ++CC_MODE (CCVFHE); ++CC_MODE (CCVFHEANY); ++ ++ ++/* Vector modes. 
*/ ++ ++VECTOR_MODES (INT, 2); /* V2QI */ ++VECTOR_MODES (INT, 4); /* V4QI V2HI */ ++VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ ++VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ ++ ++VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ ++VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ ++VECTOR_MODE (FLOAT, DF, 2); /* V2DF */ ++ ++VECTOR_MODE (INT, QI, 1); /* V1QI */ ++VECTOR_MODE (INT, HI, 1); /* V1HI */ ++VECTOR_MODE (INT, SI, 1); /* V1SI */ ++VECTOR_MODE (INT, DI, 1); /* V1DI */ ++VECTOR_MODE (INT, TI, 1); /* V1TI */ ++ ++VECTOR_MODE (FLOAT, SF, 1); /* V1SF */ ++VECTOR_MODE (FLOAT, DF, 1); /* V1DF */ ++VECTOR_MODE (FLOAT, TF, 1); /* V1TF */ +--- gcc/config/s390/s390.opt 2015-06-18 16:33:05.000000000 +0200 ++++ gcc/config/s390/s390.opt 2016-05-11 17:33:59.000000000 +0200 +@@ -76,6 +76,9 @@ Enum(processor_type) String(z196) Value( + EnumValue + Enum(processor_type) String(zEC12) Value(PROCESSOR_2827_ZEC12) + ++EnumValue ++Enum(processor_type) String(z13) Value(PROCESSOR_2964_Z13) ++ + mbackchain + Target Report Mask(BACKCHAIN) + Maintain backchain pointer +@@ -118,6 +121,10 @@ mhtm + Target Report Mask(OPT_HTM) + Use hardware transactional execution instructions + ++mvx ++Target Report Mask(OPT_VX) ++Use hardware vector facility instructions and enable the vector ABI ++ + mpacked-stack + Target Report Mask(PACKED_STACK) + Use packed stack layout +@@ -146,6 +153,11 @@ mmvcle + Target Report Mask(MVCLE) + mvcle use + ++mzvector ++Target Report Mask(ZVECTOR) ++Enable the z vector language extension providing the context-sensitive ++vector macro and enable the Altivec-style builtins in vecintrin.h ++ + mwarn-dynamicstack + Target RejectNegative Var(s390_warn_dynamicstack_p) + Warn if a function uses alloca or creates an array with dynamic size +--- gcc/config/s390/s390-opts.h 2013-01-21 16:11:50.000000000 +0100 ++++ gcc/config/s390/s390-opts.h 2016-05-11 15:53:24.000000000 +0200 +@@ -35,6 +35,7 @@ enum processor_type + PROCESSOR_2097_Z10, + PROCESSOR_2817_Z196, + PROCESSOR_2827_ZEC12, ++ 
PROCESSOR_2964_Z13, + PROCESSOR_max + }; + +--- gcc/config/s390/s390-protos.h 2014-01-14 16:37:04.000000000 +0100 ++++ gcc/config/s390/s390-protos.h 2016-05-11 19:28:17.220349132 +0200 +@@ -41,6 +41,9 @@ extern void s390_set_has_landing_pad_p ( + extern bool s390_hard_regno_mode_ok (unsigned int, enum machine_mode); + extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int); + extern int s390_class_max_nregs (enum reg_class, enum machine_mode); ++extern int s390_cannot_change_mode_class (enum machine_mode, enum machine_mode, ++ enum reg_class); ++extern bool s390_function_arg_vector (enum machine_mode, const_tree); + + #ifdef RTX_CODE + extern int s390_extra_constraint_str (rtx, int, const char *); +@@ -49,6 +52,9 @@ extern int s390_const_double_ok_for_cons + extern int s390_single_part (rtx, enum machine_mode, enum machine_mode, int); + extern unsigned HOST_WIDE_INT s390_extract_part (rtx, enum machine_mode, int); + extern bool s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT, int, int *, int *); ++extern bool s390_const_vec_duplicate_p (rtx); ++extern bool s390_contiguous_bitmask_vector_p (rtx, int *, int *); ++extern bool s390_bytemask_vector_p (rtx, unsigned *); + extern bool s390_split_ok_p (rtx, rtx, enum machine_mode, int); + extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT); + extern bool s390_offset_p (rtx, rtx, rtx); +@@ -81,6 +87,8 @@ extern void s390_load_address (rtx, rtx) + extern bool s390_expand_movmem (rtx, rtx, rtx); + extern void s390_expand_setmem (rtx, rtx, rtx); + extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); ++extern void s390_expand_vec_strlen (rtx, rtx, rtx); ++extern void s390_expand_vec_movstr (rtx, rtx, rtx); + extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); + extern bool s390_expand_insv (rtx, rtx, rtx, rtx); + extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, +@@ -88,6 +96,10 @@ extern void s390_expand_cs_hqi (enum mac + extern void s390_expand_atomic (enum machine_mode, enum 
rtx_code, + rtx, rtx, rtx, bool); + extern void s390_expand_tbegin (rtx, rtx, rtx, bool); ++extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx); ++extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool); ++extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); ++extern void s390_expand_vec_init (rtx, rtx); + extern rtx s390_return_addr_rtx (int, rtx); + extern rtx s390_back_chain_rtx (void); + extern rtx s390_emit_call (rtx, rtx, rtx, rtx); +@@ -113,3 +125,10 @@ extern bool s390_extzv_shift_ok (int, in + extern void s390_asm_output_function_label (FILE *, const char *, tree); + + #endif /* RTX_CODE */ ++ ++/* s390-c.c routines */ ++extern void s390_cpu_cpp_builtins (struct cpp_reader *); ++extern void s390_register_target_pragmas (void); ++ ++/* Routines for s390-c.c */ ++extern bool s390_const_operand_ok (tree, int, int, tree); +--- gcc/config/s390/t-s390 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/t-s390 2016-05-11 17:12:39.000000000 +0200 +@@ -0,0 +1,27 @@ ++# Copyright (C) 2015 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . 
++ ++TM_H += $(srcdir)/config/s390/s390-builtins.def ++TM_H += $(srcdir)/config/s390/s390-builtin-types.def ++ ++s390-c.o: $(srcdir)/config/s390/s390-c.c \ ++ $(srcdir)/config/s390/s390-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ++ $(TM_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \ ++ $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H) ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/s390/s390-c.c +--- gcc/config/s390/vecintrin.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/vecintrin.h 2016-05-11 18:10:53.000000000 +0200 +@@ -0,0 +1,277 @@ ++/* GNU compiler vector extension intrinsics ++ Copyright (C) 2015 Free Software Foundation, Inc. ++ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef _VECINTRIN_H ++#define _VECINTRIN_H ++ ++#ifdef __VEC__ ++ ++#define __VFTCI_ZERO 1<<11 ++#define __VFTCI_ZERO_N 1<<10 ++#define __VFTCI_NORMAL 1<<9 ++#define __VFTCI_NORMAL_N 1<<8 ++#define __VFTCI_SUBNORMAL 1<<7 ++#define __VFTCI_SUBNORMAL_N 1<<6 ++#define __VFTCI_INF 1<<5 ++#define __VFTCI_INF_N 1<<4 ++#define __VFTCI_QNAN 1<<3 ++#define __VFTCI_QNAN_N 1<<2 ++#define __VFTCI_SNAN 1<<1 ++#define __VFTCI_SNAN_N 1<<0 ++ ++/* This also accepts a type for its parameter, so it is not enough ++ to #define vec_step to __builtin_vec_step.
*/ ++#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) ++ ++static inline int ++__lcbb(const void *ptr, int bndry) ++{ ++ int code; ++ switch (bndry) ++ { ++ case 64: code = 0; break; ++ case 128: code = 1; break; ++ case 256: code = 2; break; ++ case 512: code = 3; break; ++ case 1024: code = 4; break; ++ case 2048: code = 5; break; ++ case 4096: code = 6; break; ++ default: return 0; ++ } ++ return __builtin_s390_lcbb (ptr, code); ++} ++ ++#define vec_all_nle(X, Y) vec_all_nge ((Y), (X)) ++#define vec_all_nlt(X, Y) vec_all_ngt ((Y), (X)) ++#define vec_any_nle(X, Y) vec_any_nge ((Y), (X)) ++#define vec_any_nlt(X, Y) vec_any_ngt ((Y), (X)) ++#define vec_genmask __builtin_s390_vgbm ++#define vec_genmasks_8 __builtin_s390_vgmb ++#define vec_genmasks_16 __builtin_s390_vgmh ++#define vec_genmasks_32 __builtin_s390_vgmf ++#define vec_genmasks_64 __builtin_s390_vgmg ++#define vec_splat_u8 __builtin_s390_vec_splat_u8 ++#define vec_splat_s8 __builtin_s390_vec_splat_s8 ++#define vec_splat_u16 __builtin_s390_vec_splat_u16 ++#define vec_splat_s16 __builtin_s390_vec_splat_s16 ++#define vec_splat_u32 __builtin_s390_vec_splat_u32 ++#define vec_splat_s32 __builtin_s390_vec_splat_s32 ++#define vec_splat_u64 __builtin_s390_vec_splat_u64 ++#define vec_splat_s64 __builtin_s390_vec_splat_s64 ++#define vec_add_u128 __builtin_s390_vaq ++#define vec_addc_u128 __builtin_s390_vaccq ++#define vec_adde_u128 __builtin_s390_vacq ++#define vec_addec_u128 __builtin_s390_vacccq ++#define vec_checksum __builtin_s390_vcksm ++#define vec_gfmsum_128 __builtin_s390_vgfmg ++#define vec_gfmsum_accum_128 __builtin_s390_vgfmag ++#define vec_sub_u128 __builtin_s390_vsq ++#define vec_subc_u128 __builtin_s390_vscbiq ++#define vec_sube_u128 __builtin_s390_vsbiq ++#define vec_subec_u128 __builtin_s390_vsbcbiq ++#define vec_ceil(X) __builtin_s390_vfidb((X), 4, 6) ++#define vec_roundp(X) __builtin_s390_vfidb((X), 4, 6) ++#define vec_floor(X) __builtin_s390_vfidb((X), 4, 7) ++#define vec_roundm(X) 
__builtin_s390_vfidb((X), 4, 7) ++#define vec_trunc(X) __builtin_s390_vfidb((X), 4, 5) ++#define vec_roundz(X) __builtin_s390_vfidb((X), 4, 5) ++#define vec_roundc(X) __builtin_s390_vfidb((X), 4, 0) ++#define vec_round(X) __builtin_s390_vfidb((X), 4, 4) ++#define vec_madd __builtin_s390_vfmadb ++#define vec_msub __builtin_s390_vfmsdb ++ ++static inline int ++vec_all_nan (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_QNAN ++ | __VFTCI_QNAN_N ++ | __VFTCI_SNAN ++ | __VFTCI_SNAN_N, &cc); ++ return cc == 0 ? 1 : 0; ++} ++ ++static inline int ++vec_all_numeric (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_NORMAL ++ | __VFTCI_NORMAL_N ++ | __VFTCI_SUBNORMAL ++ | __VFTCI_SUBNORMAL_N, &cc); ++ return cc == 0 ? 1 : 0; ++} ++ ++static inline int ++vec_any_nan (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_QNAN ++ | __VFTCI_QNAN_N ++ | __VFTCI_SNAN ++ | __VFTCI_SNAN_N, &cc); ++ return cc != 3 ? 1 : 0; ++} ++ ++static inline int ++vec_any_numeric (__vector double a) ++{ ++ int cc; ++ __builtin_s390_vftcidb (a, ++ __VFTCI_NORMAL ++ | __VFTCI_NORMAL_N ++ | __VFTCI_SUBNORMAL ++ | __VFTCI_SUBNORMAL_N, &cc); ++ return cc != 3 ? 
1 : 0; ++} ++#define vec_gather_element __builtin_s390_vec_gather_element ++#define vec_xld2 __builtin_s390_vec_xld2 ++#define vec_xlw4 __builtin_s390_vec_xlw4 ++#define vec_splats __builtin_s390_vec_splats ++#define vec_insert __builtin_s390_vec_insert ++#define vec_promote __builtin_s390_vec_promote ++#define vec_extract __builtin_s390_vec_extract ++#define vec_insert_and_zero __builtin_s390_vec_insert_and_zero ++#define vec_load_bndry __builtin_s390_vec_load_bndry ++#define vec_load_pair __builtin_s390_vec_load_pair ++#define vec_load_len __builtin_s390_vec_load_len ++#define vec_mergeh __builtin_s390_vec_mergeh ++#define vec_mergel __builtin_s390_vec_mergel ++#define vec_pack __builtin_s390_vec_pack ++#define vec_packs __builtin_s390_vec_packs ++#define vec_packs_cc __builtin_s390_vec_packs_cc ++#define vec_packsu __builtin_s390_vec_packsu ++#define vec_packsu_cc __builtin_s390_vec_packsu_cc ++#define vec_perm __builtin_s390_vec_perm ++#define vec_permi __builtin_s390_vec_permi ++#define vec_splat __builtin_s390_vec_splat ++#define vec_scatter_element __builtin_s390_vec_scatter_element ++#define vec_sel __builtin_s390_vec_sel ++#define vec_extend_s64 __builtin_s390_vec_extend_s64 ++#define vec_xstd2 __builtin_s390_vec_xstd2 ++#define vec_xstw4 __builtin_s390_vec_xstw4 ++#define vec_store_len __builtin_s390_vec_store_len ++#define vec_unpackh __builtin_s390_vec_unpackh ++#define vec_unpackl __builtin_s390_vec_unpackl ++#define vec_addc __builtin_s390_vec_addc ++#define vec_and __builtin_s390_vec_and ++#define vec_andc __builtin_s390_vec_andc ++#define vec_avg __builtin_s390_vec_avg ++#define vec_all_eq __builtin_s390_vec_all_eq ++#define vec_all_ne __builtin_s390_vec_all_ne ++#define vec_all_ge __builtin_s390_vec_all_ge ++#define vec_all_gt __builtin_s390_vec_all_gt ++#define vec_all_le __builtin_s390_vec_all_le ++#define vec_all_lt __builtin_s390_vec_all_lt ++#define vec_any_eq __builtin_s390_vec_any_eq ++#define vec_any_ne __builtin_s390_vec_any_ne ++#define 
vec_any_ge __builtin_s390_vec_any_ge ++#define vec_any_gt __builtin_s390_vec_any_gt ++#define vec_any_le __builtin_s390_vec_any_le ++#define vec_any_lt __builtin_s390_vec_any_lt ++#define vec_cmpeq __builtin_s390_vec_cmpeq ++#define vec_cmpge __builtin_s390_vec_cmpge ++#define vec_cmpgt __builtin_s390_vec_cmpgt ++#define vec_cmple __builtin_s390_vec_cmple ++#define vec_cmplt __builtin_s390_vec_cmplt ++#define vec_cntlz __builtin_s390_vec_cntlz ++#define vec_cnttz __builtin_s390_vec_cnttz ++#define vec_xor __builtin_s390_vec_xor ++#define vec_gfmsum __builtin_s390_vec_gfmsum ++#define vec_gfmsum_accum __builtin_s390_vec_gfmsum_accum ++#define vec_abs __builtin_s390_vec_abs ++#define vec_max __builtin_s390_vec_max ++#define vec_min __builtin_s390_vec_min ++#define vec_mladd __builtin_s390_vec_mladd ++#define vec_mhadd __builtin_s390_vec_mhadd ++#define vec_meadd __builtin_s390_vec_meadd ++#define vec_moadd __builtin_s390_vec_moadd ++#define vec_mulh __builtin_s390_vec_mulh ++#define vec_mule __builtin_s390_vec_mule ++#define vec_mulo __builtin_s390_vec_mulo ++#define vec_nor __builtin_s390_vec_nor ++#define vec_or __builtin_s390_vec_or ++#define vec_popcnt __builtin_s390_vec_popcnt ++#define vec_rl __builtin_s390_vec_rl ++#define vec_rli __builtin_s390_vec_rli ++#define vec_rl_mask __builtin_s390_vec_rl_mask ++#define vec_sll __builtin_s390_vec_sll ++#define vec_slb __builtin_s390_vec_slb ++#define vec_sld __builtin_s390_vec_sld ++#define vec_sldw __builtin_s390_vec_sldw ++#define vec_sral __builtin_s390_vec_sral ++#define vec_srab __builtin_s390_vec_srab ++#define vec_srl __builtin_s390_vec_srl ++#define vec_srb __builtin_s390_vec_srb ++#define vec_subc __builtin_s390_vec_subc ++#define vec_sum2 __builtin_s390_vec_sum2 ++#define vec_sum_u128 __builtin_s390_vec_sum_u128 ++#define vec_sum4 __builtin_s390_vec_sum4 ++#define vec_test_mask __builtin_s390_vec_test_mask ++#define vec_find_any_eq_idx __builtin_s390_vec_find_any_eq_idx ++#define vec_find_any_ne_idx 
__builtin_s390_vec_find_any_ne_idx ++#define vec_find_any_eq_or_0_idx __builtin_s390_vec_find_any_eq_or_0_idx ++#define vec_find_any_ne_or_0_idx __builtin_s390_vec_find_any_ne_or_0_idx ++#define vec_find_any_eq __builtin_s390_vec_find_any_eq ++#define vec_find_any_ne __builtin_s390_vec_find_any_ne ++#define vec_find_any_eq_idx_cc __builtin_s390_vec_find_any_eq_idx_cc ++#define vec_find_any_ne_idx_cc __builtin_s390_vec_find_any_ne_idx_cc ++#define vec_find_any_eq_or_0_idx_cc __builtin_s390_vec_find_any_eq_or_0_idx_cc ++#define vec_find_any_ne_or_0_idx_cc __builtin_s390_vec_find_any_ne_or_0_idx_cc ++#define vec_find_any_eq_cc __builtin_s390_vec_find_any_eq_cc ++#define vec_find_any_ne_cc __builtin_s390_vec_find_any_ne_cc ++#define vec_cmpeq_idx __builtin_s390_vec_cmpeq_idx ++#define vec_cmpeq_or_0_idx __builtin_s390_vec_cmpeq_or_0_idx ++#define vec_cmpeq_idx_cc __builtin_s390_vec_cmpeq_idx_cc ++#define vec_cmpeq_or_0_idx_cc __builtin_s390_vec_cmpeq_or_0_idx_cc ++#define vec_cmpne_idx __builtin_s390_vec_cmpne_idx ++#define vec_cmpne_or_0_idx __builtin_s390_vec_cmpne_or_0_idx ++#define vec_cmpne_idx_cc __builtin_s390_vec_cmpne_idx_cc ++#define vec_cmpne_or_0_idx_cc __builtin_s390_vec_cmpne_or_0_idx_cc ++#define vec_cp_until_zero __builtin_s390_vec_cp_until_zero ++#define vec_cp_until_zero_cc __builtin_s390_vec_cp_until_zero_cc ++#define vec_cmprg_idx __builtin_s390_vec_cmprg_idx ++#define vec_cmpnrg_idx __builtin_s390_vec_cmpnrg_idx ++#define vec_cmprg_or_0_idx __builtin_s390_vec_cmprg_or_0_idx ++#define vec_cmpnrg_or_0_idx __builtin_s390_vec_cmpnrg_or_0_idx ++#define vec_cmprg __builtin_s390_vec_cmprg ++#define vec_cmpnrg __builtin_s390_vec_cmpnrg ++#define vec_cmprg_idx_cc __builtin_s390_vec_cmprg_idx_cc ++#define vec_cmpnrg_idx_cc __builtin_s390_vec_cmpnrg_idx_cc ++#define vec_cmprg_or_0_idx_cc __builtin_s390_vec_cmprg_or_0_idx_cc ++#define vec_cmpnrg_or_0_idx_cc __builtin_s390_vec_cmpnrg_or_0_idx_cc ++#define vec_cmprg_cc __builtin_s390_vec_cmprg_cc ++#define 
vec_cmpnrg_cc __builtin_s390_vec_cmpnrg_cc ++#define vec_all_nge __builtin_s390_vec_all_nge ++#define vec_all_ngt __builtin_s390_vec_all_ngt ++#define vec_any_nge __builtin_s390_vec_any_nge ++#define vec_any_ngt __builtin_s390_vec_any_ngt ++#define vec_ctd __builtin_s390_vec_ctd ++#define vec_ctd_s64 __builtin_s390_vec_ctd_s64 ++#define vec_ctd_u64 __builtin_s390_vec_ctd_u64 ++#define vec_ctsl __builtin_s390_vec_ctsl ++#define vec_ctul __builtin_s390_vec_ctul ++#define vec_ld2f __builtin_s390_vec_ld2f ++#define vec_st2f __builtin_s390_vec_st2f ++#endif /* __VEC__ */ ++#endif /* _VECINTRIN_H */ +--- gcc/config/s390/vector.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/vector.md 2016-05-11 18:11:04.000000000 +0200 +@@ -0,0 +1,1229 @@ ++;;- Instruction patterns for the System z vector facility ++;; Copyright (C) 2015 Free Software Foundation, Inc. ++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify it under ++;; the terms of the GNU General Public License as published by the Free ++;; Software Foundation; either version 3, or (at your option) any later ++;; version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++;; for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . 
++ ++; All vector modes supported in a vector register ++(define_mode_iterator V ++ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF ++ V2SF V4SF V1DF V2DF]) ++(define_mode_iterator VT ++ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF ++ V2SF V4SF V1DF V2DF V1TF V1TI TI]) ++ ++; All vector modes directly supported by the hardware having full vector reg size ++; V_HW2 is duplicate of V_HW for having two iterators expanding ++; independently e.g. vcond ++(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V2DF]) ++(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF]) ++; Including TI for instructions that support it (va, vn, ...) ++(define_mode_iterator VT_HW [V16QI V8HI V4SI V2DI V2DF V1TI TI]) ++ ++; All full size integer vector modes supported in a vector register + TImode ++(define_mode_iterator VIT_HW [V16QI V8HI V4SI V2DI V1TI TI]) ++(define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI]) ++(define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI]) ++(define_mode_iterator VI_HW_HS [V8HI V4SI]) ++(define_mode_iterator VI_HW_QH [V16QI V8HI]) ++ ++; All integer vector modes supported in a vector register + TImode ++(define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1TI TI]) ++(define_mode_iterator VI [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI]) ++(define_mode_iterator VI_QHS [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI]) ++ ++(define_mode_iterator V_8 [V1QI]) ++(define_mode_iterator V_16 [V2QI V1HI]) ++(define_mode_iterator V_32 [V4QI V2HI V1SI V1SF]) ++(define_mode_iterator V_64 [V8QI V4HI V2SI V2SF V1DI V1DF]) ++(define_mode_iterator V_128 [V16QI V8HI V4SI V4SF V2DI V2DF V1TI V1TF]) ++ ++; A blank for vector modes and a * for TImode. This is used to hide ++; the TImode expander name in case it is defined already. See addti3 ++; for an example. 
++(define_mode_attr ti* [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "") ++ (V1HI "") (V2HI "") (V4HI "") (V8HI "") ++ (V1SI "") (V2SI "") (V4SI "") ++ (V1DI "") (V2DI "") ++ (V1TI "*") (TI "*")]) ++ ++; The scalar mode of a single element of the vector. ++(define_mode_attr non_vec[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI") ++ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI") ++ (V1SI "SI") (V2SI "SI") (V4SI "SI") ++ (V1DI "DI") (V2DI "DI") ++ (V1TI "TI") ++ (V1SF "SF") (V2SF "SF") (V4SF "SF") ++ (V1DF "DF") (V2DF "DF") ++ (V1TF "TF")]) ++ ++; The instruction suffix selecting the element size (QI: b, HI: h, SI/SF: f, DI/DF: g, TI/TF: q). ++(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") ++ (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") ++ (V1SI "f") (V2SI "f") (V4SI "f") ++ (V1DI "g") (V2DI "g") ++ (V1TI "q") (TI "q") ++ (V1SF "f") (V2SF "f") (V4SF "f") ++ (V1DF "g") (V2DF "g") ++ (V1TF "q")]) ++ ++; This is for vmalhw. It gets a 'w' attached to avoid confusion with ++; multiply and add logical high vmalh. All other element sizes map to "". ++(define_mode_attr w [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "") ++ (V1HI "w") (V2HI "w") (V4HI "w") (V8HI "w") ++ (V1SI "") (V2SI "") (V4SI "") ++ (V1DI "") (V2DI "")]) ++ ++; Resulting mode of a vector comparison. For floating point modes an ++; integer vector mode with the same element size is picked. ++(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI") ++ (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI") ++ (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI") ++ (V1DI "V1DI") (V2DI "V2DI") ++ (V1TI "V1TI") ++ (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") ++ (V1DF "V1DI") (V2DF "V2DI") ++ (V1TF "V1TI")]) ++ ++; Vector with doubled element size.
++(define_mode_attr vec_double [(V1QI "V1HI") (V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI "V8HI") ++ (V1HI "V1SI") (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI") ++ (V1SI "V1DI") (V2SI "V1DI") (V4SI "V2DI") ++ (V1DI "V1TI") (V2DI "V1TI") ++ (V1SF "V1DF") (V2SF "V1DF") (V4SF "V2DF")]) ++ ++; Vector with half the element size. ++(define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI "V16QI") ++ (V1SI "V2HI") (V2SI "V4HI") (V4SI "V8HI") ++ (V1DI "V2SI") (V2DI "V4SI") ++ (V1TI "V2DI") ++ (V1DF "V2SF") (V2DF "V4SF") ++ (V1TF "V1DF")]) ++ ++; The comparisons not setting CC iterate over the rtx code. ++(define_code_iterator VFCMP_HW_OP [eq gt ge]) ++(define_code_attr asm_fcmp_op [(eq "e") (gt "h") (ge "he")]) ++ ++ ++ ++; Comparison operators on int and fp compares which are directly ++; supported by the HW. ++(define_code_iterator VICMP_HW_OP [eq gt gtu]) ++; For int insn_cmp_op can be used in the insn name as well as in the asm output. ++(define_code_attr insn_cmp_op [(eq "eq") (gt "h") (gtu "hl") (ge "he")]) ++ ++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4) ++(define_constants ++ [(VSTRING_FLAG_IN 8) ; invert result ++ (VSTRING_FLAG_RT 4) ; result type ++ (VSTRING_FLAG_ZS 2) ; zero search ++ (VSTRING_FLAG_CS 1)]) ; condition code set ++ ++(include "vx-builtins.md") ++ ++; Full HW vector size moves ++(define_insn "mov" ++ [(set (match_operand:V_128 0 "nonimmediate_operand" "=v, v,QR, v, v, v, v, v,v,d") ++ (match_operand:V_128 1 "general_operand" " v,QR, v,j00,jm1,jyy,jxx,jKK,d,v"))] ++ "TARGET_VX" ++ "@ ++ vlr\t%v0,%v1 ++ vl\t%v0,%1 ++ vst\t%v1,%0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ vrepi\t%v0,%h1 ++ vlvgp\t%v0,%1,%N1 ++ #" ++ [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")]) ++ ++(define_split ++ [(set (match_operand:V_128 0 "register_operand" "") ++ (match_operand:V_128 1 "register_operand" ""))] ++ "TARGET_VX && GENERAL_REG_P (operands[0]) && VECTOR_REG_P 
(operands[1])" ++ [(set (match_dup 2) ++ (unspec:DI [(subreg:V2DI (match_dup 1) 0) ++ (const_int 0)] UNSPEC_VEC_EXTRACT)) ++ (set (match_dup 3) ++ (unspec:DI [(subreg:V2DI (match_dup 1) 0) ++ (const_int 1)] UNSPEC_VEC_EXTRACT))] ++{ ++ operands[2] = operand_subword (operands[0], 0, 0, mode); ++ operands[3] = operand_subword (operands[0], 1, 0, mode); ++}) ++ ++; Moves for smaller vector modes. ++ ++; In these patterns only the vlr, vone, and vzero instructions write ++; VR bytes outside the mode. This should be ok since we disallow ++; formerly bigger modes being accessed with smaller modes via ++; subreg. Note: The vone, vzero instructions could easily be replaced ++; with vlei which would only access the bytes belonging to the mode. ++; However, this would probably be slower. ++ ++(define_insn "mov" ++ [(set (match_operand:V_8 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, S, Q, S, d, d,d,d,d,R,T") ++ (match_operand:V_8 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,j00,jm1,jm1,j00,jm1,R,T,b,d,d"))] ++ "" ++ "@ ++ vlr\t%v0,%v1 ++ vlvgb\t%v0,%1,0 ++ vlgvb\t%0,%v1,0 ++ vleb\t%v0,%1,0 ++ vsteb\t%v1,%0,0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ lr\t%0,%1 ++ mvi\t%0,0 ++ mviy\t%0,0 ++ mvi\t%0,-1 ++ mviy\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lh\t%0,%1 ++ lhy\t%0,%1 ++ lhrl\t%0,%1 ++ stc\t%1,%0 ++ stcy\t%1,%0" ++ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SI,SIY,SI,SIY,RI,RI,RX,RXY,RIL,RX,RXY")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_16 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, Q, d, d,d,d,d,R,T,b") ++ (match_operand:V_16 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,jm1,j00,jm1,R,T,b,d,d,d"))] ++ "" ++ "@ ++ vlr\t%v0,%v1 ++ vlvgh\t%v0,%1,0 ++ vlgvh\t%0,%v1,0 ++ vleh\t%v0,%1,0 ++ vsteh\t%v1,%0,0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ lr\t%0,%1 ++ mvhhi\t%0,0 ++ mvhhi\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lh\t%0,%1 ++ lhy\t%0,%1 ++ lhrl\t%0,%1 ++ 
sth\t%1,%0 ++ sthy\t%1,%0 ++ sthrl\t%1,%0" ++ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SIL,SIL,RI,RI,RX,RXY,RIL,RX,RXY,RIL")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_32 0 "nonimmediate_operand" "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,d,d,d,d,R,T,b") ++ (match_operand:V_32 1 "general_operand" " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,b,d,R,T,d,d,d"))] ++ "TARGET_VX" ++ "@ ++ lder\t%v0,%v1 ++ lde\t%0,%1 ++ ley\t%0,%1 ++ ste\t%1,%0 ++ stey\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgf\t%v0,%1,0 ++ vlgvf\t%0,%v1,0 ++ vlef\t%v0,%1,0 ++ vstef\t%v1,%0,0 ++ lzer\t%v0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ mvhi\t%0,0 ++ mvhi\t%0,-1 ++ lhi\t%0,0 ++ lhi\t%0,-1 ++ lrl\t%0,%1 ++ lr\t%0,%1 ++ l\t%0,%1 ++ ly\t%0,%1 ++ st\t%1,%0 ++ sty\t%1,%0 ++ strl\t%1,%0" ++ [(set_attr "op_type" "RRE,RXE,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,SIL,SIL,RI,RI, ++ RIL,RR,RX,RXY,RX,RXY,RIL")]) ++ ++(define_insn "mov" ++ [(set (match_operand:V_64 0 "nonimmediate_operand" ++ "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,f,d,d,d, d,RT,b") ++ (match_operand:V_64 1 "general_operand" ++ " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,d,f,b,d,RT, d,d"))] ++ "TARGET_ZARCH" ++ "@ ++ ldr\t%0,%1 ++ ld\t%0,%1 ++ ldy\t%0,%1 ++ std\t%1,%0 ++ stdy\t%1,%0 ++ vlr\t%v0,%v1 ++ vlvgg\t%v0,%1,0 ++ vlgvg\t%0,%v1,0 ++ vleg\t%v0,%1,0 ++ vsteg\t%v1,%0,0 ++ lzdr\t%0 ++ vzero\t%v0 ++ vone\t%v0 ++ vgbm\t%v0,%t1 ++ vgm\t%v0,%s1,%e1 ++ mvghi\t%0,0 ++ mvghi\t%0,-1 ++ lghi\t%0,0 ++ lghi\t%0,-1 ++ ldgr\t%0,%1 ++ lgdr\t%0,%1 ++ lgrl\t%0,%1 ++ lgr\t%0,%1 ++ lg\t%0,%1 ++ stg\t%1,%0 ++ stgrl\t%1,%0" ++ [(set_attr "op_type" "RRE,RX,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI, ++ SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")]) ++ ++ ++; vec_load_lanes? ++ ++; vec_store_lanes? ++ ++; FIXME: Support also vector mode operands for 1 ++; FIXME: A target memory operand seems to be useful otherwise we end ++; up with vl vlvgg vst.
Shouldn't the middle-end be able to handle ++; that itself? ++(define_insn "*vec_set" ++ [(set (match_operand:V 0 "register_operand" "=v, v,v") ++ (unspec:V [(match_operand: 1 "general_operand" "d,QR,K") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y, I,I") ++ (match_operand:V 3 "register_operand" "0, 0,0")] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "@ ++ vlvg\t%v0,%1,%Y2 ++ vle\t%v0,%1,%2 ++ vlei\t%v0,%1,%2" ++ [(set_attr "op_type" "VRS,VRX,VRI")]) ++ ++; vec_set is supposed to *modify* an existing vector so operand 0 is ++; duplicated as input operand. ++(define_expand "vec_set" ++ [(set (match_operand:V 0 "register_operand" "") ++ (unspec:V [(match_operand: 1 "general_operand" "") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "") ++ (match_dup 0)] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX") ++ ++; FIXME: Support also vector mode operands for 0 ++; FIXME: This should be (vec_select ..) or something but it does only allow constant selectors :( ++; This is used via RTL standard name as well as for expanding the builtin ++(define_insn "vec_extract" ++ [(set (match_operand: 0 "nonimmediate_operand" "=d,QR") ++ (unspec: [(match_operand:V 1 "register_operand" " v, v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" " Y, I")] ++ UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX" ++ "@ ++ vlgv\t%0,%v1,%Y2 ++ vste\t%v1,%0,%2" ++ [(set_attr "op_type" "VRS,VRX")]) ++ ++(define_expand "vec_init" ++ [(match_operand:V_HW 0 "register_operand" "") ++ (match_operand:V_HW 1 "nonmemory_operand" "")] ++ "TARGET_VX" ++{ ++ s390_expand_vec_init (operands[0], operands[1]); ++ DONE; ++}) ++ ++; Replicate from vector element ++(define_insn "*vec_splat" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (vec_duplicate:V_HW ++ (vec_select: ++ (match_operand:V_HW 1 "register_operand" "v") ++ (parallel ++ [(match_operand:QI 2 "const_mask_operand" "C")]))))] ++ "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (mode)" ++ "vrep\t%v0,%v1,%2" ++ [(set_attr "op_type" "VRI")]) ++ 
++(define_insn "*vec_splats" ++ [(set (match_operand:V_HW 0 "register_operand" "=v,v,v,v") ++ (vec_duplicate:V_HW (match_operand: 1 "general_operand" "QR,K,v,d")))] ++ "TARGET_VX" ++ "@ ++ vlrep\t%v0,%1 ++ vrepi\t%v0,%h1 ++ vrep\t%v0,%v1,0 ++ #" ++ [(set_attr "op_type" "VRX,VRI,VRI,*")]) ++ ++; vec_splats is supposed to replicate op1 into all elements of op0 ++; This splitter first sets the rightmost element of op0 to op1 and ++; then does a vec_splat to replicate that element into all other ++; elements. ++(define_split ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (match_operand: 1 "register_operand" "")))] ++ "TARGET_VX && GENERAL_REG_P (operands[1])" ++ [(set (match_dup 0) ++ (unspec:V_HW [(match_dup 1) (match_dup 2) (match_dup 0)] UNSPEC_VEC_SET)) ++ (set (match_dup 0) ++ (vec_duplicate:V_HW ++ (vec_select: ++ (match_dup 0) (parallel [(match_dup 2)]))))] ++{ ++ operands[2] = GEN_INT (GET_MODE_NUNITS (mode) - 1); ++}) ++ ++(define_expand "vcond" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (if_then_else:V_HW ++ (match_operator 3 "comparison_operator" ++ [(match_operand:V_HW2 4 "register_operand" "") ++ (match_operand:V_HW2 5 "register_operand" "")]) ++ (match_operand:V_HW 1 "nonmemory_operand" "") ++ (match_operand:V_HW 2 "nonmemory_operand" "")))] ++ "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode)" ++{ ++ s390_expand_vcond (operands[0], operands[1], operands[2], ++ GET_CODE (operands[3]), operands[4], operands[5]); ++ DONE; ++}) ++ ++(define_expand "vcondu" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (if_then_else:V_HW ++ (match_operator 3 "comparison_operator" ++ [(match_operand:V_HW2 4 "register_operand" "") ++ (match_operand:V_HW2 5 "register_operand" "")]) ++ (match_operand:V_HW 1 "nonmemory_operand" "") ++ (match_operand:V_HW 2 "nonmemory_operand" "")))] ++ "TARGET_VX && GET_MODE_NUNITS (mode) == GET_MODE_NUNITS (mode)" ++{ ++ s390_expand_vcond (operands[0], operands[1], operands[2], 
++ GET_CODE (operands[3]), operands[4], operands[5]); ++ DONE; ++}) ++ ++; We only have HW support for byte vectors. The middle-end is ++; supposed to lower the mode if required. ++(define_insn "vec_permv16qi" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_PERM))] ++ "TARGET_VX" ++ "vperm\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_perm_const for V2DI using vpdi? ++ ++;; ++;; Vector integer arithmetic instructions ++;; ++ ++; vab, vah, vaf, vag, vaq ++ ++; We use nonimmediate_operand instead of register_operand since it is ++; better to have the reloads into VRs instead of splitting the ++; operation into two DImode ADDs. ++(define_insn "add3" ++ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v") ++ (plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "%v") ++ (match_operand:VIT 2 "general_operand" "v")))] ++ "TARGET_VX" ++ "va\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsb, vsh, vsf, vsg, vsq ++(define_insn "sub3" ++ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v") ++ (minus:VIT (match_operand:VIT 1 "nonimmediate_operand" "v") ++ (match_operand:VIT 2 "general_operand" "v")))] ++ "TARGET_VX" ++ "vs\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmlb, vmlhw, vmlf ++(define_insn "mul3" ++ [(set (match_operand:VI_QHS 0 "register_operand" "=v") ++ (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vml\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vlcb, vlch, vlcf, vlcg ++(define_insn "neg2" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (neg:VI (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vlc\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vlpb, vlph, vlpf, vlpg ++(define_insn "abs2" ++ [(set (match_operand:VI 0 
"register_operand" "=v") ++ (abs:VI (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vlp\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector sum across ++ ++; Sum across DImode parts of the 1st operand and add the rightmost ++; element of 2nd operand ++; vsumgh, vsumgf ++(define_insn "*vec_sum2" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "v") ++ (match_operand:VI_HW_HS 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX" ++ "vsumg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsumb, vsumh ++(define_insn "*vec_sum4" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "v") ++ (match_operand:VI_HW_QH 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX" ++ "vsum\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Vector bit instructions (int + fp) ++;; ++ ++; Vector and ++ ++(define_insn "and3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (and:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector or ++ ++(define_insn "ior3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (ior:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector xor ++ ++(define_insn "xor3" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (xor:VT (match_operand:VT 1 "register_operand" "%v") ++ (match_operand:VT 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Bitwise inversion of a vector - used for vec_cmpne ++(define_insn "*not" ++ [(set (match_operand:VT 0 "register_operand" "=v") ++ (not:VT (match_operand:VT 1 "register_operand" "v")))] ++ "TARGET_VX" ++ 
"vnot\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector population count ++ ++(define_insn "popcountv16qi2" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] ++ UNSPEC_POPCNT))] ++ "TARGET_VX" ++ "vpopct\t%v0,%v1,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vpopct only counts bits in byte elements. Bigger element sizes need ++; to be emulated. Word and doubleword elements can use the sum across ++; instructions. For halfword sized elements we do a shift of a copy ++; of the result, add it to the result and extend it to halfword ++; element size (unpack). ++ ++(define_expand "popcountv8hi2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ ; Make a copy of the result ++ (set (match_dup 3) (match_dup 2)) ++ ; Generate the shift count operand in a VR (8->byte 7) ++ (set (match_dup 4) (match_dup 5)) ++ (set (match_dup 4) (unspec:V16QI [(const_int 8) ++ (const_int 7) ++ (match_dup 4)] UNSPEC_VEC_SET)) ++ ; Vector shift right logical by one byte ++ (set (match_dup 3) ++ (unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB)) ++ ; Add the shifted and the original result ++ (set (match_dup 2) ++ (plus:V16QI (match_dup 2) (match_dup 3))) ++ ; Generate mask for the odd numbered byte elements ++ (set (match_dup 3) ++ (const_vector:V16QI [(const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255) ++ (const_int 0) (const_int 255)])) ++ ; Zero out the even indexed bytes ++ (set (match_operand:V8HI 0 "register_operand" "=v") ++ (and:V8HI (subreg:V8HI (match_dup 2) 0) ++ (subreg:V8HI (match_dup 3) 0))) ++] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = gen_reg_rtx (V16QImode); ++ operands[4] = gen_reg_rtx (V16QImode); ++ 
operands[5] = CONST0_RTX (V16QImode); ++}) ++ ++(define_expand "popcountv4si2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ (set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_dup 2) (match_dup 3)] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode)); ++}) ++ ++(define_expand "popcountv2di2" ++ [(set (match_dup 2) ++ (unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)] ++ UNSPEC_POPCNT)) ++ (set (match_dup 3) ++ (unspec:V4SI [(match_dup 2) (match_dup 4)] ++ UNSPEC_VEC_VSUM)) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_dup 3) (match_dup 5)] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V16QImode); ++ operands[3] = gen_reg_rtx (V4SImode); ++ operands[4] = force_reg (V16QImode, CONST0_RTX (V16QImode)); ++ operands[5] = force_reg (V4SImode, CONST0_RTX (V4SImode)); ++}) ++ ++; Count leading zeros ++(define_insn "clz2" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (clz:V (match_operand:V 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vclz\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Count trailing zeros ++(define_insn "ctz2" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (ctz:V (match_operand:V 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vctz\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector rotate instructions ++ ++; Each vector element rotated by a scalar ++; verllb, verllh, verllf, verllg ++(define_insn "rotl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (rotate:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "verll\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; Each vector element rotated by the corresponding vector element ++; verllvb, verllvh, 
verllvf, verllvg ++(define_insn "vrotl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (rotate:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "verllv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Shift each element by scalar value ++ ++; veslb, veslh, veslf, veslg ++(define_insn "ashl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashift:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesl\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vesrab, vesrah, vesraf, vesrag ++(define_insn "ashr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesra\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vesrlb, vesrlh, vesrlf, vesrlg ++(define_insn "lshr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))] ++ "TARGET_VX" ++ "vesrl\t%v0,%v1,%Y2" ++ [(set_attr "op_type" "VRS")]) ++ ++ ++; Shift each element by corresponding vector element ++ ++; veslvb, veslvh, veslvf, veslvg ++(define_insn "vashl3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashift:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "veslv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vesravb, vesravh, vesravf, vesravg ++(define_insn "vashr3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vesrav\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vesrlvb, vesrlvh, vesrlvf, vesrlvg ++(define_insn "vlshr3" ++ [(set (match_operand:VI 0
"register_operand" "=v") ++ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vesrlv\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector shift right logical by byte ++ ++; Pattern used by e.g. popcount ++(define_insn "*vec_srb" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SRLB))] ++ "TARGET_VX" ++ "vsrlb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; vmnb, vmnh, vmnf, vmng ++(define_insn "smin3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (smin:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmxb, vmxh, vmxf, vmxg ++(define_insn "smax3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (smax:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmnlb, vmnlh, vmnlf, vmnlg ++(define_insn "umin3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (umin:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmnl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmxlb, vmxlh, vmxlf, vmxlg ++(define_insn "umax3" ++ [(set (match_operand:VI 0 "register_operand" "=v") ++ (umax:VI (match_operand:VI 1 "register_operand" "%v") ++ (match_operand:VI 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vmxl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmeb, vmeh, vmef ++(define_insn "vec_widen_smult_even_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SMULT_EVEN))] ++ "TARGET_VX" ++
"vme\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmleb, vmleh, vmlef ++(define_insn "vec_widen_umult_even_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_EVEN))] ++ "TARGET_VX" ++ "vmle\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmob, vmoh, vmof ++(define_insn "vec_widen_smult_odd_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SMULT_ODD))] ++ "TARGET_VX" ++ "vmo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmlob, vmloh, vmlof ++(define_insn "vec_widen_umult_odd_" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v") ++ (match_operand:VI_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_ODD))] ++ "TARGET_VX" ++ "vmlo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_widen_umult_hi ++; vec_widen_umult_lo ++; vec_widen_smult_hi ++; vec_widen_smult_lo ++ ++; vec_widen_ushiftl_hi ++; vec_widen_ushiftl_lo ++; vec_widen_sshiftl_hi ++; vec_widen_sshiftl_lo ++ ++;; ++;; Vector floating point arithmetic instructions ++;; ++ ++(define_insn "addv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (plus:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfadb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "subv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (minus:V2DF (match_operand:V2DF 1 "register_operand" "v") ; no '%': minus is not commutative ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfsdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "mulv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (mult:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++
(match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfmdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "divv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (div:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfddb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "sqrtv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfsqdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "fmav2df4" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (fma:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v") ++ (match_operand:V2DF 3 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfmadb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "fmsv2df4" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (fma:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v") ++ (neg:V2DF (match_operand:V2DF 3 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vfmsdb\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "negv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (neg:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vflcdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "absv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (abs:V2DF (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vflpdb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*negabsv2df2" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vflndb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; Emulate with compare + select ++(define_insn_and_split 
"smaxv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (smax:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "#" ++ "" ++ [(set (match_dup 3) ++ (gt:V2DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) ++ (if_then_else:V2DF ++ (eq (match_dup 3) (match_dup 4)) ++ (match_dup 2) ++ (match_dup 1)))] ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++ operands[4] = CONST0_RTX (V2DImode); ++}) ++ ++; Emulate with compare + select ++(define_insn_and_split "sminv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (smin:V2DF (match_operand:V2DF 1 "register_operand" "%v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "#" ++ "" ++ [(set (match_dup 3) ++ (gt:V2DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) ++ (if_then_else:V2DF ++ (eq (match_dup 3) (match_dup 4)) ++ (match_dup 1) ++ (match_dup 2)))] ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++ operands[4] = CONST0_RTX (V2DImode); ++}) ++ ++ ++;; ++;; Integer compares ++;; ++ ++(define_insn "*vec_cmp_nocc" ++ [(set (match_operand:VI 2 "register_operand" "=v") ++ (VICMP_HW_OP:VI (match_operand:VI 0 "register_operand" "v") ++ (match_operand:VI 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vc\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++;; ++;; Floating point compares ++;; ++ ++; EQ, GT, GE ++(define_insn "*vec_cmpv2df_nocc" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (VFCMP_HW_OP:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vfcdb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Expanders for not directly supported comparisons ++ ++; UNEQ a u== b -> !(a > b | b > a) ++(define_expand "vec_cmpuneqv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) ++ 
(gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3))) ++ (set (match_dup 0) (not:V2DI (match_dup 0)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; LTGT a <> b -> a > b | b > a ++(define_expand "vec_cmpltgtv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; ORDERED (a, b): a >= b | b > a ++(define_expand "vec_orderedv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++; UNORDERED (a, b): !ORDERED (a, b) ++(define_expand "vec_unorderedv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1))) ++ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3))) ++ (set (match_dup 0) (not:V2DI (match_dup 0)))] ++ "TARGET_VX" ++{ ++ operands[3] = gen_reg_rtx (V2DImode); ++}) ++ ++(define_insn "*vec_load_pairv2di" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (vec_concat:V2DI (match_operand:DI 1 "register_operand" "d") ++ (match_operand:DI 2 "register_operand" "d")))] ++ "TARGET_VX" ++ "vlvgp\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vllv16qi" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:SI 1 "register_operand" "d") ++ (match_operand:BLK 2 
"memory_operand" "Q")] ++ UNSPEC_VEC_LOAD_LEN))] ++ "TARGET_VX" ++ "vll\t%v0,%1,%2" ++ [(set_attr "op_type" "VRS")]) ++ ++; vfenebs, vfenehs, vfenefs ++; vfenezbs, vfenezhs, vfenezfs ++(define_insn "vec_vfenes" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFENECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ flags &= ~VSTRING_FLAG_CS; ++ ++ if (flags == VSTRING_FLAG_ZS) ++ return "vfenezs\t%v0,%v1,%v2"; ++ return "vfenes\t%v0,%v1,%v2"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector select ++ ++; The following splitters simplify vec_sel for constant 0 or -1 ++; selection sources. This is required to generate efficient code for ++; vcond. 
++ ++; a = b == c; ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "const0_operand" "") ++ (match_operand:V 2 "all_ones_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (match_dup 3))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = ~(b == c) ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "all_ones_operand" "") ++ (match_operand:V 2 "const0_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (not:V (match_dup 3)))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = b != c ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (ne (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "all_ones_operand" "") ++ (match_operand:V 2 "const0_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (match_dup 3))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; a = ~(b != c) ++(define_split ++ [(set (match_operand:V 0 "register_operand" "") ++ (if_then_else:V ++ (ne (match_operand: 3 "register_operand" "") ++ (match_operand:V 4 "const0_operand" "")) ++ (match_operand:V 1 "const0_operand" "") ++ (match_operand:V 2 "all_ones_operand" "")))] ++ "TARGET_VX" ++ [(set (match_dup 0) (not:V (match_dup 3)))] ++{ ++ PUT_MODE (operands[3], mode); ++}) ++ ++; op0 = op3 == 0 ? op1 : op2 ++(define_insn "*vec_sel0" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "v") ++ (match_operand: 4 "const0_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%2,%1,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = !op3 == 0 ? 
op1 : op2 ++(define_insn "*vec_sel0" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (not: (match_operand: 3 "register_operand" "v")) ++ (match_operand: 4 "const0_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%1,%2,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = op3 == -1 ? op1 : op2 ++(define_insn "*vec_sel1" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (match_operand: 3 "register_operand" "v") ++ (match_operand: 4 "all_ones_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%1,%2,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++; op0 = !op3 == -1 ? op1 : op2 ++(define_insn "*vec_sel1" ++ [(set (match_operand:V 0 "register_operand" "=v") ++ (if_then_else:V ++ (eq (not: (match_operand: 3 "register_operand" "v")) ++ (match_operand: 4 "all_ones_operand" "")) ++ (match_operand:V 1 "register_operand" "v") ++ (match_operand:V 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vsel\t%v0,%2,%1,%3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++ ++; reduc_smin ++; reduc_smax ++; reduc_umin ++; reduc_umax ++ ++; vec_shl vrep + vsl ++; vec_shr ++ ++; vec_pack_trunc ++; vec_pack_ssat ++; vec_pack_usat ++; vec_pack_sfix_trunc ++; vec_pack_ufix_trunc ++; vec_unpacks_hi ++; vec_unpacks_low ++; vec_unpacku_hi ++; vec_unpacku_low ++; vec_unpacks_float_hi ++; vec_unpacks_float_lo ++; vec_unpacku_float_hi ++; vec_unpacku_float_lo +--- gcc/config/s390/vx-builtins.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/config/s390/vx-builtins.md 2016-05-11 19:46:05.504890170 +0200 +@@ -0,0 +1,2081 @@ ++;;- Instruction patterns for the System z vector facility builtins. ++;; Copyright (C) 2015 Free Software Foundation, Inc. ++;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) ++ ++;; This file is part of GCC. 
++ ++;; GCC is free software; you can redistribute it and/or modify it under ++;; the terms of the GNU General Public License as published by the Free ++;; Software Foundation; either version 3, or (at your option) any later ++;; version. ++ ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++;; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++;; for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++; The patterns in this file are enabled with -mzvector ++ ++(define_mode_iterator V_HW_64 [V2DI V2DF]) ++(define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF]) ++(define_mode_iterator VI_HW_SD [V4SI V2DI]) ++(define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF]) ++(define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI]) ++ ++; The element type of the vector with floating point modes translated ++; to int modes of the same size. 
++(define_mode_attr non_vec_int[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI") ++ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI") ++ (V1SI "SI") (V2SI "SI") (V4SI "SI") ++ (V1DI "DI") (V2DI "DI") ++ (V1SF "SI") (V2SF "SI") (V4SF "SI") ++ (V1DF "DI") (V2DF "DI")]) ++ ++; Condition code modes generated by int comparisons ++(define_mode_iterator VICMP [CCVEQ CCVH CCVHU]) ++ ++; Comparisons supported by the vec_cmp* builtins ++(define_code_iterator intcmp [eq gt gtu ge geu lt ltu le leu]) ++(define_code_iterator fpcmp [eq gt ge lt le]) ++ ++; Comparisons supported by the vec_all/any* builtins ++(define_code_iterator intcmpcc [eq ne gt ge lt le gtu geu ltu leu]) ++(define_code_iterator fpcmpcc [eq ne gt ge unle unlt lt le]) ++ ++; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4) ++(define_constants ++ [(VSTRING_FLAG_IN 8) ; invert result ++ (VSTRING_FLAG_RT 4) ; result type ++ (VSTRING_FLAG_ZS 2) ; zero search ++ (VSTRING_FLAG_CS 1)]) ; condition code set ++ ++; Rounding modes as being used for e.g. 
VFI ++(define_constants ++ [(VEC_RND_CURRENT 0) ++ (VEC_RND_NEAREST_AWAY_FROM_ZERO 1) ++ (VEC_RND_SHORT_PREC 3) ++ (VEC_RND_NEAREST_TO_EVEN 4) ++ (VEC_RND_TO_ZERO 5) ++ (VEC_RND_TO_INF 6) ++ (VEC_RND_TO_MINF 7)]) ++ ++ ++; Vector gather element ++ ++(define_insn "vec_gather_element" ++ [(set (match_operand:V_HW_32_64 0 "register_operand" "=v") ++ (unspec:V_HW_32_64 [(match_operand:V_HW_32_64 1 "register_operand" "0") ++ (match_operand: 2 "register_operand" "v") ++ (match_operand:BLK 3 "memory_operand" "QR") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_GATHER))] ++ "TARGET_VX && UINTVAL (operands[4]) < GET_MODE_NUNITS (mode)" ++ "vge\t%0,%O3(%v2,%R3),%b4" ++ [(set_attr "op_type" "VRV")]) ++ ++(define_expand "vec_genmask" ++ [(match_operand:VI_HW 0 "register_operand" "=v") ++ (match_operand:QI 1 "const_int_operand" "C") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ "TARGET_VX" ++{ ++ int nunits = GET_MODE_NUNITS (mode); ++ int bitlen = GET_MODE_UNIT_BITSIZE (mode); ++ /* To bit little endian style. 
*/ ++ int end = bitlen - 1 - INTVAL (operands[1]); ++ int start = bitlen - 1 - INTVAL (operands[2]); ++ rtx const_vec[16]; ++ int i; ++ unsigned HOST_WIDE_INT mask; ++ bool swapped_p = false; ++ ++ if (start > end) ++ { ++ i = start - 1; start = end + 1; end = i; ++ swapped_p = true; ++ } ++ if (end == 63) ++ mask = (unsigned HOST_WIDE_INT) -1; ++ else ++ mask = ((unsigned HOST_WIDE_INT) 1 << (end + 1)) - 1; ++ ++ mask &= ~(((unsigned HOST_WIDE_INT) 1 << start) - 1); ++ ++ if (swapped_p) ++ mask = ~mask; ++ ++ for (i = 0; i < nunits; i++) ++ const_vec[i] = GEN_INT (trunc_int_for_mode (mask, ++ GET_MODE_INNER (mode))); ++ ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_CONST_VECTOR (mode, ++ gen_rtvec_v (nunits, const_vec)))); ++ DONE; ++}) ++ ++(define_expand "vec_genbytemaskv16qi" ++ [(match_operand:V16QI 0 "register_operand" "") ++ (match_operand:HI 1 "const_int_operand" "")] ++ "TARGET_VX" ++{ ++ int i; ++ unsigned mask = 0x8000; ++ rtx const_vec[16]; ++ unsigned HOST_WIDE_INT byte_mask = INTVAL (operands[1]); ++ ++ for (i = 0; i < 16; i++) ++ { ++ if (mask & byte_mask) ++ const_vec[i] = constm1_rtx; ++ else ++ const_vec[i] = const0_rtx; ++ mask = mask >> 1; ++ } ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_CONST_VECTOR (V16QImode, ++ gen_rtvec_v (16, const_vec)))); ++ DONE; ++}) ++ ++(define_expand "vec_splats" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (match_operand: 1 "general_operand" "")))] ++ "TARGET_VX") ++ ++(define_expand "vec_insert" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand: 2 "register_operand" "") ++ (match_operand:SI 3 "shift_count_or_setmem_operand" "") ++ (match_operand:V_HW 1 "register_operand" "")] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "") ++ ++; This is vec_set + modulo arithmetic on the element selector (op 2) ++(define_expand "vec_promote" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand: 1 
"register_operand" "") ++ (match_operand:SI 2 "shift_count_or_setmem_operand" "") ++ (match_dup 0)] ++ UNSPEC_VEC_SET))] ++ "TARGET_VX" ++ "") ++ ++; vec_extract is also an RTL standard name -> vector.md ++ ++(define_insn "vec_insert_and_zero" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand: 1 "memory_operand" "QR")] ++ UNSPEC_VEC_INSERT_AND_ZERO))] ++ "TARGET_VX" ++ "vllez\t%v0,%1" ++ [(set_attr "op_type" "VRX")]) ++ ++(define_insn "vlbb" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:BLK 1 "memory_operand" "QR") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_LOAD_BNDRY))] ++ "TARGET_VX && UINTVAL (operands[2]) < 7" ++ "vlbb\t%v0,%1,%2" ++ [(set_attr "op_type" "VRX")]) ++ ++; FIXME: The following two patterns might using vec_merge. But what is ++; the canonical form: (vec_select (vec_merge op0 op1)) or (vec_merge ++; (vec_select op0) (vec_select op1) ++(define_insn "vec_mergeh" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand:V_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_MERGEH))] ++ "TARGET_VX" ++ "vmrh\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_mergel" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand:V_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_MERGEL))] ++ "TARGET_VX" ++ "vmrl\t%v0,%1,%2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack ++ ++(define_insn "vec_pack" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK))] ++ "TARGET_VX" ++ "vpk\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack saturate ++ ++(define_insn "vec_packs" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: 
[(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_SATURATE))] ++ "TARGET_VX" ++ "vpks\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; This is vec_packs_cc + loading cc into a caller specified memory location. ++(define_expand "vec_packs_cc" ++ [(parallel ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "") ++ (match_operand:VI_HW_HSD 2 "register_operand" "")] ++ UNSPEC_VEC_PACK_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_SATURATE_CC))]) ++ (set (match_dup 4) ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (match_dup 4))] ++ "TARGET_VX" ++{ ++ operands[4] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*vec_packs_cc" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_SATURATE_CC))] ++ "TARGET_VX" ++ "vpkss\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector pack logical saturate ++ ++(define_insn "vec_packsu" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))] ++ "TARGET_VX" ++ "vpkls\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Emulate saturate unsigned pack on signed operands. ++; Zero out negative elements and continue with the unsigned saturating pack. 
++(define_expand "vec_packsu_u" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))] ++ "TARGET_VX" ++{ ++ rtx null_vec = CONST0_RTX(mode); ++ enum machine_mode half_mode; ++ switch (mode) ++ { ++ case V8HImode: half_mode = V16QImode; break; ++ case V4SImode: half_mode = V8HImode; break; ++ case V2DImode: half_mode = V4SImode; break; ++ default: gcc_unreachable (); ++ } ++ s390_expand_vcond (operands[1], operands[1], null_vec, ++ GE, operands[1], null_vec); ++ s390_expand_vcond (operands[2], operands[2], null_vec, ++ GE, operands[2], null_vec); ++ emit_insn (gen_rtx_SET (VOIDmode, operands[0], ++ gen_rtx_UNSPEC (half_mode, ++ gen_rtvec (2, operands[1], operands[2]), ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE))); ++ DONE; ++}) ++ ++; This is vec_packsu_cc + loading cc into a caller specified memory location. ++; FIXME: The reg to target mem copy should be issued by reload?! 
++(define_expand "vec_packsu_cc" ++ [(parallel ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "") ++ (match_operand:VI_HW_HSD 2 "register_operand" "")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))]) ++ (set (match_dup 4) ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (match_dup 4))] ++ "TARGET_VX" ++{ ++ operands[4] = gen_reg_rtx (SImode); ++}) ++ ++(define_insn "*vec_packsu_cc" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:VI_HW_HSD 1 "register_operand" "v") ++ (match_operand:VI_HW_HSD 2 "register_operand" "v")] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC)) ++ (set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_dup 1) (match_dup 2)] ++ UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC))] ++ "TARGET_VX" ++ "vpklss\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector permute ++ ++; vec_perm is also RTL standard name, but we can only use it for V16QI ++ ++(define_insn "vec_zperm" ++ [(set (match_operand:V_HW_HSD 0 "register_operand" "=v") ++ (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "register_operand" "v") ++ (match_operand:V_HW_HSD 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_PERM))] ++ "TARGET_VX" ++ "vperm\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_permi" ++ [(set (match_operand:V_HW_64 0 "register_operand" "") ++ (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "") ++ (match_operand:V_HW_64 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_PERMI))] ++ "TARGET_VX" ++{ ++ HOST_WIDE_INT val = INTVAL (operands[3]); ++ operands[3] = GEN_INT ((val & 1) | (val & 2) << 1); ++}) ++ ++(define_insn "*vec_permi" ++ [(set (match_operand:V_HW_64 0 "register_operand" "=v") ++ 
(unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "v") ++ (match_operand:V_HW_64 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_PERMI))] ++ "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0" ++ "vpdi\t%v0,%v1,%v2,%b3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector replicate ++ ++ ++; Replicate from vector element ++(define_expand "vec_splat" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (vec_duplicate:V_HW (vec_select: ++ (match_operand:V_HW 1 "register_operand" "") ++ (parallel ++ [(match_operand:QI 2 "const_mask_operand" "")]))))] ++ "TARGET_VX") ++ ++; Vector scatter element ++ ++; vscef, vsceg ++ ++; A 64 bit target address generated from 32 bit elements ++(define_insn "vec_scatter_elementv4si_DI" ++ [(set (mem:SI ++ (plus:DI (zero_extend:DI ++ (unspec:SI [(match_operand:V4SI 1 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT)) ++ (match_operand:SI 2 "address_operand" "ZQ"))) ++ (unspec:SI [(match_operand:V4SI 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && TARGET_64BIT && UINTVAL (operands[3]) < 4" ++ "vscef\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; A 31 bit target address is generated from 64 bit elements ++(define_insn "vec_scatter_element_SI" ++ [(set (mem: ++ (plus:SI (subreg:SI ++ (unspec: [(match_operand:V_HW_64 1 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT) 4) ++ (match_operand:SI 2 "address_operand" "ZQ"))) ++ (unspec: [(match_operand:V_HW_64 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) < GET_MODE_NUNITS (mode)" ++ "vsce\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; Element size and target address size are the same ++(define_insn "vec_scatter_element_" ++ [(set (mem: ++ (plus: (unspec: ++ [(match_operand: 1 "register_operand" "v") ++ 
(match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_EXTRACT) ++ (match_operand:DI 2 "address_operand" "ZQ"))) ++ (unspec: [(match_operand:V_HW_32_64 0 "register_operand" "v") ++ (match_dup 3)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX && UINTVAL (operands[3]) < GET_MODE_NUNITS (mode)" ++ "vsce\t%v0,%O2(%v1,%R2),%3" ++ [(set_attr "op_type" "VRV")]) ++ ++; Depending on the address size we have to expand a different pattern. ++; This however cannot be represented in s390-builtins.def so we do the ++; multiplexing here in the expander. ++(define_expand "vec_scatter_element" ++ [(match_operand:V_HW_32_64 0 "register_operand" "") ++ (match_operand: 1 "register_operand" "") ++ (match_operand 2 "address_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ "TARGET_VX" ++{ ++ if (TARGET_64BIT) ++ { ++ PUT_MODE (operands[2], DImode); ++ emit_insn ( ++ gen_vec_scatter_element_DI (operands[0], operands[1], ++ operands[2], operands[3])); ++ } ++ else ++ { ++ PUT_MODE (operands[2], SImode); ++ emit_insn ( ++ gen_vec_scatter_element_SI (operands[0], operands[1], ++ operands[2], operands[3])); ++ } ++ DONE; ++}) ++ ++ ++; Vector select ++ ++; Operand 3 selects bits from either OP1 (0) or OP2 (1) ++ ++; Comparison operator should not matter as long as we always use the same ?! ++ ++; Operands 1 and 2 are swapped in order to match the altivec builtin. 
++; If operand 3 is a const_int bitmask this would be vec_merge ++(define_expand "vec_sel" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (if_then_else:V_HW ++ (eq (match_operand: 3 "register_operand" "") ++ (match_dup 4)) ++ (match_operand:V_HW 2 "register_operand" "") ++ (match_operand:V_HW 1 "register_operand" "")))] ++ "TARGET_VX" ++{ ++ operands[4] = CONST0_RTX (mode); ++}) ++ ++ ++; Vector sign extend to doubleword ++ ++; Sign extend of right most vector element to respective double-word ++(define_insn "vec_extend" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_EXTEND))] ++ "TARGET_VX" ++ "vseg\t%v0,%1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector store with length ++ ++; Store bytes in OP1 from OP0 with the highest indexed byte to be ++; stored from OP0 given by OP2 ++(define_insn "vstl" ++ [(set (match_operand:BLK 2 "memory_operand" "=Q") ++ (unspec:BLK [(match_operand:V 0 "register_operand" "v") ++ (match_operand:SI 1 "register_operand" "d")] ++ UNSPEC_VEC_STORE_LEN))] ++ "TARGET_VX" ++ "vstl\t%v0,%1,%2" ++ [(set_attr "op_type" "VRS")]) ++ ++ ++; Vector unpack high ++ ++; vuphb, vuphh, vuphf ++(define_insn "vec_unpackh" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_UNPACKH))] ++ "TARGET_VX" ++ "vuph\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vuplhb, vuplhh, vuplhf ++(define_insn "vec_unpackh_l" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_UNPACKH_L))] ++ "TARGET_VX" ++ "vuplh\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector unpack low ++ ++; vuplb, vuplhw, vuplf ++(define_insn "vec_unpackl" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_UNPACKL))] ++ "TARGET_VX" ++ 
"vupl\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vupllb, vupllh, vupllf ++(define_insn "vec_unpackl_l" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_UNPACKL_L))] ++ "TARGET_VX" ++ "vupll\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector add ++ ++; vaq ++ ++; zvector builtins uses V16QI operands. So replace the modes in order ++; to map this to a TImode add. We have to keep the V16QI mode ++; operands in the expander in order to allow some operand type ++; checking when expanding the builtin. ++(define_expand "vec_add_u128" ++ [(match_operand:V16QI 0 "register_operand" "") ++ (match_operand:V16QI 1 "register_operand" "") ++ (match_operand:V16QI 2 "register_operand" "")] ++ "TARGET_VX" ++{ ++ rtx op0 = gen_rtx_SUBREG (TImode, operands[0], 0); ++ rtx op1 = gen_rtx_SUBREG (TImode, operands[1], 0); ++ rtx op2 = gen_rtx_SUBREG (TImode, operands[2], 0); ++ ++ emit_insn (gen_rtx_SET (VOIDmode, op0, ++ gen_rtx_PLUS (TImode, op1, op2))); ++ DONE; ++}) ++ ++; Vector add compute carry ++ ++(define_insn "vec_addc" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_ADDC))] ++ "TARGET_VX" ++ "vacc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_addc_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_ADDC_U128))] ++ "TARGET_VX" ++ "vaccq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector add with carry ++ ++(define_insn "vec_adde_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ 
UNSPEC_VEC_ADDE_U128))] ++ "TARGET_VX" ++ "vacq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector add with carry compute carry ++ ++(define_insn "vec_addec_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_ADDEC_U128))] ++ "TARGET_VX" ++ "vacccq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector and ++ ++; The following two patterns allow mixed mode and's as required for the intrinsics. ++(define_insn "and_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "and_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vn\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector and with complement ++ ++; vnc ++(define_insn "vec_andc3" ++ [(set (match_operand:VT_HW 0 "register_operand" "=v") ++ (and:VT_HW (not:VT_HW (match_operand:VT_HW 2 "register_operand" "v")) ++ (match_operand:VT_HW 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The following two patterns allow mixed mode and's as required for the intrinsics. 
++(define_insn "vec_andc_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (not:V2DF (match_operand:V2DF 2 "register_operand" "v")) ++ (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0)))] ++ ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_andc_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (and:V2DF (not:V2DF (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)) ++ (match_operand:V2DF 1 "register_operand" "v")))] ++ "TARGET_VX" ++ "vnc\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector average ++ ++(define_insn "vec_avg" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_AVG))] ++ "TARGET_VX" ++ "vavg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Vector average logical ++ ++(define_insn "vec_avgu" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "%v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_AVGU))] ++ "TARGET_VX" ++ "vavgl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector checksum ++ ++(define_insn "vec_checksum" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") ++ (match_operand:V4SI 2 "register_operand" "v")] ++ UNSPEC_VEC_CHECKSUM))] ++ "TARGET_VX" ++ "vcksm\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Vector compare ++;; ++ ++; vec_all/any int compares ++ ++(define_expand "vec_all_" ++ [(match_operand:SI 0 "register_operand" "") ++ (intcmpcc (match_operand:VI_HW 1 "register_operand" "") ++ (match_operand:VI_HW 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ true); ++ DONE; ++}) ++ ++(define_expand "vec_any_" ++ 
[(match_operand:SI 0 "register_operand" "") ++ (intcmpcc (match_operand:VI_HW 1 "register_operand" "") ++ (match_operand:VI_HW 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ false); ++ DONE; ++}) ++ ++; vec_all/any fp compares ++ ++(define_expand "vec_all_v2df" ++ [(match_operand:SI 0 "register_operand" "") ++ (fpcmpcc (match_operand:V2DF 1 "register_operand" "") ++ (match_operand:V2DF 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ true); ++ DONE; ++}) ++ ++(define_expand "vec_any_v2df" ++ [(match_operand:SI 0 "register_operand" "") ++ (fpcmpcc (match_operand:V2DF 1 "register_operand" "") ++ (match_operand:V2DF 2 "register_operand" ""))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare_cc (operands[0], ++ , ++ operands[1], ++ operands[2], ++ false); ++ DONE; ++}) ++ ++ ++; Compare without generating CC ++ ++(define_expand "vec_cmp" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (intcmp:VI_HW (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v")))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare (operands[0], , operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "vec_cmpv2df" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (fpcmp:V2DI (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++{ ++ s390_expand_vec_compare (operands[0], , operands[1], operands[2]); ++ DONE; ++}) ++ ++ ++; Vector count leading zeros ++ ++; vec_cntlz -> clz ++; vec_cnttz -> ctz ++ ++; Vector xor ++ ++; vec_xor -> xor ++ ++; The following two patterns allow mixed mode xor's as required for the intrinsics. 
++(define_insn "xor_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (xor:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "xor_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (xor:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vx\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector Galois field multiply sum ++ ++(define_insn "vec_gfmsum" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM))] ++ "TARGET_VX" ++ "vgfm\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:V2DI 2 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_128))] ++ "TARGET_VX" ++ "vgfmg\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_accum" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_ACCUM))] ++ "TARGET_VX" ++ "vgfma\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_gfmsum_accum_128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:V2DI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_GFMSUM_ACCUM_128))] ++ "TARGET_VX" ++ "vgfmag\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; FIXME: vec_neg ? 
++ ++; Vector load positive: vec_abs -> abs ++; Vector maximum vec_max -> smax, logical vec_max -> umax ++; Vector minimum vec_min -> smin, logical vec_min -> umin ++ ++ ++; Vector multiply and add high ++ ++; vec_mladd -> vec_vmal ++; vmalb, vmalh, vmalf, vmalg ++(define_insn "vec_vmal" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v")] ++ UNSPEC_VEC_VMAL))] ++ "TARGET_VX" ++ "vmal\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_mhadd -> vec_vmah/vec_vmalh ++ ++; vmahb; vmahh, vmahf, vmahg ++(define_insn "vec_vmah" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v")] ++ UNSPEC_VEC_VMAH))] ++ "TARGET_VX" ++ "vmah\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmalhb; vmalhh, vmalhf, vmalhg ++(define_insn "vec_vmalh" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v")] ++ UNSPEC_VEC_VMALH))] ++ "TARGET_VX" ++ "vmalh\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_meadd -> vec_vmae/vec_vmale ++ ++; vmaeb; vmaeh, vmaef, vmaeg ++(define_insn "vec_vmae" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_VMAE))] ++ "TARGET_VX" ++ "vmae\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmaleb; vmaleh, vmalef, vmaleg ++(define_insn "vec_vmale" ++ [(set (match_operand: 0 
"register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_VMALE))] ++ "TARGET_VX" ++ "vmale\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_moadd -> vec_vmao/vec_vmalo ++ ++; vmaob; vmaoh, vmaof, vmaog ++(define_insn "vec_vmao" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_VMAO))] ++ "TARGET_VX" ++ "vmao\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmalob; vmaloh, vmalof, vmalog ++(define_insn "vec_vmalo" ++ [(set (match_operand: 0 "register_operand" "=v") ++ (unspec: [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand: 3 "register_operand" "v")] ++ UNSPEC_VEC_VMALO))] ++ "TARGET_VX" ++ "vmalo\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector multiply high ++ ++; vec_mulh -> vec_smulh/vec_umulh ++ ++; vmhb, vmhh, vmhf ++(define_insn "vec_smulh" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SMULT_HI))] ++ "TARGET_VX" ++ "vmh\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vmlhb, vmlhh, vmlhf ++(define_insn "vec_umulh" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "%v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_UMULT_HI))] ++ "TARGET_VX" ++ "vmlh\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector multiply low ++ ++; vec_mule -> vec_widen_umult_even/vec_widen_smult_even ++; vec_mulo -> vec_widen_umult_odd/vec_widen_smult_odd ++ ++ ++; Vector nor 
++ ++(define_insn "vec_nor3" ++ [(set (match_operand:VT_HW 0 "register_operand" "=v") ++ (not:VT_HW (ior:VT_HW (match_operand:VT_HW 1 "register_operand" "%v") ++ (match_operand:VT_HW 2 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The following two patterns allow mixed mode nor's as required for the intrinsics. ++; Here operand 1 is a V2DI register viewed as V2DF via subreg. ++(define_insn "vec_nor_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (not:V2DF (ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v"))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; Here operand 2 is the V2DI register viewed as V2DF via subreg. ++(define_insn "vec_nor_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (not:V2DF (ior:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0))))] ++ "TARGET_VX" ++ "vno\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector or ++ ++; The following two patterns allow mixed mode or's as required for the intrinsics.
++(define_insn "ior_av2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (ior:V2DF (subreg:V2DF (match_operand:V2DI 1 "register_operand" "v") 0) ++ (match_operand:V2DF 2 "register_operand" "v")))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "ior_cv2df3" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (ior:V2DF (match_operand:V2DF 1 "register_operand" "v") ++ (subreg:V2DF (match_operand:V2DI 2 "register_operand" "v") 0)))] ++ "TARGET_VX" ++ "vo\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector population count vec_popcnt -> popcount ++; Vector element rotate left logical vec_rl -> vrotl, vec_rli -> rot ++ ++; Vector element rotate and insert under mask ++ ++; verimb, verimh, verimf, verimg ++(define_insn "verim" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "0") ++ (match_operand:VI_HW 2 "register_operand" "v") ++ (match_operand:VI_HW 3 "register_operand" "v") ++ (match_operand:QI 4 "const_int_operand" "C")] ++ UNSPEC_VEC_RL_MASK))] ++ "TARGET_VX" ++ "verim\t%v0,%v2,%v3,%b4" ++ [(set_attr "op_type" "VRI")]) ++ ++ ++; Vector shift left ++ ++(define_insn "vec_sll" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SLL))] ++ "TARGET_VX" ++ "vsl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift left by byte ++ ++(define_insn "vec_slb" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SLB))] ++ "TARGET_VX" ++ "vslb\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift left double by byte ++ ++(define_insn "vec_sld" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 
"register_operand" "v") ++ (match_operand:V_HW 2 "register_operand" "v") ++ (match_operand:QI 3 "const_int_operand" "C")] ++ UNSPEC_VEC_SLDB))] ++ "TARGET_VX" ++ "vsldb\t%v0,%v1,%v2,%b3" ++ [(set_attr "op_type" "VRI")]) ++ ++(define_expand "vec_sldw" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand:V_HW 2 "register_operand" "") ++ (match_operand:QI 3 "const_int_operand" "")] ++ UNSPEC_VEC_SLDB))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) << 2); ++}) ++ ++; Vector shift right arithmetic ++ ++(define_insn "vec_sral" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SRAL))] ++ "TARGET_VX" ++ "vsra\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right arithmetic by byte ++ ++(define_insn "vec_srab" ++ [(set (match_operand:V_HW 0 "register_operand" "=v") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v") ++ (match_operand: 2 "register_operand" "v")] ++ UNSPEC_VEC_SRAB))] ++ "TARGET_VX" ++ "vsrab\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right logical ++ ++(define_insn "vec_srl" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v")] ++ UNSPEC_VEC_SRL))] ++ "TARGET_VX" ++ "vsrl\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector shift right logical by byte ++ ++; Pattern definition in vector.md ++(define_expand "vec_srb" ++ [(set (match_operand:V_HW 0 "register_operand" "") ++ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand: 2 "register_operand" "")] ++ UNSPEC_VEC_SRLB))] ++ "TARGET_VX") ++ ++ ++; Vector subtract ++ ++(define_insn "vec_sub_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ 
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_SUB_U128))] ++ "TARGET_VX" ++ "vsq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract compute borrow indication ++ ++(define_insn "vec_subc" ++ [(set (match_operand:VI_HW 0 "register_operand" "=v") ++ (unspec:VI_HW [(match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v")] ++ UNSPEC_VEC_SUBC))] ++ "TARGET_VX" ++ "vscbi\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "vec_subc_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v")] ++ UNSPEC_VEC_SUBC_U128))] ++ "TARGET_VX" ++ "vscbiq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract with borrow indication ++ ++(define_insn "vec_sube_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_SUBE_U128))] ++ "TARGET_VX" ++ "vsbiq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector subtract with borrow compute and borrow indication ++ ++(define_insn "vec_subec_u128" ++ [(set (match_operand:V16QI 0 "register_operand" "=v") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") ++ (match_operand:V16QI 2 "register_operand" "v") ++ (match_operand:V16QI 3 "register_operand" "v")] ++ UNSPEC_VEC_SUBEC_U128))] ++ "TARGET_VX" ++ "vsbcbiq\t%v0,%v1,%v2,%v3" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector sum across ++ ++; Sum across DImode parts of the 1st operand and add the rightmost ++; element of 2nd operand ++; vsumgh, vsumgf ++(define_expand "vec_sum2" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "") 
++ (match_operand:VI_HW_HS 2 "register_operand" "")] ++ UNSPEC_VEC_VSUMG))] ++ "TARGET_VX") ++ ++; vsumqh, vsumqf ++(define_insn "vec_sum_u128" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand" "v") ++ (match_operand:VI_HW_SD 2 "register_operand" "v")] ++ UNSPEC_VEC_VSUMQ))] ++ "TARGET_VX" ++ "vsumq\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++; vsumb, vsumh ++(define_expand "vec_sum4" ++ [(set (match_operand:V4SI 0 "register_operand" "") ++ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "") ++ (match_operand:VI_HW_QH 2 "register_operand" "")] ++ UNSPEC_VEC_VSUM))] ++ "TARGET_VX") ++ ++ ++; Vector test under mask ++ ++(define_expand "vec_test_mask_int" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V_HW 1 "register_operand" "") ++ (match_operand: 2 "register_operand" "")] ++ UNSPEC_VEC_TEST_MASK)) ++ (set (match_operand:SI 0 "register_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_insn "*vec_test_mask" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V_HW 0 "register_operand" "v") ++ (match_operand: 1 "register_operand" "v")] ++ UNSPEC_VEC_TEST_MASK))] ++ "TARGET_VX" ++ "vtm\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++ ++; Vector find any element equal ++ ++; vfaeb, vfaeh, vfaef ++; vfaezb, vfaezh, vfaezf ++(define_insn "vfae" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFAE))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[3] = GEN_INT (flags); ++ return "vfaez\t%v0,%v1,%v2,%b3"; ++ } ++ return "vfae\t%v0,%v1,%v2,%b3"; ++} ++[(set_attr "op_type" "VRR")]) ++ ++; vfaebs, vfaehs, 
vfaefs ++; vfaezbs, vfaezhs, vfaezfs ++; If VSTRING_FLAG_ZS is set in operand 3 it is stripped from the mask and ++; the vfaezs mnemonic is emitted instead of vfaes. ++(define_insn "*vfaes" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[3] = GEN_INT (flags); ++ return "vfaezs\t%v0,%v1,%v2,%b3"; ++ } ++ return "vfaes\t%v0,%v1,%v2,%b3"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++; Expander: ORs VSTRING_FLAG_ZS into the mask operand. ++(define_expand "vfaez" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_VFAE))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_ZS); ++}) ++ ++; Expander: ORs VSTRING_FLAG_CS into the mask and stores the CC, converted ++; via UNSPEC_CC_TO_INT, into the SImode memory operand 4. ++(define_expand "vfaes" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:QI 3 "const_mask_operand" "")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))]) ++ (set (match_operand:SI 4 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS); ++}) ++ ++; Expander: as vfaes, but ORs in both VSTRING_FLAG_CS and VSTRING_FLAG_ZS. ++(define_expand "vfaezs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:SI 3 "const_mask_operand"
"")] ++ UNSPEC_VEC_VFAE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFAECC))]) ++ (set (match_operand:SI 4 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS); ++}) ++ ++ ++; Vector find element equal ++ ++; vfeebs, vfeehs, vfeefs ++; vfeezbs, vfeezhs, vfeezfs ++; Only VSTRING_FLAG_ZS and VSTRING_FLAG_CS may be set in operand 3 (asserted). ++; With ZS set, vfeezs is emitted without a mask operand; otherwise vfees is ++; emitted with the unmodified operand 3 as mask. ++(define_insn "*vfees" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFEE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_VEC_VFEECC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]); ++ ++ gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS))); ++ flags &= ~VSTRING_FLAG_CS; ++ ++ if (flags == VSTRING_FLAG_ZS) ++ return "vfeezs\t%v0,%v1,%v2"; ++ return "vfees\t%v0,%v1,%v2,%b3"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++; vfeeb, vfeeh, vfeef ++; The register operands need "v" constraints: the template prints them with ++; the %v modifier, so they have to be allocated to vector registers. ++(define_insn "vfee" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (const_int 0)] ++ UNSPEC_VEC_VFEE))] ++ "TARGET_VX" ++ "vfee\t%v0,%v1,%v2,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vfeezb, vfeezh, vfeezf ++; Same constraint requirement as for vfee. ++(define_insn "vfeez" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (const_int VSTRING_FLAG_ZS)] ++ UNSPEC_VEC_VFEE))] ++ "TARGET_VX" ++ "vfeezs\t%v0,%v1,%v2,2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vfees" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++
(unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFEE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFEECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++; Expander: operand 4 is created here as the constant ZS | CS mask; the CC is ++; stored, converted via UNSPEC_CC_TO_INT, into the SImode memory operand 3. ++(define_expand "vfeezs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_dup 4)] ++ UNSPEC_VEC_VFEE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 4)] ++ UNSPEC_VEC_VFEECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS); ++}) ++ ++; Vector find element not equal ++ ++; vfeneb, vfeneh, vfenef ++(define_insn "vfene" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (const_int 0)] ++ UNSPEC_VEC_VFENE))] ++ "TARGET_VX" ++ "vfene\t%v0,%v1,%v2,0" ++ [(set_attr "op_type" "VRR")]) ++ ++; vec_vfenes can be found in vector.md since it is used for strlen ++ ++; vfenezb, vfenezh, vfenezf ++; The register operands need "v" constraints (as in vfene): the template ++; prints them with the %v modifier, so they must be in vector registers. ++(define_insn "vfenez" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (const_int VSTRING_FLAG_ZS)] ++ UNSPEC_VEC_VFENE))] ++ "TARGET_VX" ++ "vfenez\t%v0,%v1,%v2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vfenes" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1
"register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (const_int VSTRING_FLAG_CS)] ++ UNSPEC_VEC_VFENECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vfenezs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_dup 4)] ++ UNSPEC_VEC_VFENE)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 4)] ++ UNSPEC_VEC_VFENECC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS); ++}) ++ ++; Vector isolate string ++ ++; vistrb, vistrh, vistrf ++(define_insn "vistr" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_VISTR))] ++ "TARGET_VX" ++ "vistr\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; vistrbs, vistrhs, vistrfs ++(define_insn "*vistrs" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")] ++ UNSPEC_VEC_VISTR)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1)] UNSPEC_VEC_VISTRCC))] ++ "TARGET_VX" ++ "vistrs\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vistrs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "")] ++ UNSPEC_VEC_VISTR)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1)] ++ UNSPEC_VEC_VISTRCC))]) ++ (set (match_operand:SI 2 "memory_operand" "") ++ (unspec:SI 
[(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ ++; Vector compare range ++ ++; vstrcb, vstrch, vstrcf ++; vstrczb, vstrczh, vstrczf ++(define_insn "vstrc" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_VSTRC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[4]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[4] = GEN_INT (flags); ++ return "vstrcz\t%v0,%v1,%v2,%v3,%b4"; ++ } ++ return "vstrc\t%v0,%v1,%v2,%v3,%b4"; ++} ++[(set_attr "op_type" "VRR")]) ++ ++; vstrcbs, vstrchs, vstrcfs ++; vstrczbs, vstrczhs, vstrczfs ++(define_insn "*vstrcs" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v") ++ (match_operand:VI_HW_QHS 2 "register_operand" "v") ++ (match_operand:VI_HW_QHS 3 "register_operand" "v") ++ (match_operand:QI 4 "const_mask_operand" "C")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))] ++ "TARGET_VX" ++{ ++ unsigned HOST_WIDE_INT flags = INTVAL (operands[4]); ++ ++ if (flags & VSTRING_FLAG_ZS) ++ { ++ flags &= ~VSTRING_FLAG_ZS; ++ operands[4] = GEN_INT (flags); ++ return "vstrczs\t%v0,%v1,%v2,%v3,%b4"; ++ } ++ return "vstrcs\t%v0,%v1,%v2,%v3,%b4"; ++} ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vstrcz" ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC))] ++ "TARGET_VX" ++{ ++ 
operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_ZS); ++}) ++ ++(define_expand "vstrcs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))]) ++ (set (match_operand:SI 5 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS); ++}) ++ ++(define_expand "vstrczs" ++ [(parallel ++ [(set (match_operand:VI_HW_QHS 0 "register_operand" "") ++ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "") ++ (match_operand:VI_HW_QHS 2 "register_operand" "") ++ (match_operand:VI_HW_QHS 3 "register_operand" "") ++ (match_operand:QI 4 "const_mask_operand" "")] ++ UNSPEC_VEC_VSTRC)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3) ++ (match_dup 4)] ++ UNSPEC_VEC_VSTRCCC))]) ++ (set (match_operand:SI 5 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX" ++{ ++ operands[4] = GEN_INT (INTVAL (operands[4]) | VSTRING_FLAG_CS | VSTRING_FLAG_ZS); ++}) ++ ++ ++; Signed V2DI -> V2DF conversion - inexact exception disabled ++(define_insn "vec_di_to_df_s64" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_VCDGB))] ++ "TARGET_VX && UINTVAL (operands[2]) != 2 && UINTVAL (operands[2]) <= 7" ++ "vcdgb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The result needs to be multiplied with 2**-op2 ++(define_expand "vec_ctd_s64" ++ [(set 
(match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "") ++ (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCDGB)) ++ (use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, -INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++}) ++ ++; Unsigned V2DI -> V2DF conversion - inexact exception disabled ++(define_insn "vec_di_to_df_u64" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "v") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ UNSPEC_VEC_VCDLGB))] ++ "TARGET_VX" ++ "vcdlgb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The result needs to be multiplied with 2**-op2 ++(define_expand "vec_ctd_u64" ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DI 1 "register_operand" "") ++ (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCDLGB)) ++ (use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 0) (mult:V2DF (match_dup 0) (match_dup 3)))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, -INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++}) ++ ++ ++; Signed V2DF -> V2DI conversion - inexact exception disabled ++(define_insn "vec_df_to_di_s64" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_int_operand" "C")] ++ UNSPEC_VEC_VCGDB))] ++ "TARGET_VX" ++ "vcgdb\t%v0,%v1,4,%b2" ++ [(set_attr 
"op_type" "VRR")]) ++ ++; The input needs to be multiplied with 2**op2 ++(define_expand "vec_ctsl" ++ [(use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "") ++ (match_dup 3))) ++ (set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCGDB))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++ operands[4] = gen_reg_rtx (V2DFmode); ++}) ++ ++; Unsigned V2DF -> V2DI conversion - inexact exception disabled ++(define_insn "vec_df_to_di_u64" ++ [(set (match_operand:V2DI 0 "register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C")] ++ UNSPEC_VEC_VCLGDB))] ++ "TARGET_VX && UINTVAL (operands[2]) <= 7" ++ "vclgdb\t%v0,%v1,4,%b2" ++ [(set_attr "op_type" "VRR")]) ++ ++; The input needs to be multiplied with 2**op2 ++(define_expand "vec_ctul" ++ [(use (match_operand:QI 2 "const_int_operand" "")) ++ (set (match_dup 4) (mult:V2DF (match_operand:V2DF 1 "register_operand" "") ++ (match_dup 3))) ++ (set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_dup 4) (const_int 0)] ; According to current BFP rounding mode ++ UNSPEC_VEC_VCLGDB))] ++ "TARGET_VX" ++{ ++ REAL_VALUE_TYPE f; ++ rtx c; ++ ++ real_2expN (&f, INTVAL (operands[2]), DFmode); ++ c = CONST_DOUBLE_FROM_REAL_VALUE (f, DFmode); ++ ++ operands[3] = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, c, c)); ++ operands[3] = force_reg (V2DFmode, operands[3]); ++ operands[4] = gen_reg_rtx (V2DFmode); ++}) ++ ++; Vector load fp integer - IEEE inexact exception is suppressed ++(define_insn "vfidb" ++ [(set (match_operand:V2DI 0 
"register_operand" "=v") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:QI 2 "const_mask_operand" "C") ++ (match_operand:QI 3 "const_mask_operand" "C")] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX && !(UINTVAL (operands[2]) & 3) && UINTVAL (operands[3]) <= 7" ++ "vfidb\t%v0,%v1,%b2,%b3" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_ceil" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_INF)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_floor" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_MINF)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_trunc" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_TO_ZERO)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_roundc" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_CURRENT)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++(define_expand "vec_round" ++ [(set (match_operand:V2DI 0 "register_operand" "") ++ (unspec:V2DI [(match_operand:V2DF 1 "register_operand" "") ++ (const_int VEC_RND_NEAREST_TO_EVEN)] ++ UNSPEC_VEC_VFIDB))] ++ "TARGET_VX") ++ ++ ++; Vector load lengthened - V4SF -> V2DF ++ ++(define_insn "*vldeb" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V4SF 1 "register_operand" "v")] ++ UNSPEC_VEC_VLDEB))] ++ "TARGET_VX" ++ "vldeb\t%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_ld2f" ++ [; Initialize a vector to all zeroes. FIXME: This should not be ++ ; necessary since all elements of the vector will be set anyway. ++ ; This is just to make it explicit to the data flow framework. 
++ (set (match_dup 2) (match_dup 3)) ++ (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1 "memory_operand" "") ++ (const_int 0) ++ (match_dup 2)] ++ UNSPEC_VEC_SET)) ++ (set (match_dup 2) (unspec:V4SF [(match_dup 4) ++ (const_int 2) ++ (match_dup 2)] ++ UNSPEC_VEC_SET)) ++ (set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VLDEB))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V4SFmode); ++ operands[3] = CONST0_RTX (V4SFmode); ++ operands[4] = adjust_address (operands[1], SFmode, 4); ++}) ++ ++ ++; Vector load rounded - V2DF -> V4SF ++ ++(define_insn "*vledb" ++ [(set (match_operand:V4SF 0 "register_operand" "=v") ++ (unspec:V4SF [(match_operand:V2DF 1 "register_operand" "v")] ++ UNSPEC_VEC_VLEDB))] ++ "TARGET_VX" ++ "vledb\t%v0,%v1,0,0" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vec_st2f" ++ [(set (match_dup 2) ++ (unspec:V4SF [(match_operand:V2DF 0 "register_operand" "")] ++ UNSPEC_VEC_VLEDB)) ++ (set (match_operand:SF 1 "memory_operand" "") ++ (unspec:SF [(match_dup 2) (const_int 0)] UNSPEC_VEC_EXTRACT)) ++ (set (match_dup 3) ++ (unspec:SF [(match_dup 2) (const_int 2)] UNSPEC_VEC_EXTRACT))] ++ "TARGET_VX" ++{ ++ operands[2] = gen_reg_rtx (V4SFmode); ++ operands[3] = adjust_address (operands[1], SFmode, 4); ++}) ++ ++ ++; Vector load negated fp ++ ++(define_expand "vec_nabs" ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" ""))))] ++ "TARGET_VX") ++ ++; Vector square root fp vec_sqrt -> sqrt rtx standard name ++ ++; Vector FP test data class immediate ++ ++(define_insn "*vftcidb" ++ [(set (match_operand:V2DF 0 "register_operand" "=v") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:HI 2 "const_int_operand" "J")] ++ UNSPEC_VEC_VFTCIDB)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', 
\"J\")" ++ "vftcidb\t%v0,%v1,%x2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vftcidb_cconly" ++ [(set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:HI 2 "const_int_operand" "J")] ++ UNSPEC_VEC_VFTCIDBCC)) ++ (clobber (match_scratch:V2DI 0 "=v"))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")" ++ "vftcidb\t%v0,%v1,%x2" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_expand "vftcidb" ++ [(parallel ++ [(set (match_operand:V2DF 0 "register_operand" "") ++ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "") ++ (match_operand:HI 2 "const_int_operand" "")] ++ UNSPEC_VEC_VFTCIDB)) ++ (set (reg:CCRAW CC_REGNUM) ++ (unspec:CCRAW [(match_dup 1) (match_dup 2)] UNSPEC_VEC_VFTCIDBCC))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX && CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'J', \"J\")") ++ ++;; ++;; Integer compares ++;; ++ ++; All comparisons which produce a CC need fully populated (VI_HW) ++; vector arguments. Otherwise the any/all CCs would be just bogus. ++ ++(define_insn "*vec_cmp_cconly" ++ [(set (reg:VICMP CC_REGNUM) ++ (compare:VICMP (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (clobber (match_scratch:VI_HW 2 "=v"))] ++ "TARGET_VX" ++ "vcs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; FIXME: The following 2x3 definitions should be merged into 2 with ++; VICMP like above but I could not find a way to set the comparison ++; operator (eq) depending on the mode CCVEQ (mode_iterator). Or the ++; other way around - setting the mode depending on the code ++; (code_iterator). 
++(define_expand "vec_cmpeq_cc" ++ [(parallel ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (eq:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmph_cc" ++ [(parallel ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (gt:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphl_cc" ++ [(parallel ++ [(set (reg:CCVHU CC_REGNUM) ++ (compare:CCVHU (match_operand:VI_HW 1 "register_operand" "v") ++ (match_operand:VI_HW 2 "register_operand" "v"))) ++ (set (match_operand:VI_HW 0 "register_operand" "=v") ++ (gtu:VI_HW (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVHU CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ ++(define_insn "*vec_cmpeq_cc" ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (eq:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vceqs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmph_cc" ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (gt:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vchs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphl_cc" 
++ [(set (reg:CCVHU CC_REGNUM) ++ (compare:CCVHU (match_operand:VI_HW 0 "register_operand" "v") ++ (match_operand:VI_HW 1 "register_operand" "v"))) ++ (set (match_operand:VI_HW 2 "register_operand" "=v") ++ (gtu:VI_HW (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vchls\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++;; ++;; Floating point compares ++;; ++ ++(define_insn "*vec_cmpv2df_cconly" ++ [(set (reg:VFCMP CC_REGNUM) ++ (compare:VFCMP (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (clobber (match_scratch:V2DI 2 "=v"))] ++ "TARGET_VX" ++ "vfcdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++; FIXME: Merge the following 2x3 patterns with VFCMP ++(define_expand "vec_cmpeqv2df_cc" ++ [(parallel ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (eq:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVEQ CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphv2df_cc" ++ [(parallel ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (gt:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++(define_expand "vec_cmphev2df_cc" ++ [(parallel ++ [(set (reg:CCVFHE CC_REGNUM) ++ (compare:CCVFHE (match_operand:V2DF 1 "register_operand" "v") ++ (match_operand:V2DF 2 "register_operand" "v"))) ++ (set (match_operand:V2DI 0 "register_operand" "=v") ++ (ge:V2DI (match_dup 1) (match_dup 2)))]) ++ (set (match_operand:SI 3 "memory_operand" "") ++ (unspec:SI [(reg:CCVFHE CC_REGNUM)] UNSPEC_CC_TO_INT))] ++ "TARGET_VX") ++ ++ 
++(define_insn "*vec_cmpeqv2df_cc" ++ [(set (reg:CCVEQ CC_REGNUM) ++ (compare:CCVEQ (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (eq:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfcedbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphv2df_cc" ++ [(set (reg:CCVH CC_REGNUM) ++ (compare:CCVH (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (gt:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfchdbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) ++ ++(define_insn "*vec_cmphev2df_cc" ++ [(set (reg:CCVFHE CC_REGNUM) ++ (compare:CCVFHE (match_operand:V2DF 0 "register_operand" "v") ++ (match_operand:V2DF 1 "register_operand" "v"))) ++ (set (match_operand:V2DI 2 "register_operand" "=v") ++ (ge:V2DI (match_dup 0) (match_dup 1)))] ++ "TARGET_VX" ++ "vfchedbs\t%v2,%v0,%v1" ++ [(set_attr "op_type" "VRR")]) +--- gcc/config.gcc 2016-05-11 14:46:08.298981685 +0200 ++++ gcc/config.gcc 2016-05-11 17:17:32.000000000 +0200 +@@ -452,7 +452,7 @@ s390*-*-*) + cpu_type=s390 + need_64bit_hwint=yes + extra_options="${extra_options} fused-madd.opt" +- extra_headers="s390intrin.h htmintrin.h htmxlintrin.h" ++ extra_headers="s390intrin.h htmintrin.h htmxlintrin.h vecintrin.h" + ;; + # Note the 'l'; we need to be able to match e.g. "shle" or "shl". 
+ sh[123456789lbe]*-*-* | sh-*-*) +@@ -2249,27 +2249,35 @@ rx-*-elf*) + s390-*-linux*) + default_gnu_indirect_function=yes + tm_file="s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" + if test x$enable_targets = xall; then + tmake_file="${tmake_file} s390/t-linux64" + fi ++ tmake_file="${tmake_file} s390/t-s390" + ;; + s390x-*-linux*) + default_gnu_indirect_function=yes + tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h s390/linux.h" + tm_p_file=s390/s390-protos.h ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" + md_file=s390/s390.md + extra_modes=s390/s390-modes.def + out_file=s390/s390.c +- tmake_file="${tmake_file} s390/t-linux64" ++ tmake_file="${tmake_file} s390/t-linux64 s390/t-s390" + ;; + s390x-ibm-tpf*) +- tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h" +- tm_p_file=s390/s390-protos.h +- md_file=s390/s390.md +- extra_modes=s390/s390-modes.def +- out_file=s390/s390.c +- thread_file='tpf' ++ tm_file="s390/s390x.h s390/s390.h dbxelf.h elfos.h s390/tpf.h" ++ tm_p_file=s390/s390-protos.h ++ c_target_objs="${c_target_objs} s390-c.o" ++ cxx_target_objs="${cxx_target_objs} s390-c.o" ++ md_file=s390/s390.md ++ extra_modes=s390/s390-modes.def ++ out_file=s390/s390.c ++ thread_file='tpf' + extra_options="${extra_options} s390/tpf.opt" ++ tmake_file="${tmake_file} s390/t-s390" + ;; + score-*-elf) + gas=yes +@@ -3603,7 +3611,7 @@ case "${target}" in + for which in arch tune; do + eval "val=\$with_$which" + case ${val} in +- "" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12) ++ "" | g5 | g6 | z900 | z990 | z9-109 | z9-ec | z10 | z196 | zEC12 | z13) + # OK + ;; + *) +--- gcc/configure 2016-05-11 14:46:08.719976035 +0200 ++++ gcc/configure 2016-05-11 19:41:14.975813805 +0200 +@@ -26000,6 +26000,42 @@ $as_echo "#define HAVE_LD_PERSONALITY_RE + 
+ fi + ;; ++ s390*-*-*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .gnu_attribute support" >&5 ++$as_echo_n "checking assembler for .gnu_attribute support... " >&6; } ++if test "${gcc_cv_as_s390_gnu_attribute+set}" = set; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_s390_gnu_attribute=no ++ if test $in_tree_gas = yes; then ++ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 18 \) \* 1000 + 0` ++ then gcc_cv_as_s390_gnu_attribute=yes ++fi ++ elif test x$gcc_cv_as != x; then ++ $as_echo '.gnu_attribute 8,1' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_s390_gnu_attribute=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_s390_gnu_attribute" >&5 ++$as_echo "$gcc_cv_as_s390_gnu_attribute" >&6; } ++if test $gcc_cv_as_s390_gnu_attribute = yes; then ++ ++$as_echo "#define HAVE_AS_GNU_ATTRIBUTE 1" >>confdefs.h ++ ++fi ++ ;; + esac + + # Mips and HP-UX need the GNU assembler. +--- gcc/configure.ac 2015-06-18 16:32:50.000000000 +0200 ++++ gcc/configure.ac 2016-05-11 19:34:04.507631160 +0200 +@@ -4207,6 +4207,13 @@ EOF + pointers into PC-relative form.]) + fi + ;; ++ s390*-*-*) ++ gcc_GAS_CHECK_FEATURE([.gnu_attribute support], ++ gcc_cv_as_s390_gnu_attribute, [2,18,0],, ++ [.gnu_attribute 8,1],, ++ [AC_DEFINE(HAVE_AS_GNU_ATTRIBUTE, 1, ++ [Define if your assembler supports .gnu_attribute.])]) ++ ;; + esac + + # Mips and HP-UX need the GNU assembler. +--- gcc/doc/invoke.texi 2016-05-11 14:46:08.615977431 +0200 ++++ gcc/doc/invoke.texi 2016-05-11 19:27:23.065121001 +0200 +@@ -885,6 +885,7 @@ See RS/6000 and PowerPC Options. 
+ -mbackchain -mno-backchain -mpacked-stack -mno-packed-stack @gol + -msmall-exec -mno-small-exec -mmvcle -mno-mvcle @gol + -m64 -m31 -mdebug -mno-debug -mesa -mzarch @gol ++-mhtm -mvx -mzvector @gol + -mtpf-trace -mno-tpf-trace -mfused-madd -mno-fused-madd @gol + -mwarn-framesize -mwarn-dynamicstack -mstack-size -mstack-guard @gol + -mhotpatch=@var{halfwords},@var{halfwords}} +@@ -18596,6 +18597,46 @@ When generating code compliant to the GN + the default is @option{-mesa}. When generating code compliant + to the GNU/Linux for zSeries ABI, the default is @option{-mzarch}. + ++@item -mhtm ++@itemx -mno-htm ++@opindex mhtm ++@opindex mno-htm ++The @option{-mhtm} option enables a set of builtins making use of ++instructions available with the transactional execution facility ++introduced with the IBM zEnterprise EC12 machine generation ++@ref{S/390 System z Built-in Functions}. ++@option{-mhtm} is enabled by default when using @option{-march=zEC12}. ++ ++@item -mvx ++@itemx -mno-vx ++@opindex mvx ++@opindex mno-vx ++When @option{-mvx} is specified, generate code using the instructions ++available with the vector extension facility introduced with the IBM ++z13 machine generation. ++This option changes the ABI for some vector type values with regard to ++alignment and calling conventions. In case vector type values are ++being used in an ABI-relevant context a GAS @samp{.gnu_attribute} ++command will be added to mark the resulting binary with the ABI used. ++@option{-mvx} is enabled by default when using @option{-march=z13}. ++ ++@item -mzvector ++@itemx -mno-zvector ++@opindex mzvector ++@opindex mno-zvector ++The @option{-mzvector} option enables vector language extensions and ++builtins using instructions available with the vector extension ++facility introduced with the IBM z13 machine generation. ++This option adds support for @samp{vector} to be used as a keyword to ++define vector type variables and arguments. 
@samp{vector} is only ++available when GNU extensions are enabled. It will not be expanded ++when requesting strict standard compliance e.g. with @option{-std=c99}. ++In addition to the GCC low-level builtins @option{-mzvector} enables ++a set of builtins added for compatibility with Altivec-style ++implementations like Power and Cell. In order to make use of these ++builtins the header file @file{vecintrin.h} needs to be included. ++@option{-mzvector} is disabled by default. ++ + @item -mmvcle + @itemx -mno-mvcle + @opindex mmvcle +@@ -18617,7 +18658,8 @@ The default is to not print debug inform + Generate code that runs on @var{cpu-type}, which is the name of a system + representing a certain processor type. Possible values for + @var{cpu-type} are @samp{g5}, @samp{g6}, @samp{z900}, @samp{z990}, +-@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, and @samp{zEC12}. ++@samp{z9-109}, @samp{z9-ec}, @samp{z10}, @samp{z196}, @samp{zEC12}, ++and @samp{z13}. + When generating code using the instructions available on z/Architecture, + the default is @option{-march=z900}. Otherwise, the default is + @option{-march=g5}. +--- gcc/doc/tm.texi 2016-05-11 14:46:08.216982786 +0200 ++++ gcc/doc/tm.texi 2016-05-11 15:41:36.000000000 +0200 +@@ -8983,6 +8983,13 @@ register in Dwarf. Otherwise, this hook + If not defined, the default is to return @code{NULL_RTX}. + @end deftypefn + ++@deftypefn {Target Hook} {enum machine_mode} TARGET_DWARF_FRAME_REG_MODE (int @var{regno}) ++Given a register, this hook should return the mode which the ++corresponding Dwarf frame register should have. 
This is normally ++used to return a smaller mode than the raw mode to prevent call ++clobbered parts of a register altering the frame register size ++@end deftypefn ++ + @deftypefn {Target Hook} void TARGET_INIT_DWARF_REG_SIZES_EXTRA (tree @var{address}) + If some registers are represented in Dwarf-2 unwind information in + multiple pieces, define this hook to fill in information about the +--- gcc/doc/tm.texi.in 2016-05-11 14:46:08.213982826 +0200 ++++ gcc/doc/tm.texi.in 2016-05-11 15:41:36.000000000 +0200 +@@ -8854,6 +8854,8 @@ register in Dwarf. Otherwise, this hook + If not defined, the default is to return @code{NULL_RTX}. + @end deftypefn + ++@hook TARGET_DWARF_FRAME_REG_MODE ++ + @hook TARGET_INIT_DWARF_REG_SIZES_EXTRA + If some registers are represented in Dwarf-2 unwind information in + multiple pieces, define this hook to fill in information about the +--- gcc/dwarf2cfi.c 2013-01-21 16:10:46.000000000 +0100 ++++ gcc/dwarf2cfi.c 2016-05-11 15:41:36.000000000 +0200 +@@ -244,11 +244,9 @@ expand_builtin_init_dwarf_reg_sizes (tre + if (rnum < DWARF_FRAME_REGISTERS) + { + HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (mode); +- enum machine_mode save_mode = reg_raw_mode[i]; + HOST_WIDE_INT size; ++ enum machine_mode save_mode = targetm.dwarf_frame_reg_mode (i); + +- if (HARD_REGNO_CALL_PART_CLOBBERED (i, save_mode)) +- save_mode = choose_hard_reg_mode (i, 1, true); + if (dnum == DWARF_FRAME_RETURN_COLUMN) + { + if (save_mode == VOIDmode) +--- gcc/genattrtab.c 2013-01-21 16:08:23.000000000 +0100 ++++ gcc/genattrtab.c 2016-05-11 17:32:29.000000000 +0200 +@@ -229,7 +229,7 @@ static int *insn_n_alternatives; + /* Stores, for each insn code, a bitmap that has bits on for each possible + alternative. */ + +-static int *insn_alternatives; ++static uint64_t *insn_alternatives; + + /* Used to simplify expressions. 
*/ + +@@ -257,7 +257,7 @@ static char *attr_printf (unsi + ATTRIBUTE_PRINTF_2; + static rtx make_numeric_value (int); + static struct attr_desc *find_attr (const char **, int); +-static rtx mk_attr_alt (int); ++static rtx mk_attr_alt (uint64_t); + static char *next_comma_elt (const char **); + static rtx insert_right_side (enum rtx_code, rtx, rtx, int, int); + static rtx copy_boolean (rtx); +@@ -771,7 +771,7 @@ check_attr_test (rtx exp, int is_const, + if (attr == NULL) + { + if (! strcmp (XSTR (exp, 0), "alternative")) +- return mk_attr_alt (1 << atoi (XSTR (exp, 1))); ++ return mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1))); + else + fatal ("unknown attribute `%s' in EQ_ATTR", XSTR (exp, 0)); + } +@@ -817,7 +817,7 @@ check_attr_test (rtx exp, int is_const, + + name_ptr = XSTR (exp, 1); + while ((p = next_comma_elt (&name_ptr)) != NULL) +- set |= 1 << atoi (p); ++ set |= ((uint64_t) 1) << atoi (p); + + return mk_attr_alt (set); + } +@@ -1292,7 +1292,7 @@ static struct attr_value * + get_attr_value (rtx value, struct attr_desc *attr, int insn_code) + { + struct attr_value *av; +- int num_alt = 0; ++ uint64_t num_alt = 0; + + value = make_canonical (attr, value); + if (compares_alternatives_p (value)) +@@ -1934,7 +1934,7 @@ insert_right_side (enum rtx_code code, r + This routine is passed an expression and either AND or IOR. It returns a + bitmask indicating which alternatives are mentioned within EXP. */ + +-static int ++static uint64_t + compute_alternative_mask (rtx exp, enum rtx_code code) + { + const char *string; +@@ -1965,15 +1965,15 @@ compute_alternative_mask (rtx exp, enum + return 0; + + if (string[1] == 0) +- return 1 << (string[0] - '0'); +- return 1 << atoi (string); ++ return ((uint64_t) 1) << (string[0] - '0'); ++ return ((uint64_t) 1) << atoi (string); + } + + /* Given I, a single-bit mask, return RTX to compare the `alternative' + attribute with the value represented by that bit. 
*/ + + static rtx +-make_alternative_compare (int mask) ++make_alternative_compare (uint64_t mask) + { + return mk_attr_alt (mask); + } +@@ -2472,7 +2472,7 @@ attr_alt_complement (rtx s) + in E. */ + + static rtx +-mk_attr_alt (int e) ++mk_attr_alt (uint64_t e) + { + rtx result = rtx_alloc (EQ_ATTR_ALT); + +@@ -2499,7 +2499,7 @@ simplify_test_exp (rtx exp, int insn_cod + struct attr_value *av; + struct insn_ent *ie; + struct attr_value_list *iv; +- int i; ++ uint64_t i; + rtx newexp = exp; + bool left_alt, right_alt; + +@@ -2779,7 +2779,7 @@ simplify_test_exp (rtx exp, int insn_cod + case EQ_ATTR: + if (XSTR (exp, 0) == alternative_name) + { +- newexp = mk_attr_alt (1 << atoi (XSTR (exp, 1))); ++ newexp = mk_attr_alt (((uint64_t) 1) << atoi (XSTR (exp, 1))); + break; + } + +@@ -5240,10 +5240,11 @@ main (int argc, char **argv) + expand_delays (); + + /* Make `insn_alternatives'. */ +- insn_alternatives = oballocvec (int, insn_code_number); ++ insn_alternatives = oballocvec (uint64_t, insn_code_number); + for (id = defs; id; id = id->next) + if (id->insn_code >= 0) +- insn_alternatives[id->insn_code] = (1 << id->num_alternatives) - 1; ++ insn_alternatives[id->insn_code] ++ = (((uint64_t) 1) << id->num_alternatives) - 1; + + /* Make `insn_n_alternatives'. */ + insn_n_alternatives = oballocvec (int, insn_code_number); +--- gcc/optabs.c 2014-05-15 10:46:12.000000000 +0200 ++++ gcc/optabs.c 2016-05-11 15:53:11.000000000 +0200 +@@ -6659,11 +6659,11 @@ expand_vec_perm (enum machine_mode mode, + enum machine_mode selmode = GET_MODE (sel); + if (u == 2) + sel = expand_simple_binop (selmode, PLUS, sel, sel, +- sel, 0, OPTAB_DIRECT); ++ NULL, 0, OPTAB_DIRECT); + else + sel = expand_simple_binop (selmode, ASHIFT, sel, + GEN_INT (exact_log2 (u)), +- sel, 0, OPTAB_DIRECT); ++ NULL, 0, OPTAB_DIRECT); + gcc_assert (sel != NULL); + + /* Broadcast the low byte each element into each of its bytes. 
*/ +--- gcc/recog.h 2013-09-09 19:16:08.000000000 +0200 ++++ gcc/recog.h 2016-05-11 15:52:48.000000000 +0200 +@@ -21,7 +21,7 @@ along with GCC; see the file COPYING3. + #define GCC_RECOG_H + + /* Random number that should be large enough for all purposes. */ +-#define MAX_RECOG_ALTERNATIVES 30 ++#define MAX_RECOG_ALTERNATIVES 35 + + /* Types of operands. */ + enum op_type { +--- gcc/target.def 2013-03-04 12:46:23.000000000 +0100 ++++ gcc/target.def 2016-05-11 15:41:36.000000000 +0200 +@@ -1834,6 +1834,17 @@ DEFHOOK + rtx, (rtx reg), + hook_rtx_rtx_null) + ++/* Given a register return the mode of the corresponding DWARF frame ++ register. */ ++DEFHOOK ++(dwarf_frame_reg_mode, ++ "Given a register, this hook should return the mode which the\n\ ++corresponding Dwarf frame register should have. This is normally\n\ ++used to return a smaller mode than the raw mode to prevent call\n\ ++clobbered parts of a register altering the frame register size", ++ enum machine_mode, (int regno), ++ default_dwarf_frame_reg_mode) ++ + /* If expand_builtin_init_dwarf_reg_sizes needs to fill in table + entries not corresponding directly to registers below + FIRST_PSEUDO_REGISTER, this hook should generate the necessary +--- gcc/targhooks.c 2013-01-21 16:02:59.000000000 +0100 ++++ gcc/targhooks.c 2016-05-11 15:41:36.000000000 +0200 +@@ -1411,6 +1411,19 @@ default_debug_unwind_info (void) + return UI_NONE; + } + ++/* Determine the correct mode for a Dwarf frame register that represents ++ register REGNO. */ ++ ++enum machine_mode ++default_dwarf_frame_reg_mode (int regno) ++{ ++ enum machine_mode save_mode = reg_raw_mode[regno]; ++ ++ if (HARD_REGNO_CALL_PART_CLOBBERED (regno, save_mode)) ++ save_mode = choose_hard_reg_mode (regno, 1, true); ++ return save_mode; ++} ++ + /* To be used by targets where reg_raw_mode doesn't return the right + mode for registers used in apply_builtin_return and apply_builtin_arg. 
*/ + +--- gcc/targhooks.h 2013-01-21 16:03:00.000000000 +0100 ++++ gcc/targhooks.h 2016-05-11 15:42:21.000000000 +0200 +@@ -186,6 +186,7 @@ extern int default_label_align_max_skip + extern int default_jump_align_max_skip (rtx); + extern section * default_function_section(tree decl, enum node_frequency freq, + bool startup, bool exit); ++extern enum machine_mode default_dwarf_frame_reg_mode (int); + extern enum machine_mode default_get_reg_raw_mode(int); + + extern void *default_get_pch_validity (size_t *); +--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c 2012-12-13 11:28:46.000000000 +0100 ++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c 2016-05-11 17:30:16.000000000 +0200 +@@ -1,5 +1,6 @@ + /* { dg-do run { target vect_cmdline_needed } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ + + #include +--- gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c 2012-12-13 11:28:46.000000000 +0100 ++++ gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c 2016-05-11 17:30:16.000000000 +0200 +@@ -1,5 +1,6 @@ + /* { dg-do run { target vect_cmdline_needed } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-vx" { target { s390*-*-* } } } */ + /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ + + #include +--- gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/htm-builtins-z13-1.c 2016-05-11 17:34:03.000000000 +0200 +@@ -0,0 +1,34 @@ ++/* Verify if VRs are saved and restored. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -march=z13 -mzarch" } */ ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++v4si __attribute__((noinline)) ++foo (v4si a) ++{ ++ a += (v4si){ 1, 1, 1, 1 }; ++ if (__builtin_tbegin (0) == 0) ++ { ++ a += (v4si){ 1, 1, 1, 1 }; ++ __builtin_tabort (256); ++ __builtin_tend (); ++ } ++ else ++ a -= (v4si){ 1, 1, 1, 1 }; ++ ++ return a; ++} ++ ++int ++main () ++{ ++ v4si a = (v4si){ 0, 0, 0, 0 }; ++ ++ a = foo (a); ++ ++ if (a[0] != 0) ++ __builtin_abort (); ++} +--- gcc/testsuite/gcc.target/s390/s390.exp 2015-06-18 16:32:12.000000000 +0200 ++++ gcc/testsuite/gcc.target/s390/s390.exp 2016-05-11 17:12:20.000000000 +0200 +@@ -37,6 +37,21 @@ proc check_effective_target_htm { } { + }] "-march=zEC12 -mzarch" ] } { return 0 } else { return 1 } + } + ++# Return 1 if vector (va - vector add) instructions are understood by ++# the assembler and can be executed. This also covers checking for ++# the VX kernel feature. A kernel without that feature does not ++# enable the vector facility and the following check will die with a ++# signal. ++proc check_effective_target_vector { } { ++ if { ![check_runtime s390_check_vector [subst { ++ int main (void) ++ { ++ asm ("va %%v24, %%v26, %%v28, 3" : : : "v24", "v26", "v28"); ++ return 0; ++ } ++ }] "-march=z13 -mzarch" ] } { return 0 } else { return 1 } ++} ++ + # If a testcase doesn't have special options, use these. + global DEFAULT_CFLAGS + if ![info exists DEFAULT_CFLAGS] then { +@@ -59,5 +74,8 @@ set-torture-options $HOTPATCH_TEST_OPTS + gcc-dg-runtest [lsort [glob -nocomplain $hotpatch_tests]] $DEFAULT_CFLAGS + torture-finish + ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \ ++ "" $DEFAULT_CFLAGS ++ + # All done. 
+ dg-finish +--- gcc/testsuite/gcc.target/s390/vector/int128-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/int128-1.c 2016-05-11 18:10:56.000000000 +0200 +@@ -0,0 +1,47 @@ ++/* Check that vaq/vsq are used for int128 operations. */ ++ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++ ++const __int128 c = (__int128)0x0123456789abcd55 + ((__int128)7 << 64); ++ ++ ++__int128 ++addreg(__int128 a, __int128 b) ++{ ++ return a + b; ++} ++ ++__int128 ++addconst(__int128 a) ++{ ++ return a + c; ++} ++ ++__int128 ++addmem(__int128 *a, __int128_t *b) ++{ ++ return *a + *b; ++} ++ ++__int128 ++subreg(__int128 a, __int128 b) ++{ ++ return a - b; ++} ++ ++__int128 ++subconst(__int128 a) ++{ ++ return a - c; /* This becomes vaq as well. */ ++} ++ ++__int128 ++submem(__int128 *a, __int128_t *b) ++{ ++ return *a - *b; ++} ++ ++/* { dg-final { scan-assembler-times "vaq" 4 } } */ ++/* { dg-final { scan-assembler-times "vsq" 2 } } */ +--- gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/stpcpy-1.c 2016-05-11 18:11:22.000000000 +0200 +@@ -0,0 +1,100 @@ ++/* The z13 stpcpy implementation plays some alignment tricks for good ++ performance. This test tries to make sure it works correctly and ++ does not access bytes beyond the source and destination ++ strings. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++#include ++#include ++ ++#define PAGE_SIZE 4096 ++ ++struct { ++ char unused[PAGE_SIZE - 32]; ++ char m32[15]; /* page bndry - 32 */ ++ char m17[1]; ++ char m16[1]; ++ char m15[14]; ++ char m1[1]; ++ char next_page[PAGE_SIZE]; ++} s, d __attribute__((aligned(PAGE_SIZE))); ++ ++char *__attribute__((noinline)) ++my_stpcpy(char *dest, const char *src) ++{ ++ return __builtin_stpcpy (dest, src); ++} ++ ++void __attribute__ ((noinline)) ++check (char *dest, char *src, size_t len) ++{ ++ char *result; ++ ++ result = my_stpcpy (dest, src); ++ if (result != dest + len) ++ __builtin_abort (); ++ if (__builtin_memcmp (src, dest, len) != 0) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ char *src[5] = { s.m32, s.m17, s.m16, s.m15, s.m1 }; ++ char *dst[5] = { d.m32, d.m17, d.m16, d.m15, d.m1 }; ++ int len[8] = { 33, 32, 31, 17, 16, 15, 1, 0 }; ++ int i, j, k; ++ char backup; ++ ++ for (i = 0; i < sizeof (s); i++) ++ ((char*)&s)[i] = i % 26 + 97; ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ for (k = 0; k < 8; k++) ++ { ++ backup = src[j][len[k]]; ++ src[j][len[k]] = 0; ++ __builtin_memset (&d, 0, sizeof (d)); ++ check (dst[i], src[j], len[k]); ++ src[j][len[k]] = backup; ++ } ++ ++ /* Make all source strings end before the page boundary. 
*/ ++ backup = s.m1[0]; ++ s.m1[0] = 0; ++ ++ if (mprotect (&s.next_page, PAGE_SIZE, PROT_NONE) == -1) ++ perror ("mprotect src"); ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ check (dst[i], src[j], ++ PAGE_SIZE - ((unsigned long)src[j] & ((1UL << 12) - 1)) - 1); ++ ++ if (mprotect (&s.next_page, PAGE_SIZE, PROT_READ | PROT_WRITE) == -1) ++ perror ("mprotect src"); ++ ++ s.m1[0] = backup; ++ ++ if (mprotect (&d.next_page, PAGE_SIZE, PROT_NONE) == -1) ++ perror ("mprotect dst"); ++ ++ for (i = 0; i < 5; i++) ++ for (j = 0; j < 5; j++) ++ { ++ int len = PAGE_SIZE - ((unsigned long)dst[i] & ((1UL << 12) - 1)) - 1; ++ char backup = src[j][len]; ++ ++ src[j][len] = 0; ++ __builtin_memset (&d, 0, ++ (unsigned long)&d.next_page - (unsigned long)&d); ++ check (dst[i], src[j], len); ++ src[j][len] = backup; ++ } ++ ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-1.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* Make sure the last argument is fetched from the argument overflow area. */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,160\\(%r15\\)" { target lp64 } } } */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,96\\(%r15\\)" { target ilp32 } } } */ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df ++add (v2df a, v2df b, v2df c, v2df d, ++ v2df e, v2df f, v2df g, v2df h, v2df i) ++{ ++ return a + b + c + d + e + f + g + h + i; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,15 @@ ++/* Check calling convention in the vector ABI. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* This needs to be v24 = v24 * v26 + v28 */ ++/* { dg-final { scan-assembler "vfmadb\t%v24,%v24,%v26,%v28" } } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df ++madd (v2df a, v2df b, v2df c) ++{ ++ return a * b + c; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-3.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,101 @@ ++/* Check calling convention in the vector ABI regarding vector like structs. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* addA */ ++/* { dg-final { scan-assembler-times "vfadb\t%v24,%v24,%v26" 1 } } */ ++ ++/* addB and addE*/ ++/* { dg-final { scan-assembler-times "vah\t%v24,%v\[0-9\]*,%v\[0-9\]*" 2 } } */ ++ ++/* addC */ ++/* { dg-final { scan-assembler-times "vag\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */ ++ ++/* addB and addC are expected to read the arguments via pointers in r2 and r3 */ ++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r2\\)" 2 } } */ ++/* { dg-final { scan-assembler-times "vl\t%v\[0-9\]*,0\\(%r3\\)" 2 } } */ ++ ++/* addD */ ++/* { dg-final { scan-assembler-times "vaf\t%v24,%v24,%v26" 1 } } */ ++ ++/* addE */ ++/* { dg-final { scan-assembler-times "vah\t%v24,%v24,%v26" 1 } } */ ++ ++/* addF */ ++/* { dg-final { scan-assembler-times "vab\t%v24,%v\[0-9\]*,%v\[0-9\]*" 1 } } */ ++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r2,32" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "srlg\t%r\[0-9\]*,%r3,32" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r2" 1 { target { ! lp64 } } } } */ ++/* { dg-final { scan-assembler-times "llgfr\t%.*,%r4" 1 { target { ! 
lp64 } } } } */ ++ ++ ++typedef double v2df __attribute__((vector_size(16))); ++typedef long long v2di __attribute__((vector_size(16))); ++typedef int v4si __attribute__((vector_size(16))); ++typedef short v8hi __attribute__((vector_size(16))); ++ ++typedef short v2hi __attribute__((vector_size(4))); ++typedef char v4qi __attribute__((vector_size(4))); ++ ++/* Vector like structs are passed in VRs. */ ++struct A { v2df a; }; ++ ++v2df ++addA (struct A a, struct A b) ++{ ++ return a.a + b.a; ++} ++ ++/* Only single element vectors qualify as vector type parms. This one ++ is passed as a struct. Since it is bigger than 8 bytes it is passed ++ on the stack with the reference being put into r2/r3. */ ++struct B { v8hi a; char b;}; ++ ++v8hi ++addB (struct B a, struct B b) ++{ ++ return a.a + b.a; ++} ++ ++/* The resulting struct is bigger than 16 bytes and therefore passed ++ on the stack with the references residing in r2/r3. */ ++struct C { v2di __attribute__((aligned(32))) a; }; ++ ++v2di ++addC (struct C a, struct C b) ++{ ++ return a.a + b.a; ++} ++ ++/* The attribute here does not have any effect. So this struct stays ++ vector like and hence is passed in a VR. */ ++struct D { v4si __attribute__((aligned(16))) a; }; ++ ++v4si ++addD (struct D a, struct D b) ++{ ++ return a.a + b.a; ++} ++ ++ ++/* Smaller vectors are passed in vector registers. This also applies ++ for vector like structs. */ ++struct E { v2hi a; }; ++ ++v2hi ++addE (struct E a, struct E b) ++{ ++ return a.a + b.a; ++} ++ ++/* This struct is not passed in VRs because of padding. But since it ++ fits in a GPR and has a power of two size. It is passed in ++ GPRs. 
*/ ++struct F { v4qi __attribute__((aligned(8))) a; }; ++ ++v4qi ++addF (struct F a, struct F b) ++{ ++ return a.a + b.a; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-4.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,19 @@ ++/* Check calling convention in the vector ABI. Smaller vector need to ++ be placed left-justified in the stack slot. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "lde\t%.*,160\\\(%r15\\\)" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "lde\t%.*,168\\\(%r15\\\)" 1 { target lp64 } } } */ ++/* { dg-final { scan-assembler-times "lde\t%.*,96\\\(%r15\\\)" 1 { target { ! lp64 } } } } */ ++/* { dg-final { scan-assembler-times "lde\t%.*,100\\\(%r15\\\)" 1 { target { ! lp64 } } } } */ ++ ++typedef char __attribute__((vector_size(4))) v4qi; ++ ++v4qi ++foo (v4qi a, v4qi b, v4qi c, v4qi d, v4qi e, ++ v4qi f, v4qi g, v4qi h, v4qi i, v4qi j) ++{ ++ return (a + b + c + d + e + f + g + h + i + j); ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-align-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,48 @@ ++/* Check alignment convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++#include <stddef.h> ++ ++/* Vector types get an 8 byte alignment. */ ++typedef double v2df __attribute__((vector_size(16))); ++typedef struct ++{ ++ char a; ++ v2df b; ++} A; ++char c1[offsetof (A, b) == 8 ? 0 : -1]; ++ ++/* Smaller vector allow for smaller alignments. */ ++typedef char v4qi __attribute__((vector_size(4))); ++typedef struct ++{ ++ char a; ++ v4qi b; ++} B; ++char c2[offsetof (B, b) == 4 ?
0 : -1]; ++ ++ ++typedef double v4df __attribute__((vector_size(32))); ++typedef struct ++{ ++ char a; ++ v4df b; ++} C; ++char c3[offsetof (C, b) == 8 ? 0 : -1]; ++ ++/* However, we allow the programmer to chose a bigger alignment. */ ++typedef struct ++{ ++ char a; ++ v2df b __attribute__((aligned(16))); ++} D; ++char c4[offsetof (D, b) == 16 ? 0 : -1]; ++ ++typedef struct ++{ ++ char a; ++ v2df b; ++} __attribute__((packed)) E; ++char c5[offsetof (E, b) == 1 ? 0 : -1]; +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-1.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mno-vx" } */ ++ ++/* The function passes arguments whose calling conventions change with ++ -mvx/-mno-vx. In that case GCC has to emit the ABI attribute to ++ allow GDB and Binutils to detect this. */ ++/* { dg-final { scan-assembler "gnu_attribute 8, 1" } } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df ++add (v2df a, v2df b, v2df c, v2df d, ++ v2df e, v2df f, v2df g, v2df h, v2df i) ++{ ++ return a + b + c + d + e + f + g + h + i; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-2.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,53 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* No abi attribute should be emitted when nothing relevant happened. */ ++/* { dg-final { scan-assembler-not "gnu_attribute" } } */ ++ ++#include <stdarg.h> ++ ++/* Local use is ok.
*/ ++ ++typedef int v4si __attribute__((vector_size(16))); ++ ++static ++v4si __attribute__((__noinline__)) ++foo (v4si a) ++{ ++ return a + (v4si){ 1, 2, 3, 4 }; ++} ++ ++int ++bar (int a) ++{ ++ return foo ((v4si){ 1, 1, 1, 1 })[1]; ++} ++ ++/* Big vector type only used as function argument and return value ++ without being a struct/union member. The alignment change is not ++ relevant here. */ ++ ++typedef double v4df __attribute__((vector_size(32))); ++ ++v4df ++add (v4df a, v4df b, v4df c, v4df d, ++ v4df e, v4df f, v4df g, v4df h, v4df i) ++{ ++ return a + b + c + d + e + f + g + h + i; ++} ++ ++double ++bar2 (int n, ...) ++{ ++ double ret; ++ v4df a; ++ va_list va; ++ ++ va_start (va, n); ++ ret = va_arg (va, v4df)[2]; ++ va_end (va); ++ ++ return ret; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-3.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++typedef double v4df __attribute__((vector_size(32))); ++typedef struct { v4df a; } s; ++ ++s ++add (v4df a, v4df b, v4df c, v4df d, ++ v4df e, v4df f, v4df g, v4df h, v4df i) ++{ ++ s t; ++ t.a = a + b + c + d + e + f + g + h + i; ++ return t; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-4.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,17 @@ ++/* Check calling convention in the vector ABI. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++extern void bar (v4si); ++ ++void ++foo (int a) ++{ ++ v4si b = (v4si){ a, a, a, a }; ++ bar (b); ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-5.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,19 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++#include <stdarg.h> ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++extern void bar (int, ...); ++ ++void ++foo (int a) ++{ ++ v4si b = (v4si){ a, a, a, a }; ++ bar (1, b); ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-attr-6.c 2016-05-11 17:32:39.000000000 +0200 +@@ -0,0 +1,24 @@ ++/* Check calling convention in the vector ABI. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++#include <stdarg.h> ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++int ++bar (int n, ...) ++{ ++ int ret; ++ v4si a; ++ va_list va; ++ ++ va_start (va, n); ++ ret = va_arg (va, v4si)[2]; ++ va_end (va); ++ ++ return ret; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,24 @@ ++/* Check calling convention in the vector ABI for single element vectors.
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 7 } } */ ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++typedef char __attribute__((vector_size(1))) v1qi; ++typedef short int __attribute__((vector_size(2))) v1hi; ++typedef int __attribute__((vector_size(4))) v1si; ++typedef long long __attribute__((vector_size(8))) v1di; ++typedef float __attribute__((vector_size(4))) v1sf; ++typedef double __attribute__((vector_size(8))) v1df; ++typedef long double __attribute__((vector_size(16))) v1tf; ++ ++v1qi foo1 (v4si a, v1qi b) { return b; } ++v1hi foo2 (v4si a, v1hi b) { return b; } ++v1si foo3 (v4si a, v1si b) { return b; } ++v1di foo4 (v4si a, v1di b) { return b; } ++v1sf foo5 (v4si a, v1sf b) { return b; } ++v1df foo6 (v4si a, v1df b) { return b; } ++v1tf foo7 (v4si a, v1tf b) { return b; } +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-single-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,12 @@ ++/* Check calling convention in the vector ABI for single element vectors. */ ++ ++/* { dg-do compile { target { lp64 } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vlr\t%v24,%v26" 1 } } */ ++ ++typedef int __attribute__((vector_size(16))) v4si; ++ ++typedef __int128_t __attribute__((vector_size(16))) v1ti; ++ ++v1ti foo (v4si a, v1ti b) { return b; } +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-struct-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,37 @@ ++/* Check calling convention in the vector ABI. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* c.i and c.j are passed by reference since a struct with two ++ elements is no vector type argument. */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,0\\(%r3\\)" } } */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,8\\(%r3\\)" } } */ ++ ++/* just_v2si is passed in a vector reg if it as an incoming arg. ++ However, as return value it is passed via hidden first pointer ++ argument. */ ++/* { dg-final { scan-assembler ".*st.*\t%v\[0-9\]*,0\\(%r2\\)" } } */ ++ ++/* { dg-final { scan-assembler "gnu_attribute 8, 2" } } */ ++ ++typedef int __attribute__ ((vector_size(8))) v2si; ++ ++struct just_v2si ++{ ++ v2si i; ++}; ++ ++struct two_v2si ++{ ++ v2si i, j; ++}; ++ ++struct just_v2si ++add_structvecs (v2si a, struct just_v2si b, struct two_v2si c) ++{ ++ struct just_v2si res; ++ ++ res.i = a + b.i + c.i + c.j; ++ return res; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,60 @@ ++/* Check calling convention with variable argument lists in the vector ++ ABI. */ ++ ++/* { dg-do run { target { s390*-*-* } } } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++ ++/* Make sure arguments are fetched from the argument overflow area. 
*/ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,352\\(%r15\\)" { target lp64 } } } */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,368\\(%r15\\)" { target lp64 } } } */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,376\\(%r15\\)" { target lp64 } } } */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,392\\(%r15\\)" { target lp64 } } } */ ++ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,208\\(%r15\\)" { target ilp32 } } } */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,224\\(%r15\\)" { target ilp32 } } } */ ++/* { dg-final { scan-assembler "vl\t%v\[0-9\]*,232\\(%r15\\)" { target ilp32 } } } */ ++/* { dg-final { scan-assembler "ld\t%v\[0-9\]*,248\\(%r15\\)" { target ilp32 } } } */ ++ ++/* { dg-final { cleanup-saved-temps } } */ ++ ++#include <stdarg.h> ++ ++extern void abort (void); ++ ++typedef long long v2di __attribute__((vector_size(16))); ++typedef int v2si __attribute__((vector_size(8))); ++ ++v2di __attribute__((noinline)) ++add (int a, ...) ++{ ++ int i; ++ va_list va; ++ v2di di_result = { 0, 0 }; ++ v2si si_result = (v2si){ 0, 0 }; ++ ++ va_start (va, a); ++ ++ di_result += va_arg (va, v2di); ++ si_result += va_arg (va, v2si); ++ di_result += va_arg (va, v2di); ++ si_result += va_arg (va, v2si); ++ ++ va_end (va); ++ ++ di_result[0] += si_result[0]; ++ di_result[1] += si_result[1]; ++ ++ return di_result; ++} ++ ++int ++main () ++{ ++ v2di r = add (4, (v2di){ 11, 21 }, (v2si){ 12, 22 }, (v2di){ 13, 23 }, (v2si){ 14, 24 }); ++ ++ if (r[0] != 50 || r[1] != 90) ++ abort (); ++ ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-abi-vararg-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check calling convention in the vector ABI.
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 -Wno-implicit-function-declaration" } */ ++ ++ ++typedef long v2di __attribute__((vector_size(16))); ++extern v2di foo1 (int, v2di); ++extern v2di foo2 (int, int); ++extern v2di foo3 (int, ...); ++ ++v2di bar1 (int a) { return foo2 (1, a); } ++v2di bar2 (int a) { return foo3 (1, a); } ++v2di bar3 (v2di a) { return foo1 (1, a); } ++v2di bar4 (v2di a) { return foo3 (1, a); } ++ ++int bar5 (int a) { return foo4 (1, a); } ++int bar6 (v2di a) { return foo4 (1, a); } /* { dg-error "Vector argument passed to unprototyped function" } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-clobber-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,38 @@ ++/* { dg-do run { target { s390*-*-* } } } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* For FP zero checks we use the ltdbr instruction. Since this is an ++ load and test it actually writes the FPR. Whenever an FPR gets ++ written the rest of the overlapping VR is clobbered. */ ++typedef double __attribute__((vector_size(16))) v2df; ++ ++v2df a = { 1.0, 2.0 }; ++ ++extern void abort (void); ++ ++void __attribute__((noinline)) ++foo (v2df a) ++{ ++ v2df b = { 1.0, 3.0 }; ++ ++ b -= a; ++ ++ /* Take away all the VRs not overlapping with FPRs. 
*/ ++ asm volatile ("" : : : ++ "v16","v17","v18","v19", ++ "v20","v21","v22","v23", ++ "v24","v25","v26","v27", ++ "v28","v29","v30","v31"); ++ if (b[0] != 0.0) /* ltdbr */ ++ abort (); ++ if (b[1] != 1.0) ++ abort (); ++} ++ ++int ++main () ++{ ++ foo (a); ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,45 @@ ++/* Check that the proper unsigned compare instructions are being generated. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vchlb" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlh" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlf" 1 } } */ ++/* { dg-final { scan-assembler-times "vchlg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) unsigned char uv16qi; ++ ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) unsigned short uv8hi; ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) unsigned int uv4si; ++ ++typedef __attribute__((vector_size(16))) signed long long v2di; ++typedef __attribute__((vector_size(16))) unsigned long long uv2di; ++ ++v16qi ++f (uv16qi a, uv16qi b) ++{ ++ return a > b; ++} ++ ++v8hi ++g (uv8hi a, uv8hi b) ++{ ++ return a > b; ++} ++ ++v4si ++h (uv4si a, uv4si b) ++{ ++ return a > b; ++} ++ ++v2di ++i (uv2di a, uv2di b) ++{ ++ return a > b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-cmp-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,38 @@ ++/* Check that the proper signed compare instructions are being generated. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vchb" 1 } } */ ++/* { dg-final { scan-assembler-times "vchh" 1 } } */ ++/* { dg-final { scan-assembler-times "vchf" 1 } } */ ++/* { dg-final { scan-assembler-times "vchg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) signed long long v2di; ++ ++v16qi ++f (v16qi a, v16qi b) ++{ ++ return a > b; ++} ++ ++v8hi ++g (v8hi a, v8hi b) ++{ ++ return a > b; ++} ++ ++v4si ++h (v4si a, v4si b) ++{ ++ return a > b; ++} ++ ++v2di ++i (v2di a, v2di b) ++{ ++ return a > b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-dbl-math-compile-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,48 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++ ++typedef __attribute__((vector_size(16))) double v2df; ++ ++v2df ++adddbl (v2df a, v2df b) ++{ ++ return a + b; ++} ++/* { dg-final { scan-assembler-times "vfadb" 1 } } */ ++ ++v2df ++subdbl (v2df a, v2df b) ++{ ++ return a - b; ++} ++/* { dg-final { scan-assembler-times "vfsdb" 1 } } */ ++ ++v2df ++muldbl (v2df a, v2df b) ++{ ++ return a * b; ++} ++/* { dg-final { scan-assembler-times "vfmdb" 1 } } */ ++ ++v2df ++divdbl (v2df a, v2df b) ++{ ++ return a / b; ++} ++/* { dg-final { scan-assembler-times "vfd" 1 } } */ ++ ++v2df ++fmadbl (v2df a, v2df b, v2df c) ++{ ++ return a * b + c; ++} ++/* { dg-final { scan-assembler-times "vfma" 1 } } */ ++ ++v2df ++fmsdbl (v2df a, v2df b, v2df c) ++{ ++ return a * b - c; ++} ++/* { dg-final { scan-assembler-times "vfms" 1 } } */ ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- 
gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-1.c 2016-05-11 17:38:00.000000000 +0200 +@@ -0,0 +1,83 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++/* { dg-require-effective-target vector } */ ++/* { dg-require-effective-target int128 } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef unsigned __int128 uv1ti __attribute__((vector_size(16))); ++ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0xff00ff00ff00ff00, 0x00ff00ff00ff00ff }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,43605" 1 } } */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff0000ff, 0x0000ffff, 0xffff0000, 0x00ffff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,37830" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xff00, 0xff00, 0xff00, 0xff00, ++ 0xff00, 0xff00, 0xff00, 0xff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,43690" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x00ff, 0x00ff, 0x00ff, 0x00ff, ++ 0x00ff, 0x00ff, 0x00ff, 0x00ff }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v24,21845" 1 } } */ ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0xff, 0xff, 0xff, 0xff, ++ 0, 0, 0, 0, ++ 0xff, 0, 0xff, 0, ++ 0, 0xff, 0, 0xff }; ++} ++ ++uv1ti __attribute__((noinline)) ++foo5 () ++{ ++ return (uv1ti){ 0xff00ff00ff00ff00ULL }; ++} ++ ++/* { dg-final { scan-assembler-times "vgbm\t%v24,61605" 1 } } */ ++ ++int ++main () ++{ ++ if (foo1()[1] != 0x00ff00ff00ff00ffULL) ++ __builtin_abort (); ++ ++ if (foo2()[1] != 0x0000ffff) ++ __builtin_abort (); ++ ++ if 
(foo3a()[1] != 0xff00) ++ __builtin_abort (); ++ ++ if (foo3b()[1] != 0x00ff) ++ __builtin_abort (); ++ ++ if (foo4()[1] != 0xff) ++ __builtin_abort (); ++ ++ if (foo5()[0] != 0xff00ff00ff00ff00ULL) ++ __builtin_abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genbytemask-2.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++/* The elements differ. */ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x001fffffffffff00, 0x0000ffffffffff00 }; ++} ++ ++/* Non-contiguous bitmasks */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700, ++ 0xf700, 0xf700, 0xf700, 0xf700 }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff, ++ 0x10ff, 0x10ff, 0x10ff, 0x10ff }; ++} ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++} ++/* { dg-final { scan-assembler-not "vgbm" } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,70 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++/* { dg-require-effective-target vector 
} */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x000fffffffffff00, 0x000fffffffffff00 }; ++} ++/* { dg-final { scan-assembler-times "vgmg\t%v24,12,55" 1 } } */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00000f, 0xff00000f, 0xff00000f, 0xff00000f }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v24,28,7" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xfff0, 0xfff0, 0xfff0, 0xfff0, ++ 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; ++} ++/* { dg-final { scan-assembler-times "vgmh\t%v24,0,11" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x0fff, 0x0fff, 0x0fff, 0x0fff, ++ 0x0fff, 0x0fff, 0x0fff, 0x0fff }; ++} ++/* { dg-final { scan-assembler-times "vgmh\t%v24,4,15" 1 } } */ ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8, ++ 0x8, 0x8, 0x8, 0x8 }; ++} ++/* { dg-final { scan-assembler-times "vgmb\t%v24,4,4" 1 } } */ ++ ++int ++main () ++{ ++ if (foo1()[1] != 0x000fffffffffff00ULL) ++ __builtin_abort (); ++ ++ if (foo2()[1] != 0xff00000f) ++ __builtin_abort (); ++ ++ if (foo3a()[1] != 0xfff0) ++ __builtin_abort (); ++ ++ if (foo3b()[1] != 0x0fff) ++ __builtin_abort (); ++ ++ if (foo4()[1] != 0x8) ++ __builtin_abort (); ++ return 0; ++} ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-genmask-2.c 2016-05-11 17:38:00.000000000 +0200 +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++/* { dg-require-effective-target int128 } */ ++ 
++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef unsigned __int128 uv1ti __attribute__((vector_size(16))); ++ ++/* The elements differ. */ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x000fffffffffff00, 0x0000ffffffffff00 }; ++} ++ ++/* Non-contiguous bitmasks */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0xff00100f, 0xff00100f, 0xff00100f, 0xff00100f }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3a () ++{ ++ return (uv8hi){ 0xf700, 0xf700, 0xf700, 0xf700, ++ 0xf700, 0xf700, 0xf700, 0xf700 }; ++} ++ ++uv8hi __attribute__((noinline)) ++foo3b () ++{ ++ return (uv8hi){ 0x10ff, 0x10ff, 0x10ff, 0x10ff, ++ 0x10ff, 0x10ff, 0x10ff, 0x10ff }; ++} ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82, ++ 0x82, 0x82, 0x82, 0x82 }; ++} ++ ++/* We do not have vgmq. */ ++uv1ti ++foo5() ++{ ++ return (uv1ti){ ((unsigned __int128)1 << 53) - 1 }; ++} ++/* { dg-final { scan-assembler-not "vgm" } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-init-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-init-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,68 @@ ++/* Check that the vec_init expander does its job. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++ ++ ++ ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++ ++extern v4si G; ++ ++v4si ++f (signed int a) ++{ ++ return G == a; ++} ++/* { dg-final { scan-assembler-times "vrepf" 1 } } */ ++ ++v4si ++g (signed int *a) ++{ ++ return G == *a; ++} ++/* { dg-final { scan-assembler-times "vlrepf" 1 } } */ ++ ++v4si ++h () ++{ ++ return G == 1; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */ ++ ++v4si ++i () ++{ ++ return G == -1; ++} ++/* { dg-final { scan-assembler-times "vone" 1 } } */ ++ ++v4si ++j () ++{ ++ return G == 0; ++} ++/* { dg-final { scan-assembler-times "vzero" 1 } } */ ++ ++v4si ++k () ++{ ++ return G == (v4si){ 0xff80, 0xff80, 0xff80, 0xff80 }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,16,24" 1 } } */ ++ ++v4si ++l () ++{ ++ return G == (v4si){ 0xf000000f, 0xf000000f, 0xf000000f, 0xf000000f }; ++} ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,28,3" 1 } } */ ++ ++v4si ++m () ++{ ++ return G == (v4si){ 0x00ff00ff, 0x0000ffff, 0xffff0000, 0xff00ff00 }; ++} ++/* { dg-final { scan-assembler-times "vgbm\t%v.*,21450" 1 } } */ +--- gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-int-math-compile-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,40 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++ ++v4si ++adddbl (v4si a, v4si b) ++{ ++ return a + b; ++} ++ ++v4si ++subdbl (v4si a, v4si b) ++{ ++ return a - b; ++} ++ ++v4si ++muldbl (v4si a, v4si b) ++{ ++ return a * b; ++} ++ ++v4si ++divdbl (v4si a, v4si b) ++{ ++ return a / b; ++} ++ ++v4si ++fmadbl (v4si a, v4si b, v4si c) ++{ ++ return a * b + c; ++} ++ ++v4si ++fmsdbl (v4si a, v4si b, v4si c) ++{ ++ return a * b - c; ++} +--- 
gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c 2016-05-11 17:12:28.000000000 +0200 +@@ -0,0 +1,49 @@ ++/* Check that we use the scalar variants of vector compares. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "wfcedbs\t%v\[0-9\]*,%v0,%v2" 2 } } */ ++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v0,%v2" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */ ++/* { dg-final { scan-assembler-times "locrne" 5 } } */ ++/* { dg-final { scan-assembler-times "locrno" 1 } } */ ++ ++ ++int ++eq (double a, double b) ++{ ++ return a == b; ++} ++ ++int ++ne (double a, double b) ++{ ++ return a != b; ++} ++ ++int ++gt (double a, double b) ++{ ++ return a > b; ++} ++ ++int ++ge (double a, double b) ++{ ++ return a >= b; ++} ++ ++int ++lt (double a, double b) ++{ ++ return a < b; ++} ++ ++int ++le (double a, double b) ++{ ++ return a <= b; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-shift-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,108 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "veslb" 2 } } */ ++/* { dg-final { scan-assembler-times "veslh" 2 } } */ ++/* { dg-final { scan-assembler-times "veslf" 2 } } */ ++/* { dg-final { scan-assembler-times "veslg" 2 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrab" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrah" 1 } } */ ++/* { dg-final { scan-assembler-times "vesraf" 1 } } */ ++/* { dg-final { scan-assembler-times 
"vesrag" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrlb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlg" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "veslvb" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvh" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvf" 2 } } */ ++/* { dg-final { scan-assembler-times "veslvg" 2 } } */ ++ ++/* { dg-final { scan-assembler-times "vesravb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesravg" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vesrlvb" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvh" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvf" 1 } } */ ++/* { dg-final { scan-assembler-times "vesrlvg" 1 } } */ ++ ++typedef __attribute__((vector_size(16))) signed char v16qi; ++typedef __attribute__((vector_size(16))) unsigned char uv16qi; ++ ++typedef __attribute__((vector_size(16))) signed short v8hi; ++typedef __attribute__((vector_size(16))) unsigned short uv8hi; ++ ++typedef __attribute__((vector_size(16))) signed int v4si; ++typedef __attribute__((vector_size(16))) unsigned int uv4si; ++ ++typedef __attribute__((vector_size(16))) signed long long v2di; ++typedef __attribute__((vector_size(16))) unsigned long long uv2di; ++ ++uv16qi g_uvqi0, g_uvqi1, g_uvqi2; ++v16qi g_vqi0, g_vqi1, g_vqi2; ++ ++uv8hi g_uvhi0, g_uvhi1, g_uvhi2; ++v8hi g_vhi0, g_vhi1, g_vhi2; ++ ++uv4si g_uvsi0, g_uvsi1, g_uvsi2; ++v4si g_vsi0, g_vsi1, g_vsi2; ++ ++uv2di g_uvdi0, g_uvdi1, g_uvdi2; ++v2di g_vdi0, g_vdi1, g_vdi2; ++ ++void ++shift_left_by_scalar (int s) ++{ ++ g_uvqi0 = g_uvqi1 << s; ++ g_vqi0 = g_vqi1 << s; ++ g_uvhi0 = g_uvhi1 << s; ++ g_vhi0 = g_vhi1 << s; ++ g_uvsi0 = g_uvsi1 << s; ++ g_vsi0 = g_vsi1 << s; ++ g_uvdi0 = g_uvdi1 << s; ++ g_vdi0 = g_vdi1 
<< s; ++} ++ ++void ++shift_right_by_scalar (int s) ++{ ++ g_uvqi0 = g_uvqi1 >> s; ++ g_vqi0 = g_vqi1 >> s; ++ g_uvhi0 = g_uvhi1 >> s; ++ g_vhi0 = g_vhi1 >> s; ++ g_uvsi0 = g_uvsi1 >> s; ++ g_vsi0 = g_vsi1 >> s; ++ g_uvdi0 = g_uvdi1 >> s; ++ g_vdi0 = g_vdi1 >> s; ++} ++ ++void ++shift_left_by_vector () ++{ ++ g_uvqi0 = g_uvqi1 << g_uvqi2; ++ g_vqi0 = g_vqi1 << g_vqi2; ++ g_uvhi0 = g_uvhi1 << g_uvhi2; ++ g_vhi0 = g_vhi1 << g_vhi2; ++ g_uvsi0 = g_uvsi1 << g_uvsi2; ++ g_vsi0 = g_vsi1 << g_vsi2; ++ g_uvdi0 = g_uvdi1 << g_uvdi2; ++ g_vdi0 = g_vdi1 << g_vdi2; ++} ++ ++void ++shift_right_by_vector () ++{ ++ g_uvqi0 = g_uvqi1 >> g_uvqi2; ++ g_vqi0 = g_vqi1 >> g_vqi2; ++ g_uvhi0 = g_uvhi1 >> g_uvhi2; ++ g_vhi0 = g_vhi1 >> g_vhi2; ++ g_uvsi0 = g_uvsi1 >> g_uvsi2; ++ g_vsi0 = g_vsi1 >> g_vsi2; ++ g_uvdi0 = g_uvdi1 >> g_uvdi2; ++ g_vdi0 = g_vdi1 >> g_vdi2; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-sub-1.c 2016-05-11 17:12:20.000000000 +0200 +@@ -0,0 +1,51 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++/* { dg-final { scan-assembler-times "vsb" 2 } } */ ++/* { dg-final { scan-assembler-times "vsh" 2 } } */ ++/* { dg-final { scan-assembler-times "vsf" 2 } } */ ++/* { dg-final { scan-assembler-times "vsg" 2 } } */ ++/* { dg-final { scan-assembler-times "vfs" 1 } } */ ++ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef signed char v16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef signed short v8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef signed int v4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++typedef signed long long v2di __attribute__((vector_size(16))); ++typedef double v2df 
__attribute__((vector_size(16))); ++ ++uv16qi g_uvqi0, g_uvqi1, g_uvqi2; ++v16qi g_vqi0, g_vqi1, g_vqi2; ++ ++uv8hi g_uvhi0, g_uvhi1, g_uvhi2; ++v8hi g_vhi0, g_vhi1, g_vhi2; ++ ++uv4si g_uvsi0, g_uvsi1, g_uvsi2; ++v4si g_vsi0, g_vsi1, g_vsi2; ++ ++uv2di g_uvdi0, g_uvdi1, g_uvdi2; ++v2di g_vdi0, g_vdi1, g_vdi2; ++ ++v2df g_vdf0, g_vdf1, g_vdf2; ++ ++void ++sub1 () ++{ ++ g_vqi0 = g_vqi1 - g_vqi2; ++ g_uvqi0 = g_uvqi1 - g_uvqi2; ++ ++ g_vhi0 = g_vhi1 - g_vhi2; ++ g_uvhi0 = g_uvhi1 - g_uvhi2; ++ ++ g_vsi0 = g_vsi1 - g_vsi2; ++ g_uvsi0 = g_uvsi1 - g_uvsi2; ++ ++ g_vdi0 = g_vdi1 - g_vdi2; ++ g_uvdi0 = g_uvdi1 - g_uvdi2; ++ ++ g_vdf0 = g_vdf1 - g_vdf2; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c 2016-05-11 18:08:10.000000000 +0200 +@@ -0,0 +1,23 @@ ++/* A const vector operand is forced into a register in ++ s390_expand_vcond. ++ This testcase once failed because the target mode (v2di) was picked ++ for the reg instead of the mode of the other comparison ++ operand. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13" } */ ++ ++typedef __attribute__((vector_size(16))) long v2di; ++typedef __attribute__((vector_size(16))) double v2df; ++ ++v2di ++foo (v2df a) ++{ ++ return a == (v2df){ 0.0, 0.0 }; ++} ++ ++v2di ++bar (v2df a) ++{ ++ return (v2df){ 1.0, 1.0 } == (v2df){ 0.0, 0.0 }; ++} +--- gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/vector/vec-vrepi-1.c 2016-05-11 17:41:29.000000000 +0200 +@@ -0,0 +1,58 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -mzarch -march=z13 --save-temps" } */ ++/* { dg-require-effective-target vector } */ ++ ++typedef unsigned char uv16qi __attribute__((vector_size(16))); ++typedef unsigned short uv8hi __attribute__((vector_size(16))); ++typedef unsigned int uv4si __attribute__((vector_size(16))); ++typedef unsigned long long uv2di __attribute__((vector_size(16))); ++ ++uv2di __attribute__((noinline)) ++foo1 () ++{ ++ return (uv2di){ 0x7f0f, 0x7f0f }; ++} ++/* { dg-final { scan-assembler-times "vrepig\t%v24,32527" 1 } } */ ++ ++uv4si __attribute__((noinline)) ++foo2 () ++{ ++ return (uv4si){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f }; ++} ++/* { dg-final { scan-assembler-times "vrepif\t%v24,32527" 1 } } */ ++ ++uv8hi __attribute__((noinline)) ++foo3 () ++{ ++ return (uv8hi){ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f, ++ 0x7f0f, 0x7f0f, 0x7f0f, 0x7f0f }; ++} ++/* { dg-final { scan-assembler-times "vrepih\t%v24,32527" 1 } } */ ++ ++uv16qi __attribute__((noinline)) ++foo4 () ++{ ++ return (uv16qi){ 0x77, 0x77, 0x77, 0x77, ++ 0x77, 0x77, 0x77, 0x77, ++ 0x77, 0x77, 0x77, 0x77, ++ 0x77, 0x77, 0x77, 0x77 }; ++} ++/* { dg-final { scan-assembler-times "vrepib\t%v24,119" 1 } } */ ++ ++int ++main () ++{ ++ if (foo1()[1] != 0x7f0f) ++ __builtin_abort (); ++ ++ if (foo2()[1] != 0x7f0f) ++ __builtin_abort (); ++ ++ if (foo3()[1] != 0x7f0f) ++ __builtin_abort (); ++ ++ if (foo4()[1] != 0x77) ++ __builtin_abort 
(); ++ ++ return 0; ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-dbl-math-compile-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,67 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */ ++ ++/* { dg-final { scan-assembler-times "vfcedb\t" 1 } } */ ++/* { dg-final { scan-assembler-times "vfchdb\t" 2 } } */ ++/* { dg-final { scan-assembler-times "vfchedb\t" 2 } } */ ++ ++/* { dg-final { scan-assembler-times "vfcedbs\t" 2 } } */ ++/* { dg-final { scan-assembler-times "vfchdbs\t" 2 } } */ ++ ++/* { dg-final { cleanup-saved-temps } } */ ++ ++#include ++ ++vector bool long long ++cmpeq (vector double a, vector double b) ++{ ++ return vec_cmpeq (a, b); /* vfcedb */ ++} ++ ++vector bool long long ++cmpgt (vector double a, vector double b) ++{ ++ return vec_cmpgt (a, b); /* vfchdb */ ++} ++ ++vector bool long long ++cmpge (vector double a, vector double b) ++{ ++ return vec_cmpge (a, b); /* vfchedb */ ++} ++ ++vector bool long long ++cmplt (vector double a, vector double b) ++{ ++ return vec_cmplt (a, b); /* vfchdb */ ++} ++ ++vector bool long long ++cmple (vector double a, vector double b) ++{ ++ return vec_cmple (a, b); /* vfchedb */ ++} ++ ++int ++all_eq (vector double a, vector double b) ++{ ++ return vec_all_eq (a, b); ++} ++ ++int ++any_eq (vector double a, vector double b) ++{ ++ return vec_any_eq (a, b); ++} ++ ++int ++all_lt (vector double a, vector double b) ++{ ++ return vec_all_lt (a, b); ++} ++ ++int ++any_lt (vector double a, vector double b) ++{ ++ return vec_any_lt (a, b); ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-elem-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,11 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 
-mzvector" } */ ++ ++/* { dg-final { scan-assembler "nilf\t%r2,15" } } */ ++/* { dg-final { scan-assembler "vlgvb" } } */ ++ ++signed char ++foo(unsigned char uc) ++{ ++ return __builtin_s390_vec_extract((__vector signed char){ 0 }, uc); ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-genbytemask-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++#include ++ ++ ++vector unsigned char a, b, c, d; ++ ++int ++foo () ++{ ++ a = vec_genmask (0); ++ b = vec_genmask (65535); ++ c = vec_genmask (43605); ++ d = vec_genmask (37830); ++} ++ ++/* { dg-final { scan-assembler-times "vzero" 1 } } */ ++/* { dg-final { scan-assembler-times "vone" 1 } } */ ++/* { dg-final { scan-assembler-times "vgbm\t%v.*,43605" 1 } } */ ++/* { dg-final { scan-assembler-times "vgbm\t%v.*,37830" 1 } } */ +--- gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-genmask-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++#include ++ ++ ++vector unsigned int a, b, c, d, e, f; ++ ++int ++foo () ++{ ++ a = vec_genmasks_32 (0, 31); ++ b = vec_genmasks_32 (0, 0); ++ c = vec_genmasks_32 (31, 31); ++ d = vec_genmasks_32 (5, 5); ++ e = vec_genmasks_32 (31, 0); ++ f = vec_genmasks_32 (6, 5); ++} ++/* { dg-final { scan-assembler-times "vone" 1 } } */ ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,0,0" 1 } } */ ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,31" 1 } } */ ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,5,5" 1 } } */ ++/* { dg-final { scan-assembler-times "vgmf\t%v.*,31,0" 1 } } */ ++/* { dg-final { scan-assembler-times "vone" 1 } } */ +--- gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c 1970-01-01 
01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-lcbb-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,31 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++/* { dg-final { scan-assembler-times "\tlcbb\t" 4 } } */ ++ ++#include ++ ++/* CC will be extracted into a GPR and returned. */ ++int ++foo1 (void *ptr) ++{ ++ return __lcbb (ptr, 64); ++} ++ ++int ++foo2 (void *ptr) ++{ ++ return __lcbb (ptr, 128) > 16; ++} ++ ++int ++foo3 (void *ptr) ++{ ++ return __lcbb (ptr, 256) == 16; ++} ++ ++int ++foo4 (void *ptr) ++{ ++ return __lcbb (ptr, 512) < 16; ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-load_bndry-1.c 2016-05-11 17:34:31.000000000 +0200 +@@ -0,0 +1,80 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O0 -mzarch -march=z13 -mzvector" } */ ++ ++#include ++ ++signed char ++foo64 (signed char *p) ++{ ++ return vec_load_bndry (p, 64)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),0" 1 } } */ ++} ++ ++signed char ++foo128 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 128)[0] ++ + vec_load_bndry (p + 16, 128)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),1" 2 } } */ ++} ++ ++signed char ++foo256 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 256)[0] ++ + vec_load_bndry (p + 16, 256)[0] ++ + vec_load_bndry (p + 32, 256)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),2" 3 } } */ ++} ++ ++signed char ++foo512 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 512)[0] ++ + vec_load_bndry (p + 16, 512)[0] ++ + vec_load_bndry (p + 32, 512)[0] ++ + vec_load_bndry (p + 48, 512)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),3" 4 } } */ ++} ++ ++signed char ++foo1024 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 1024)[0] ++ + vec_load_bndry 
(p + 16, 1024)[0] ++ + vec_load_bndry (p + 32, 1024)[0] ++ + vec_load_bndry (p + 48, 1024)[0] ++ + vec_load_bndry (p + 64, 1024)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),4" 5 } } */ ++} ++ ++signed char ++foo2048 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 2048)[0] ++ + vec_load_bndry (p + 16, 2048)[0] ++ + vec_load_bndry (p + 32, 2048)[0] ++ + vec_load_bndry (p + 48, 2048)[0] ++ + vec_load_bndry (p + 64, 2048)[0] ++ + vec_load_bndry (p + 80, 2048)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),5" 6 } } */ ++} ++ ++signed char ++foo4096 (signed char *p) ++{ ++ return ++ vec_load_bndry (p, 4096)[0] ++ + vec_load_bndry (p + 16, 4096)[0] ++ + vec_load_bndry (p + 32, 4096)[0] ++ + vec_load_bndry (p + 48, 4096)[0] ++ + vec_load_bndry (p + 64, 4096)[0] ++ + vec_load_bndry (p + 80, 4096)[0] ++ + vec_load_bndry (p + 96, 4096)[0]; ++ /* { dg-final { scan-assembler-times "\tvlbb\t%v..?,0\\(%r..?\\),6" 7 } } */ ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,77 @@ ++/* Test whether overloading works as expected. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-march=z13 -mzarch -mzvector -fdump-tree-original" } */ ++ ++__vector int var_v4si; ++__vector unsigned var_uv4si; ++__vector bool var_bv4si; ++__vector long long var_v2di; ++__vector unsigned long long var_uv2di; ++__vector bool long long var_bv2di; ++__vector double var_v2df; ++ ++int *intptr; ++unsigned *uintptr; ++double *dblptr; ++unsigned long long ull; ++const int *cintptr; ++long long* llptr; ++unsigned long long* ullptr; ++ ++typedef __vector int v4si; ++typedef __vector unsigned int uv4si; ++ ++v4si var2_v4si; ++uv4si var2_uv4si; ++ ++void ++foo () ++{ ++ __builtin_s390_vec_scatter_element (var_v4si, var_uv4si, intptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var2_v4si, var2_uv4si, intptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_bv4si, var_uv4si, uintptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_uv4si, var_uv4si, uintptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_v2di, var_uv2di, llptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_bv2di, var_uv2di, ullptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_uv2di, var_uv2di, ullptr, (unsigned long long)0); ++ __builtin_s390_vec_scatter_element (var_v2df, var_uv2di, dblptr, (unsigned long long)0); ++ ++ /* While the last argument is an int there is a way to convert it to ++ unsigned long long, so this variant is supposed to match.
*/ ++ __builtin_s390_vec_scatter_element (var_v4si, var_uv4si, intptr, 0); ++ ++ __builtin_s390_vec_insert_and_zero (intptr); ++ __builtin_s390_vec_insert_and_zero (cintptr); ++ ++ __builtin_s390_vec_promote ((signed char)1, 1); ++ __builtin_s390_vec_promote ((unsigned char)1, 1); ++ __builtin_s390_vec_promote ((short int)1, 1); ++ __builtin_s390_vec_promote ((unsigned short int)1, 1); ++ __builtin_s390_vec_promote ((int)1, 1); ++ __builtin_s390_vec_promote ((unsigned)1, 1); ++ __builtin_s390_vec_promote ((long long)1, 1); ++ __builtin_s390_vec_promote ((unsigned long long)1, 1); ++ __builtin_s390_vec_promote ((double)1, 1); ++ ++ /* This is supposed to match vec_promote_s32 */ ++ __builtin_s390_vec_promote (1, (signed char) -1); ++ ++ /* Constants in C usually are considered int. */ ++ __builtin_s390_vec_promote (1, 1); ++ ++ /* And (unsigned) long if they are too big for int. */ ++ __builtin_s390_vec_promote (1ULL << 32, 1); ++ __builtin_s390_vec_promote (1LL << 32, 1); ++} ++ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vscef " 5 "original" } } */ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vsceg " 4 "original" } } */ ++ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vllezf " 2 "original" } } */ ++ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgb_noin " 2 "original" } } */ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgh_noin " 2 "original" } } */ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgf_noin " 4 "original" } } */ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_noin " 4 "original" } } */ ++/* { dg-final { scan-tree-dump-times "__builtin_s390_vlvgg_dbl_noin " 1 "original" } } */ +--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-2.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,54 @@ ++/* Test whether overloading works as expected. 
*/ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-march=z13 -mzarch -mzvector" } */ ++ ++__vector int v4si; ++__vector unsigned uv4si; ++__vector bool bv4si; ++__vector long long v2di; ++__vector unsigned long long uv2di; ++__vector bool long long bv2di; ++__vector double v2df; ++int *intptr; ++unsigned *uintptr; ++double *dblptr; ++long long ll; ++unsigned long long ull; ++const int *cintptr; ++long long* llptr; ++unsigned long long* ullptr; ++ ++void ++foo () ++{ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, (int*)0, 0); /* ok */ ++ __builtin_s390_vec_insert_and_zero (intptr); /* ok */ ++ ++ /* The unsigned pointer must not match the signed pointer. */ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, uintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* Make sure signed int pointers don't match unsigned int pointers. */ ++ __builtin_s390_vec_scatter_element (bv4si, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* Const pointers do not match unqualified operands. */ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* Volatile pointers do not match unqualified operands. */ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* The third operand needs to be double *. */ ++ __builtin_s390_vec_scatter_element (v2df, uv4si, intptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* This is an ambiguous overload. */ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, 0, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* Pointer to vector must not match. */ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, &v4si, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ /* Don't accept const int* for int*.
*/ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, cintptr, 0); /* { dg-error "invalid parameter combination for intrinsic" } */ ++ ++ __builtin_s390_vec_load_pair (ll, ull); /* { dg-error "ambiguous overload for intrinsic" } */ ++ __builtin_s390_vec_load_pair (ull, ll); /* { dg-error "ambiguous overload for intrinsic" } */ ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-3.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,19 @@ ++/* Check for error messages supposed to be issued during overloading. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-march=z13 -mzarch -mzvector" } */ ++ ++__vector int v4si; ++__vector unsigned uv4si; ++ ++int *intptr; ++unsigned long long ull; ++const unsigned int *ucintptr; ++ ++void ++foo () ++{ ++ /* A backend check makes sure the fourth operand is a literal. */ ++ __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 256); /* { dg-error "constant argument 4 for builtin.*is out of range for target type" } */ ++ __builtin_s390_vec_gather_element (uv4si, uv4si, ucintptr, 5); /* { dg-error "constant argument 4 for builtin.*is out of range" } */ ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-overloading-4.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,18 @@ ++/* Check for error messages supposed to be issued during builtin expansion. */ ++ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-march=z13 -mzarch -mzvector" } */ ++ ++__vector int v4si; ++__vector unsigned uv4si; ++ ++int *intptr; ++unsigned long long ull; ++const unsigned int *ucintptr; ++ ++void ++foo () ++{ ++ /* A backend check makes sure the fourth operand is a literal.
*/ ++ __builtin_s390_vec_scatter_element (v4si, uv4si, intptr, ull); /* { dg-error "constant value required for builtin" } */ ++} +--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-1.c 2016-05-11 17:41:24.000000000 +0200 +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++#include ++ ++vector signed char v16qi; ++vector short v8hi; ++vector int v4si; ++vector long long v2di; ++ ++vector unsigned char uv16qi; ++vector unsigned short uv8hi; ++vector unsigned int uv4si; ++vector unsigned long long uv2di; ++ ++int ++foo () ++{ ++ v16qi = vec_splats ((signed char)0x77); ++ uv16qi = vec_splats ((unsigned char)0x77); ++ ++ v8hi = vec_splats ((short int)0x7f0f); ++ uv8hi = vec_splats ((unsigned short int)0x7f0f); ++ ++ v4si = vec_splats ((int)0x7f0f); ++ uv4si = vec_splats ((unsigned int)0x7f0f); ++ ++ v2di = vec_splats ((long long)0x7f0f); ++ uv2di = vec_splats ((unsigned long long)0x7f0f); ++} ++ ++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepib\t%v.*,119" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepih\t%v.*,32527" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepif\t%v.*,32527" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepig\t%v.*,32527" 1 } } */ +--- gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-splat-2.c 2016-05-11 17:53:39.000000000 +0200 +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++#include ++ ++vector signed char v16qi; ++vector short v8hi; ++vector int v4si; ++vector long 
long v2di; ++ ++vector unsigned char uv16qi; ++vector unsigned short uv8hi; ++vector unsigned int uv4si; ++vector unsigned long long uv2di; ++ ++int ++foo () ++{ ++ v16qi = vec_splat_s8 (-112); ++ uv16qi = vec_splat_u8 (215); ++ ++ v8hi = vec_splat_s16 (-32000); ++ uv8hi = vec_splat_u16 (64000); ++ ++ v4si = vec_splat_s32 (-32000); ++ uv4si = vec_splat_u32 (64000); ++ ++ v2di = vec_splat_s64 (-32000); ++ uv2di = vec_splat_u64 (64000); ++} ++ ++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-112" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepib\t%v.*,-41" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-32000" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepih\t%v.*,-1536" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-32000" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepif\t%v.*,-1536" 1 } } */ ++ ++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-32000" 1 } } */ ++/* { dg-final { scan-assembler-times "vrepig\t%v.*,-1536" 1 } } */ +--- gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc/testsuite/gcc.target/s390/zvector/vec-test-mask-1.c 2016-05-11 17:30:06.000000000 +0200 +@@ -0,0 +1,25 @@ ++/* { dg-do compile { target { s390*-*-* } } } */ ++/* { dg-options "-O3 -mzarch -march=z13 -mzvector" } */ ++ ++/* { dg-final { scan-assembler-times "vtm" 2 } } */ ++/* { dg-final { scan-assembler-times "ipm" 1 } } */ ++ ++#include ++ ++/* CC will be extracted into a GPR and returned. */ ++int ++foo (vector unsigned int a, vector unsigned b) ++{ ++ return vec_test_mask (a, b); ++} ++ ++extern void baz (void); ++ ++/* In that case the ipm/srl is supposed to be optimized out by ++ combine/s390_canonicalize_comparison.
*/ ++int ++bar (vector unsigned int a, vector unsigned b) ++{ ++ if (vec_test_mask (a, b) == 2) ++ baz (); ++} +--- gcc/testsuite/lib/target-supports.exp 2015-06-18 16:32:16.000000000 +0200 ++++ gcc/testsuite/lib/target-supports.exp 2016-05-11 17:32:08.000000000 +0200 +@@ -3800,7 +3800,8 @@ proc check_effective_target_vect_natural + verbose "check_effective_target_vect_natural_alignment: using cached result" 2 + } else { + set et_vect_natural_alignment_saved 1 +- if { [check_effective_target_arm_eabi] } { ++ if { [check_effective_target_arm_eabi] ++ || [istarget s390*-*-*] } { + set et_vect_natural_alignment_saved 0 + } + } diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index d243c09..5675774 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -2,7 +2,7 @@ %global SVNREV 225304 # Note, gcc_release must be integer, if you want to add suffixes to # %{release}, append them after %{gcc_release} on Release: line. -%global gcc_release 4 +%global gcc_release 11 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 %global multilib_64_archs sparc64 ppc64 ppc64p7 s390x x86_64 @@ -218,6 +218,21 @@ Patch18: gcc48-aarch64-ada.patch Patch19: gcc48-aarch64-async-unw-tables.patch Patch20: gcc48-aarch64-unwind-opt.patch Patch21: gcc48-rh1243366.patch +Patch22: gcc48-rh1180633.patch +Patch23: gcc48-rh1278872.patch +Patch24: gcc48-pr67281.patch +Patch25: gcc48-pr68680.patch +Patch26: gcc48-rh1312436.patch +Patch27: gcc48-pr53477.patch +Patch28: gcc48-rh1296211.patch +Patch29: gcc48-rh1304449.patch +Patch30: gcc48-s390-z13.patch +Patch31: gcc48-rh1312850.patch +Patch32: gcc48-pr65142.patch +Patch33: gcc48-pr52714.patch +Patch34: gcc48-rh1344807.patch +Patch35: gcc48-libgomp-20160715.patch +Patch36: gcc48-pr63293.patch Patch1000: fastjar-0.97-segfault.patch Patch1001: fastjar-0.97-len1.patch @@ -916,6 +931,22 @@ rm -f libgo/go/crypto/elliptic/p224{,_test}.go %patch19 -p0 -b .aarch64-async-unw-tables~ %patch20 -p0 -b .aarch64-unwind-opt~ %patch21 -p0 -b .rh1243366~ 
+%patch22 -p0 -b .rh1180633~ +%patch23 -p0 -b .rh1278872~ +%patch24 -p0 -b .pr67281~ +%patch25 -p0 -b .pr68680~ +%patch26 -p0 -b .rh1312436~ +%patch27 -p0 -b .pr53477~ +touch -r %{PATCH27} libstdc++-v3/python/libstdcxx/v6/printers.py +%patch28 -p0 -b .rh1296211~ +%patch29 -p0 -b .rh1304449~ +%patch30 -p0 -b .s390-z13~ +%patch31 -p0 -b .rh1312850~ +%patch32 -p0 -b .pr65142~ +%patch33 -p0 -b .pr52714~ +%patch34 -p0 -b .rh1344807~ +%patch35 -p0 -b .libgomp-20160715~ +%patch36 -p0 -b .pr63293~ %if 0%{?_enable_debug_packages} cat > split-debuginfo.sh <<\EOF @@ -1229,7 +1260,7 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \ --host=%{gcc_target_platform} --build=%{gcc_target_platform} --target=%{gcc_target_platform} --with-cpu=v7 %endif %ifarch ppc64le - --with-cpu-64=power7 --with-tune-64=power8 \ + --with-cpu-64=power8 --with-tune-64=power8 \ %endif %ifarch ppc ppc64 ppc64p7 %if 0%{?rhel} >= 7 @@ -2381,6 +2412,7 @@ fi %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/fxsrintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/xsaveintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/xsaveoptintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/pkuintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/mm_malloc.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/mm3dnow.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/cpuid.h @@ -2416,6 +2448,7 @@ fi %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/s390intrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/htmintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/htmxlintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/include/vecintrin.h %endif %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_version}/collect2 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_version}/crt*.o @@ -3357,6 +3390,45 @@ fi 
%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_version}/plugin %changelog +* Wed Aug 31 2016 Jakub Jelinek 4.8.5-11 +- on aarch64 emit scheduling barriers before stack deallocation in + function epilogues (#1362635, PR target/63293) + +* Wed Aug 10 2016 Jakub Jelinek 4.8.5-10 +- include vecintrin.h intrinsic header on s390 (#1182152) + +* Fri Jul 15 2016 Jakub Jelinek 4.8.5-9 +- backport OpenMP 4.5 support to libgomp (library only; #1357060, + PRs libgomp/68579, libgomp/64625) + +* Wed Jun 15 2016 Jakub Jelinek 4.8.5-8 +- fix a bug in C++ ref-to-ptr conversion (#1344807) +- fix combiner handling of jumps on aarch64 (#1344672, + PR rtl-optimization/52714) + +* Thu Jun 9 2016 Jakub Jelinek 4.8.5-7 +- ensure the timestamp on printers.py is always the same (#1344291) + +* Mon Jun 6 2016 Jakub Jelinek 4.8.5-6 +- backport s390 z13 support (#1182152) +- fix up -fsanitize=address on powerpc64 with 46-bit virtual address space + (#1312850) +- throw exception on std::random_device::_M_getval() failure (#1262846, + PR libstdc++/65142, CVE-2015-5276) + +* Tue May 10 2016 Jakub Jelinek 4.8.5-5 +- fix up libitm HTM fastpath (#1180633) +- on ppc64le default to -mcpu=power8 instead of -mcpu=power7 (#1213268) +- fix up size in .debug_pubnames (#1278872) +- turn powerpc* HTM insns into memory barriers (#1282755, PR target/67281) +- make sure to handle __builtin_alloca_with_align like alloca in + -fstack-protector* (#1289022, PR tree-optimization/68680) +- improve DW_AT_abstract_origin of DW_TAG_GNU_call_site on s390 with -fPIC + (#1312436) +- fix up libstdc++ pretty-printers (#1076690, PR libstdc++/53477) +- don't pass explicit --oformat option to ld on powerpc* (#1296211) +- backport Intel Memory Protection Keys ISA support - -mpku (#1304449) + * Wed Jul 15 2015 Jakub Jelinek 4.8.5-4 - fix up basic_streambuf copy constructor and assignment operator (#1243366)