diff --git a/.gitignore b/.gitignore
index e4327ad..c7daf07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-SOURCES/libsecret-0.15.tar.xz
+SOURCES/libsecret-0.18.2.tar.xz
diff --git a/.libsecret.metadata b/.libsecret.metadata
index b6461aa..c10b2c5 100644
--- a/.libsecret.metadata
+++ b/.libsecret.metadata
@@ -1 +1 @@
-da54ec18c3aa1581b4080237f082ce0105132eaf SOURCES/libsecret-0.15.tar.xz
+069a081f6c958646be57ac864e1a0da61f9db2fd SOURCES/libsecret-0.18.2.tar.xz
diff --git a/SOURCES/libsecret-0.18-update-valgrind.h-and-memcheck.h.patch b/SOURCES/libsecret-0.18-update-valgrind.h-and-memcheck.h.patch
new file mode 100644
index 0000000..4ddbf5d
--- /dev/null
+++ b/SOURCES/libsecret-0.18-update-valgrind.h-and-memcheck.h.patch
@@ -0,0 +1,7339 @@
+diff -urN libsecret-0.15.old/build/valgrind/memcheck.h libsecret-0.15/build/valgrind/memcheck.h
+--- libsecret-0.15.old/build/valgrind/memcheck.h	2012-02-02 13:11:10.000000000 +0000
++++ libsecret-0.15/build/valgrind/memcheck.h	2014-09-08 13:28:05.000000000 +0100
+@@ -13,7 +13,7 @@
+    This file is part of MemCheck, a heavyweight Valgrind tool for
+    detecting memory errors.
+ 
+-   Copyright (C) 2000-2010 Julian Seward.  All rights reserved.
++   Copyright (C) 2000-2013 Julian Seward.  All rights reserved.
+ 
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+@@ -71,12 +71,6 @@
+ 
+ #include "valgrind.h"
+ 
+-#if defined(__GNUC__)
+-# define VG_UNUSED __attribute__((unused))
+-#else
+-# define VG_UNUSED
+-#endif
+-
+ /* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! 
+    This enum comprises an ABI exported by Valgrind to programs
+    which use client requests.  DO NOT CHANGE THE ORDER OF THESE
+@@ -102,6 +96,9 @@
+       /* Not next to VG_USERREQ__COUNT_LEAKS because it was added later. */
+       VG_USERREQ__COUNT_LEAK_BLOCKS,
+ 
++      VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE,
++      VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE,
++
+       /* This is just for memcheck's internal use - don't use it */
+       _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR 
+          = VG_USERREQ_TOOL_BASE('M','C') + 256
+@@ -190,19 +187,27 @@
+ 
+ /* Do a full memory leak check (like --leak-check=full) mid-execution. */
+ #define VALGRIND_DO_LEAK_CHECK                                   \
+-   {unsigned long _qzz_res VG_UNUSED;                            \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                      \
+-                            VG_USERREQ__DO_LEAK_CHECK,           \
+-                            0, 0, 0, 0, 0);                      \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK,   \
++                                    0, 0, 0, 0, 0)
++
++/* Same as VALGRIND_DO_LEAK_CHECK but only showing the entries for
++   which there was an increase in leaked bytes or leaked nr of blocks
++   since the previous leak search. */
++#define VALGRIND_DO_ADDED_LEAK_CHECK                            \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK,  \
++                                    0, 1, 0, 0, 0)
++
++/* Same as VALGRIND_DO_ADDED_LEAK_CHECK but showing entries with
++   increased or decreased leaked bytes/blocks since previous leak
++   search. */
++#define VALGRIND_DO_CHANGED_LEAK_CHECK                          \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK,  \
++                                    0, 2, 0, 0, 0)
+ 
+ /* Do a summary memory leak check (like --leak-check=summary) mid-execution. */
+-#define VALGRIND_DO_QUICK_LEAK_CHECK				 \
+-   {unsigned long _qzz_res;                                      \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                      \
+-                            VG_USERREQ__DO_LEAK_CHECK,           \
+-                            1, 0, 0, 0, 0);                      \
+-   }
++#define VALGRIND_DO_QUICK_LEAK_CHECK                             \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DO_LEAK_CHECK,   \
++                                    1, 0, 0, 0, 0)
+ 
+ /* Return number of leaked, dubious, reachable and suppressed bytes found by
+    all previous leak checks.  They must be lvalues.  */
+@@ -213,10 +218,10 @@
+       are.  We also initialise '_qzz_leaked', etc because
+       VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
+       defined. */                                                        \
+-   {unsigned long _qzz_res;                                              \
++   {                                                                     \
+     unsigned long _qzz_leaked    = 0, _qzz_dubious    = 0;               \
+     unsigned long _qzz_reachable = 0, _qzz_suppressed = 0;               \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                              \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(                                     \
+                                VG_USERREQ__COUNT_LEAKS,                  \
+                                &_qzz_leaked, &_qzz_dubious,              \
+                                &_qzz_reachable, &_qzz_suppressed, 0);    \
+@@ -235,10 +240,10 @@
+       are.  We also initialise '_qzz_leaked', etc because
+       VG_USERREQ__COUNT_LEAKS doesn't mark the values returned as
+       defined. */                                                        \
+-   {unsigned long _qzz_res;                                              \
++   {                                                                     \
+     unsigned long _qzz_leaked    = 0, _qzz_dubious    = 0;               \
+     unsigned long _qzz_reachable = 0, _qzz_suppressed = 0;               \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                              \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(                                     \
+                                VG_USERREQ__COUNT_LEAK_BLOCKS,            \
+                                &_qzz_leaked, &_qzz_dubious,              \
+                                &_qzz_reachable, &_qzz_suppressed, 0);    \
+@@ -258,10 +263,11 @@
+    The metadata is not copied in cases 0, 2 or 3 so it should be
+    impossible to segfault your system by using this call.
+ */
+-#define VALGRIND_GET_VBITS(zza,zzvbits,zznbytes)                     \
+-    VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                               \
+-                                    VG_USERREQ__GET_VBITS,           \
+-                                    (char*)(zza), (char*)(zzvbits),  \
++#define VALGRIND_GET_VBITS(zza,zzvbits,zznbytes)                \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                \
++                                    VG_USERREQ__GET_VBITS,      \
++                                    (const char*)(zza),         \
++                                    (char*)(zzvbits),           \
+                                     (zznbytes), 0, 0)
+ 
+ /* Set the validity data for addresses [zza..zza+zznbytes-1], copying it
+@@ -273,11 +279,24 @@
+    The metadata is not copied in cases 0, 2 or 3 so it should be
+    impossible to segfault your system by using this call.
+ */
+-#define VALGRIND_SET_VBITS(zza,zzvbits,zznbytes)                     \
+-    VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                               \
+-                                    VG_USERREQ__SET_VBITS,           \
+-                                    (char*)(zza), (char*)(zzvbits),  \
++#define VALGRIND_SET_VBITS(zza,zzvbits,zznbytes)                \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                \
++                                    VG_USERREQ__SET_VBITS,      \
++                                    (const char*)(zza),         \
++                                    (const char*)(zzvbits),     \
+                                     (zznbytes), 0, 0 )
+ 
++/* Disable and re-enable reporting of addressing errors in the
++   specified address range. */
++#define VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(_qzz_addr,_qzz_len) \
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,    \
++       VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE,      \
++       (_qzz_addr), (_qzz_len), 0, 0, 0)
++
++#define VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(_qzz_addr,_qzz_len) \
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,    \
++       VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE,       \
++       (_qzz_addr), (_qzz_len), 0, 0, 0)
++
+ #endif
+ 
+diff -urN libsecret-0.15.old/build/valgrind/valgrind.h libsecret-0.15/build/valgrind/valgrind.h
+--- libsecret-0.15.old/build/valgrind/valgrind.h	2012-02-02 13:11:10.000000000 +0000
++++ libsecret-0.15/build/valgrind/valgrind.h	2014-09-08 13:28:23.000000000 +0100
+@@ -12,7 +12,7 @@
+    This file is part of Valgrind, a dynamic binary instrumentation
+    framework.
+ 
+-   Copyright (C) 2000-2010 Julian Seward.  All rights reserved.
++   Copyright (C) 2000-2013 Julian Seward.  All rights reserved.
+ 
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+@@ -89,7 +89,7 @@
+         || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6))
+ */
+ #define __VALGRIND_MAJOR__    3
+-#define __VALGRIND_MINOR__    6
++#define __VALGRIND_MINOR__    10
+ 
+ 
+ #include <stdarg.h>
+@@ -108,41 +108,55 @@
+    Misc note: how to find out what's predefined in gcc by default:
+    gcc -Wp,-dM somefile.c
+ */
+-#undef PLAT_ppc64_aix5
+-#undef PLAT_ppc32_aix5
+ #undef PLAT_x86_darwin
+ #undef PLAT_amd64_darwin
+ #undef PLAT_x86_win32
++#undef PLAT_amd64_win64
+ #undef PLAT_x86_linux
+ #undef PLAT_amd64_linux
+ #undef PLAT_ppc32_linux
+-#undef PLAT_ppc64_linux
++#undef PLAT_ppc64be_linux
++#undef PLAT_ppc64le_linux
+ #undef PLAT_arm_linux
++#undef PLAT_arm64_linux
+ #undef PLAT_s390x_linux
++#undef PLAT_mips32_linux
++#undef PLAT_mips64_linux
+ 
+ 
+-#if defined(_AIX) && defined(__64BIT__)
+-#  define PLAT_ppc64_aix5 1
+-#elif defined(_AIX) && !defined(__64BIT__)
+-#  define PLAT_ppc32_aix5 1
+-#elif defined(__APPLE__) && defined(__i386__)
++#if defined(__APPLE__) && defined(__i386__)
+ #  define PLAT_x86_darwin 1
+ #elif defined(__APPLE__) && defined(__x86_64__)
+ #  define PLAT_amd64_darwin 1
+-#elif defined(__MINGW32__) || defined(__CYGWIN32__) || defined(_WIN32) && defined(_M_IX86)
++#elif (defined(__MINGW32__) && !defined(__MINGW64__)) \
++      || defined(__CYGWIN32__) \
++      || (defined(_WIN32) && defined(_M_IX86))
+ #  define PLAT_x86_win32 1
++#elif defined(__MINGW64__) \
++      || (defined(_WIN64) && defined(_M_X64))
++#  define PLAT_amd64_win64 1
+ #elif defined(__linux__) && defined(__i386__)
+ #  define PLAT_x86_linux 1
+ #elif defined(__linux__) && defined(__x86_64__)
+ #  define PLAT_amd64_linux 1
+ #elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__)
+ #  define PLAT_ppc32_linux 1
+-#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__)
+-#  define PLAT_ppc64_linux 1
+-#elif defined(__linux__) && defined(__arm__)
++#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2
++/* Big Endian uses ELF version 1 */
++#  define PLAT_ppc64be_linux 1
++#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2
++/* Little Endian uses ELF version 2 */
++#  define PLAT_ppc64le_linux 1
++#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
+ #  define PLAT_arm_linux 1
++#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
++#  define PLAT_arm64_linux 1
+ #elif defined(__linux__) && defined(__s390__) && defined(__s390x__)
+ #  define PLAT_s390x_linux 1
++#elif defined(__linux__) && defined(__mips__) && (__mips==64)
++#  define PLAT_mips64_linux 1
++#elif defined(__linux__) && defined(__mips__) && (__mips!=64)
++#  define PLAT_mips32_linux 1
+ #else
+ /* If we're not compiling for our target platform, don't generate
+    any inline asms.  */
+@@ -157,17 +171,41 @@
+ /* in here of use to end-users -- skip to the next section.           */
+ /* ------------------------------------------------------------------ */
+ 
++/*
++ * VALGRIND_DO_CLIENT_REQUEST(): a statement that invokes a Valgrind client
++ * request. Accepts both pointers and integers as arguments.
++ *
++ * VALGRIND_DO_CLIENT_REQUEST_STMT(): a statement that invokes a Valgrind
++ * client request that does not return a value.
++
++ * VALGRIND_DO_CLIENT_REQUEST_EXPR(): a C expression that invokes a Valgrind
++ * client request and whose value equals the client request result.  Accepts
++ * both pointers and integers as arguments.  Note that such calls are not
++ * necessarily pure functions -- they may have side effects.
++ */
++
++#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default,            \
++                                   _zzq_request, _zzq_arg1, _zzq_arg2,  \
++                                   _zzq_arg3, _zzq_arg4, _zzq_arg5)     \
++  do { (_zzq_rlval) = VALGRIND_DO_CLIENT_REQUEST_EXPR((_zzq_default),   \
++                        (_zzq_request), (_zzq_arg1), (_zzq_arg2),       \
++                        (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0)
++
++#define VALGRIND_DO_CLIENT_REQUEST_STMT(_zzq_request, _zzq_arg1,        \
++                           _zzq_arg2,  _zzq_arg3, _zzq_arg4, _zzq_arg5) \
++  do { (void) VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                        \
++                    (_zzq_request), (_zzq_arg1), (_zzq_arg2),           \
++                    (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0)
++
+ #if defined(NVALGRIND)
+ 
+ /* Define NVALGRIND to completely remove the Valgrind magic sequence
+    from the compiled code (analogous to NDEBUG's effects on
+    assert()) */
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+-   {                                                              \
+-      (_zzq_rlval) = (_zzq_default);                              \
+-   }
++      (_zzq_default)
+ 
+ #else  /* ! NVALGRIND */
+ 
+@@ -221,10 +259,11 @@
+                      "roll $3,  %%edi ; roll $13, %%edi\n\t"      \
+                      "roll $29, %%edi ; roll $19, %%edi\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+-  { volatile unsigned int _zzq_args[6];                           \
++  __extension__                                                   \
++  ({volatile unsigned int _zzq_args[6];                           \
+     volatile unsigned int _zzq_result;                            \
+     _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+     _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+@@ -239,8 +278,8 @@
+                      : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
+                      : "cc", "memory"                             \
+                     );                                            \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++    _zzq_result;                                                  \
++  })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+@@ -259,6 +298,15 @@
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* call-noredir *%EAX */                     \
+                      "xchgl %%edx,%%edx\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "xchgl %%edi,%%edi\n\t"                     \
++                     : : : "cc", "memory"                        \
++                    );                                           \
++ } while (0)
++
+ #endif /* PLAT_x86_linux || PLAT_x86_darwin || (PLAT_x86_win32 && __GNUC__) */
+ 
+ /* ------------------------- x86-Win32 ------------------------- */
+@@ -277,25 +325,36 @@
+                      __asm rol edi, 3  __asm rol edi, 13          \
+                      __asm rol edi, 29 __asm rol edi, 19
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+-  { volatile uintptr_t _zzq_args[6];                              \
+-    volatile unsigned int _zzq_result;                            \
+-    _zzq_args[0] = (uintptr_t)(_zzq_request);                     \
+-    _zzq_args[1] = (uintptr_t)(_zzq_arg1);                        \
+-    _zzq_args[2] = (uintptr_t)(_zzq_arg2);                        \
+-    _zzq_args[3] = (uintptr_t)(_zzq_arg3);                        \
+-    _zzq_args[4] = (uintptr_t)(_zzq_arg4);                        \
+-    _zzq_args[5] = (uintptr_t)(_zzq_arg5);                        \
+-    __asm { __asm lea eax, _zzq_args __asm mov edx, _zzq_default  \
+-            __SPECIAL_INSTRUCTION_PREAMBLE                        \
+-            /* %EDX = client_request ( %EAX ) */                  \
+-            __asm xchg ebx,ebx                                    \
+-            __asm mov _zzq_result, edx                            \
+-    }                                                             \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++    valgrind_do_client_request_expr((uintptr_t)(_zzq_default),    \
++        (uintptr_t)(_zzq_request), (uintptr_t)(_zzq_arg1),        \
++        (uintptr_t)(_zzq_arg2), (uintptr_t)(_zzq_arg3),           \
++        (uintptr_t)(_zzq_arg4), (uintptr_t)(_zzq_arg5))
++
++static __inline uintptr_t
++valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request,
++                                uintptr_t _zzq_arg1, uintptr_t _zzq_arg2,
++                                uintptr_t _zzq_arg3, uintptr_t _zzq_arg4,
++                                uintptr_t _zzq_arg5)
++{
++    volatile uintptr_t _zzq_args[6];
++    volatile unsigned int _zzq_result;
++    _zzq_args[0] = (uintptr_t)(_zzq_request);
++    _zzq_args[1] = (uintptr_t)(_zzq_arg1);
++    _zzq_args[2] = (uintptr_t)(_zzq_arg2);
++    _zzq_args[3] = (uintptr_t)(_zzq_arg3);
++    _zzq_args[4] = (uintptr_t)(_zzq_arg4);
++    _zzq_args[5] = (uintptr_t)(_zzq_arg5);
++    __asm { __asm lea eax, _zzq_args __asm mov edx, _zzq_default
++            __SPECIAL_INSTRUCTION_PREAMBLE
++            /* %EDX = client_request ( %EAX ) */
++            __asm xchg ebx,ebx
++            __asm mov _zzq_result, edx
++    }
++    return _zzq_result;
++}
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+@@ -310,6 +369,13 @@
+ 
+ #define VALGRIND_CALL_NOREDIR_EAX ERROR
+ 
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm { __SPECIAL_INSTRUCTION_PREAMBLE                       \
++            __asm xchg edi,edi                                   \
++    }                                                            \
++ } while (0)
++
+ #else
+ #error Unsupported compiler.
+ #endif
+@@ -318,7 +384,8 @@
+ 
+ /* ------------------------ amd64-{linux,darwin} --------------- */
+ 
+-#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin)
++#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin) \
++    ||  (defined(PLAT_amd64_win64) && defined(__GNUC__))
+ 
+ typedef
+    struct { 
+@@ -330,10 +397,11 @@
+                      "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"      \
+                      "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+-  { volatile unsigned long long int _zzq_args[6];                 \
++    __extension__                                                 \
++    ({ volatile unsigned long long int _zzq_args[6];              \
+     volatile unsigned long long int _zzq_result;                  \
+     _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+     _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+@@ -348,8 +416,8 @@
+                      : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
+                      : "cc", "memory"                             \
+                     );                                            \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++    _zzq_result;                                                  \
++    })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+@@ -368,8 +436,25 @@
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* call-noredir *%RAX */                     \
+                      "xchgq %%rdx,%%rdx\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "xchgq %%rdi,%%rdi\n\t"                     \
++                     : : : "cc", "memory"                        \
++                    );                                           \
++ } while (0)
++
+ #endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
+ 
++/* ------------------------- amd64-Win64 ------------------------- */
++
++#if defined(PLAT_amd64_win64) && !defined(__GNUC__)
++
++#error Unsupported compiler.
++
++#endif /* PLAT_amd64_win64 */
++
+ /* ------------------------ ppc32-linux ------------------------ */
+ 
+ #if defined(PLAT_ppc32_linux)
+@@ -381,14 +466,15 @@
+    OrigFn;
+ 
+ #define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
+-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
++                    "rlwinm 0,0,3,0,31  ; rlwinm 0,0,13,0,31\n\t" \
++                    "rlwinm 0,0,29,0,31 ; rlwinm 0,0,19,0,31\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                   \
+-  {          unsigned int  _zzq_args[6];                          \
++    __extension__                                                 \
++  ({         unsigned int  _zzq_args[6];                          \
+              unsigned int  _zzq_result;                           \
+              unsigned int* _zzq_ptr;                              \
+     _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+@@ -407,8 +493,8 @@
+                      : "=b" (_zzq_result)                         \
+                      : "b" (_zzq_default), "b" (_zzq_ptr)         \
+                      : "cc", "memory", "r3", "r4");               \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++    _zzq_result;                                                  \
++    })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+@@ -428,11 +514,19 @@
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* branch-and-link-to-noredir *%R11 */       \
+                      "or 3,3,3\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "or 5,5,5\n\t"                              \
++                    );                                           \
++ } while (0)
++
+ #endif /* PLAT_ppc32_linux */
+ 
+ /* ------------------------ ppc64-linux ------------------------ */
+ 
+-#if defined(PLAT_ppc64_linux)
++#if defined(PLAT_ppc64be_linux)
+ 
+ typedef
+    struct { 
+@@ -445,13 +539,14 @@
+                      "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+                      "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                   \
+-  {          unsigned long long int  _zzq_args[6];                \
+-    register unsigned long long int  _zzq_result __asm__("r3");   \
+-    register unsigned long long int* _zzq_ptr __asm__("r4");      \
++  __extension__                                                   \
++  ({         unsigned long long int  _zzq_args[6];                \
++             unsigned long long int  _zzq_result;                 \
++             unsigned long long int* _zzq_ptr;                    \
+     _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+     _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+     _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+@@ -459,32 +554,37 @@
+     _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+     _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+     _zzq_ptr = _zzq_args;                                         \
+-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
++    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
++                     "mr 4,%2\n\t" /*ptr*/                        \
++                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* %R3 = client_request ( %R4 ) */           \
+-                     "or 1,1,1"                                   \
+-                     : "=r" (_zzq_result)                         \
+-                     : "0" (_zzq_default), "r" (_zzq_ptr)         \
+-                     : "cc", "memory");                           \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++                     "or 1,1,1\n\t"                               \
++                     "mr %0,3"     /*result*/                     \
++                     : "=b" (_zzq_result)                         \
++                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
++                     : "cc", "memory", "r3", "r4");               \
++    _zzq_result;                                                  \
++  })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+-    register unsigned long long int __addr __asm__("r3");         \
++    unsigned long long int __addr;                                \
+     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* %R3 = guest_NRADDR */                     \
+-                     "or 2,2,2"                                   \
+-                     : "=r" (__addr)                              \
++                     "or 2,2,2\n\t"                               \
++                     "mr %0,3"                                    \
++                     : "=b" (__addr)                              \
+                      :                                            \
+-                     : "cc", "memory"                             \
++                     : "cc", "memory", "r3"                       \
+                     );                                            \
+     _zzq_orig->nraddr = __addr;                                   \
+     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                      /* %R3 = guest_NRADDR_GPR2 */                \
+-                     "or 4,4,4"                                   \
+-                     : "=r" (__addr)                              \
++                     "or 4,4,4\n\t"                               \
++                     "mr %0,3"                                    \
++                     : "=b" (__addr)                              \
+                      :                                            \
+-                     : "cc", "memory"                             \
++                     : "cc", "memory", "r3"                       \
+                     );                                            \
+     _zzq_orig->r2 = __addr;                                       \
+   }
+@@ -494,210 +594,227 @@
+                      /* branch-and-link-to-noredir *%R11 */       \
+                      "or 3,3,3\n\t"
+ 
+-#endif /* PLAT_ppc64_linux */
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "or 5,5,5\n\t"                              \
++                    );                                           \
++ } while (0)
+ 
+-/* ------------------------- arm-linux ------------------------- */
++#endif /* PLAT_ppc64be_linux */
+ 
+-#if defined(PLAT_arm_linux)
++#if defined(PLAT_ppc64le_linux)
+ 
+ typedef
+-   struct { 
+-      unsigned int nraddr; /* where's the code? */
++   struct {
++      unsigned long long int nraddr; /* where's the code? */
++      unsigned long long int r2;     /* what tocptr do we need? */
+    }
+    OrigFn;
+ 
+ #define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+-            "mov r12, r12, ror #3  ; mov r12, r12, ror #13 \n\t"  \
+-            "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t"
++                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
++                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                   \
+-  { volatile unsigned int  _zzq_args[6];                          \
+-    volatile unsigned int  _zzq_result;                           \
+-    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+-    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+-    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
+-    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
+-    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
+-    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
+-    __asm__ volatile("mov r3, %1\n\t" /*default*/                 \
+-                     "mov r4, %2\n\t" /*ptr*/                     \
++  __extension__                                                   \
++  ({         unsigned long long int  _zzq_args[6];                \
++             unsigned long long int  _zzq_result;                 \
++             unsigned long long int* _zzq_ptr;                    \
++    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
++    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
++    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
++    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
++    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
++    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
++    _zzq_ptr = _zzq_args;                                         \
++    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
++                     "mr 4,%2\n\t" /*ptr*/                        \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* R3 = client_request ( R4 ) */             \
+-                     "orr r10, r10, r10\n\t"                      \
+-                     "mov %0, r3"     /*result*/                  \
+-                     : "=r" (_zzq_result)                         \
+-                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
+-                     : "cc","memory", "r3", "r4");                \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++                     /* %R3 = client_request ( %R4 ) */           \
++                     "or 1,1,1\n\t"                               \
++                     "mr %0,3"     /*result*/                     \
++                     : "=b" (_zzq_result)                         \
++                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
++                     : "cc", "memory", "r3", "r4");               \
++    _zzq_result;                                                  \
++  })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+-    unsigned int __addr;                                          \
++    unsigned long long int __addr;                                \
+     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* R3 = guest_NRADDR */                      \
+-                     "orr r11, r11, r11\n\t"                      \
+-                     "mov %0, r3"                                 \
+-                     : "=r" (__addr)                              \
++                     /* %R3 = guest_NRADDR */                     \
++                     "or 2,2,2\n\t"                               \
++                     "mr %0,3"                                    \
++                     : "=b" (__addr)                              \
+                      :                                            \
+                      : "cc", "memory", "r3"                       \
+                     );                                            \
+     _zzq_orig->nraddr = __addr;                                   \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
++                     /* %R3 = guest_NRADDR_GPR2 */                \
++                     "or 4,4,4\n\t"                               \
++                     "mr %0,3"                                    \
++                     : "=b" (__addr)                              \
++                     :                                            \
++                     : "cc", "memory", "r3"                       \
++                    );                                            \
++    _zzq_orig->r2 = __addr;                                       \
+   }
+ 
+-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                    \
++#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                   \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* branch-and-link-to-noredir *%R4 */        \
+-                     "orr r12, r12, r12\n\t"
++                     /* branch-and-link-to-noredir *%R12 */       \
++                     "or 3,3,3\n\t"
+ 
+-#endif /* PLAT_arm_linux */
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "or 5,5,5\n\t"                              \
++                    );                                           \
++ } while (0)
+ 
+-/* ------------------------ ppc32-aix5 ------------------------- */
++#endif /* PLAT_ppc64le_linux */
++
++/* ------------------------- arm-linux ------------------------- */
+ 
+-#if defined(PLAT_ppc32_aix5)
++#if defined(PLAT_arm_linux)
+ 
+ typedef
+    struct { 
+       unsigned int nraddr; /* where's the code? */
+-      unsigned int r2;  /* what tocptr do we need? */
+    }
+    OrigFn;
+ 
+ #define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
+-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
++            "mov r12, r12, ror #3  ; mov r12, r12, ror #13 \n\t"  \
++            "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                   \
+-  {          unsigned int  _zzq_args[7];                          \
+-    register unsigned int  _zzq_result;                           \
+-    register unsigned int* _zzq_ptr;                              \
++  __extension__                                                   \
++  ({volatile unsigned int  _zzq_args[6];                          \
++    volatile unsigned int  _zzq_result;                           \
+     _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+     _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+     _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
+     _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
+     _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
+     _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
+-    _zzq_args[6] = (unsigned int)(_zzq_default);                  \
+-    _zzq_ptr = _zzq_args;                                         \
+-    __asm__ volatile("mr 4,%1\n\t"                                \
+-                     "lwz 3, 24(4)\n\t"                           \
++    __asm__ volatile("mov r3, %1\n\t" /*default*/                 \
++                     "mov r4, %2\n\t" /*ptr*/                     \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = client_request ( %R4 ) */           \
+-                     "or 1,1,1\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (_zzq_result)                         \
+-                     : "b" (_zzq_ptr)                             \
+-                     : "r3", "r4", "cc", "memory");               \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++                     /* R3 = client_request ( R4 ) */             \
++                     "orr r10, r10, r10\n\t"                      \
++                     "mov %0, r3"     /*result*/                  \
++                     : "=r" (_zzq_result)                         \
++                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
++                     : "cc","memory", "r3", "r4");                \
++    _zzq_result;                                                  \
++  })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+-    register unsigned int __addr;                                 \
++    unsigned int __addr;                                          \
+     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = guest_NRADDR */                     \
+-                     "or 2,2,2\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (__addr)                              \
++                     /* R3 = guest_NRADDR */                      \
++                     "orr r11, r11, r11\n\t"                      \
++                     "mov %0, r3"                                 \
++                     : "=r" (__addr)                              \
+                      :                                            \
+-                     : "r3", "cc", "memory"                       \
++                     : "cc", "memory", "r3"                       \
+                     );                                            \
+     _zzq_orig->nraddr = __addr;                                   \
+-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = guest_NRADDR_GPR2 */                \
+-                     "or 4,4,4\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (__addr)                              \
+-                     :                                            \
+-                     : "r3", "cc", "memory"                       \
+-                    );                                            \
+-    _zzq_orig->r2 = __addr;                                       \
+   }
+ 
+-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
++#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                    \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* branch-and-link-to-noredir *%R11 */       \
+-                     "or 3,3,3\n\t"
++                     /* branch-and-link-to-noredir *%R4 */        \
++                     "orr r12, r12, r12\n\t"
+ 
+-#endif /* PLAT_ppc32_aix5 */
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "orr r9, r9, r9\n\t"                        \
++                     : : : "cc", "memory"                        \
++                    );                                           \
++ } while (0)
++
++#endif /* PLAT_arm_linux */
+ 
+-/* ------------------------ ppc64-aix5 ------------------------- */
++/* ------------------------ arm64-linux ------------------------- */
+ 
+-#if defined(PLAT_ppc64_aix5)
++#if defined(PLAT_arm64_linux)
+ 
+ typedef
+    struct { 
+       unsigned long long int nraddr; /* where's the code? */
+-      unsigned long long int r2;  /* what tocptr do we need? */
+    }
+    OrigFn;
+ 
+ #define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+-                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+-                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
++            "ror x12, x12, #3  ;  ror x12, x12, #13 \n\t"         \
++            "ror x12, x12, #51 ;  ror x12, x12, #61 \n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                               \
+-        _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++        _zzq_default, _zzq_request,                               \
+         _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                   \
+-  {          unsigned long long int  _zzq_args[7];                \
+-    register unsigned long long int  _zzq_result;                 \
+-    register unsigned long long int* _zzq_ptr;                    \
+-    _zzq_args[0] = (unsigned int long long)(_zzq_request);        \
+-    _zzq_args[1] = (unsigned int long long)(_zzq_arg1);           \
+-    _zzq_args[2] = (unsigned int long long)(_zzq_arg2);           \
+-    _zzq_args[3] = (unsigned int long long)(_zzq_arg3);           \
+-    _zzq_args[4] = (unsigned int long long)(_zzq_arg4);           \
+-    _zzq_args[5] = (unsigned int long long)(_zzq_arg5);           \
+-    _zzq_args[6] = (unsigned int long long)(_zzq_default);        \
+-    _zzq_ptr = _zzq_args;                                         \
+-    __asm__ volatile("mr 4,%1\n\t"                                \
+-                     "ld 3, 48(4)\n\t"                            \
++  __extension__                                                   \
++  ({volatile unsigned long long int  _zzq_args[6];                \
++    volatile unsigned long long int  _zzq_result;                 \
++    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
++    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
++    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
++    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
++    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
++    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
++    __asm__ volatile("mov x3, %1\n\t" /*default*/                 \
++                     "mov x4, %2\n\t" /*ptr*/                     \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = client_request ( %R4 ) */           \
+-                     "or 1,1,1\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (_zzq_result)                         \
+-                     : "b" (_zzq_ptr)                             \
+-                     : "r3", "r4", "cc", "memory");               \
+-    _zzq_rlval = _zzq_result;                                     \
+-  }
++                     /* X3 = client_request ( X4 ) */             \
++                     "orr x10, x10, x10\n\t"                      \
++                     "mov %0, x3"     /*result*/                  \
++                     : "=r" (_zzq_result)                         \
++                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
++                     : "cc","memory", "x3", "x4");                \
++    _zzq_result;                                                  \
++  })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+   { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+-    register unsigned long long int __addr;                       \
++    unsigned long long int __addr;                                \
+     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = guest_NRADDR */                     \
+-                     "or 2,2,2\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (__addr)                              \
++                     /* X3 = guest_NRADDR */                      \
++                     "orr x11, x11, x11\n\t"                      \
++                     "mov %0, x3"                                 \
++                     : "=r" (__addr)                              \
+                      :                                            \
+-                     : "r3", "cc", "memory"                       \
++                     : "cc", "memory", "x3"                       \
+                     );                                            \
+     _zzq_orig->nraddr = __addr;                                   \
+-    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* %R3 = guest_NRADDR_GPR2 */                \
+-                     "or 4,4,4\n\t"                               \
+-                     "mr %0,3"                                    \
+-                     : "=b" (__addr)                              \
+-                     :                                            \
+-                     : "r3", "cc", "memory"                       \
+-                    );                                            \
+-    _zzq_orig->r2 = __addr;                                       \
+   }
+ 
+-#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
++#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                    \
+                      __SPECIAL_INSTRUCTION_PREAMBLE               \
+-                     /* branch-and-link-to-noredir *%R11 */       \
+-                     "or 3,3,3\n\t"
++                     /* branch-and-link-to-noredir X8 */          \
++                     "orr x12, x12, x12\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "orr x9, x9, x9\n\t"                        \
++                     : : : "cc", "memory"                        \
++                    );                                           \
++ } while (0)
+ 
+-#endif /* PLAT_ppc64_aix5 */
++#endif /* PLAT_arm64_linux */
+ 
+ /* ------------------------ s390x-linux ------------------------ */
+ 
+@@ -722,11 +839,13 @@
+ #define __CLIENT_REQUEST_CODE "lr 2,2\n\t"
+ #define __GET_NR_CONTEXT_CODE "lr 3,3\n\t"
+ #define __CALL_NO_REDIR_CODE  "lr 4,4\n\t"
++#define __VEX_INJECT_IR_CODE  "lr 5,5\n\t"
+ 
+-#define VALGRIND_DO_CLIENT_REQUEST(                              \
+-       _zzq_rlval, _zzq_default, _zzq_request,                   \
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                         \
++       _zzq_default, _zzq_request,                               \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+- { volatile unsigned long long int _zzq_args[6];                 \
++  __extension__                                                  \
++ ({volatile unsigned long long int _zzq_args[6];                 \
+    volatile unsigned long long int _zzq_result;                  \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+@@ -735,39 +854,187 @@
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    __asm__ volatile(/* r2 = args */                              \
+-		    "lgr 2,%1\n\t"                               \
+-		    /* r3 = default */                           \
+-		    "lgr 3,%2\n\t"                               \
+-		    __SPECIAL_INSTRUCTION_PREAMBLE               \
+-		    __CLIENT_REQUEST_CODE                        \
+-		    /* results = r3 */                           \
+-		    "lgr %0, 3\n\t"                              \
+-		    : "=d" (_zzq_result)                         \
+-		    : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
+-		    : "cc", "2", "3", "memory"                   \
+-		   );                                            \
+-   _zzq_rlval = _zzq_result;                                     \
+- }
++                    "lgr 2,%1\n\t"                               \
++                    /* r3 = default */                           \
++                    "lgr 3,%2\n\t"                               \
++                    __SPECIAL_INSTRUCTION_PREAMBLE               \
++                    __CLIENT_REQUEST_CODE                        \
++                    /* results = r3 */                           \
++                    "lgr %0, 3\n\t"                              \
++                    : "=d" (_zzq_result)                         \
++                    : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
++                    : "cc", "2", "3", "memory"                   \
++                   );                                            \
++   _zzq_result;                                                  \
++ })
+ 
+ #define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                      \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    volatile unsigned long long int __addr;                       \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+-		    __GET_NR_CONTEXT_CODE                        \
+-		    "lgr %0, 3\n\t"                              \
+-		    : "=a" (__addr)                              \
+-		    :                                            \
+-		    : "cc", "3", "memory"                        \
+-		   );                                            \
++                    __GET_NR_CONTEXT_CODE                        \
++                    "lgr %0, 3\n\t"                              \
++                    : "=a" (__addr)                              \
++                    :                                            \
++                    : "cc", "3", "memory"                        \
++                   );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+ 
+ #define VALGRIND_CALL_NOREDIR_R1                                 \
+-		    __SPECIAL_INSTRUCTION_PREAMBLE               \
++                    __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     __CALL_NO_REDIR_CODE
+ 
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     __VEX_INJECT_IR_CODE);                      \
++ } while (0)
++
+ #endif /* PLAT_s390x_linux */
+ 
++/* ------------------------- mips32-linux ---------------- */
++
++#if defined(PLAT_mips32_linux)
++
++typedef
++   struct { 
++      unsigned int nraddr; /* where's the code? */
++   }
++   OrigFn;
++
++/* .word  0x342
++ * .word  0x742
++ * .word  0xC2
++ * .word  0x4C2*/
++#define __SPECIAL_INSTRUCTION_PREAMBLE          \
++                     "srl $0, $0, 13\n\t"       \
++                     "srl $0, $0, 29\n\t"       \
++                     "srl $0, $0, 3\n\t"        \
++                     "srl $0, $0, 19\n\t"
++                    
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
++       _zzq_default, _zzq_request,                                \
++       _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)     \
++  __extension__                                                   \
++  ({ volatile unsigned int _zzq_args[6];                          \
++    volatile unsigned int _zzq_result;                            \
++    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
++    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
++    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
++    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
++    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
++    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
++        __asm__ volatile("move $11, %1\n\t" /*default*/           \
++                     "move $12, %2\n\t" /*ptr*/                   \
++                     __SPECIAL_INSTRUCTION_PREAMBLE               \
++                     /* T3 = client_request ( T4 ) */             \
++                     "or $13, $13, $13\n\t"                       \
++                     "move %0, $11\n\t"     /*result*/            \
++                     : "=r" (_zzq_result)                         \
++                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
++                     : "$11", "$12");                             \
++    _zzq_result;                                                  \
++  })
++
++#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
++  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
++    volatile unsigned int __addr;                                 \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
++                     /* %t9 = guest_NRADDR */                     \
++                     "or $14, $14, $14\n\t"                       \
++                     "move %0, $11"     /*result*/                \
++                     : "=r" (__addr)                              \
++                     :                                            \
++                     : "$11"                                      \
++                    );                                            \
++    _zzq_orig->nraddr = __addr;                                   \
++  }
++
++#define VALGRIND_CALL_NOREDIR_T9                                 \
++                     __SPECIAL_INSTRUCTION_PREAMBLE              \
++                     /* call-noredir *%t9 */                     \
++                     "or $15, $15, $15\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                 \
++ do {                                                            \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
++                     "or $11, $11, $11\n\t"                      \
++                    );                                           \
++ } while (0)
++
++
++#endif /* PLAT_mips32_linux */
++
++/* ------------------------- mips64-linux ---------------- */
++
++#if defined(PLAT_mips64_linux)
++
++typedef
++   struct {
++      unsigned long long nraddr; /* where's the code? */
++   }
++   OrigFn;
++
++/* dsll $0,$0, 3
++ * dsll $0,$0, 13
++ * dsll $0,$0, 29
++ * dsll $0,$0, 19*/
++#define __SPECIAL_INSTRUCTION_PREAMBLE                              \
++                     "dsll $0,$0, 3 ; dsll $0,$0,13\n\t"            \
++                     "dsll $0,$0,29 ; dsll $0,$0,19\n\t"
++
++#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                            \
++       _zzq_default, _zzq_request,                                  \
++       _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)       \
++  __extension__                                                     \
++  ({ volatile unsigned long long int _zzq_args[6];                  \
++    volatile unsigned long long int _zzq_result;                    \
++    _zzq_args[0] = (unsigned long long int)(_zzq_request);          \
++    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);             \
++    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);             \
++    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);             \
++    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);             \
++    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);             \
++        __asm__ volatile("move $11, %1\n\t" /*default*/             \
++                         "move $12, %2\n\t" /*ptr*/                 \
++                         __SPECIAL_INSTRUCTION_PREAMBLE             \
++                         /* $11 = client_request ( $12 ) */         \
++                         "or $13, $13, $13\n\t"                     \
++                         "move %0, $11\n\t"     /*result*/          \
++                         : "=r" (_zzq_result)                       \
++                         : "r" (_zzq_default), "r" (&_zzq_args[0])  \
++                         : "$11", "$12");                           \
++    _zzq_result;                                                    \
++  })
++
++#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                         \
++  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                     \
++    volatile unsigned long long int __addr;                         \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE                 \
++                     /* $11 = guest_NRADDR */                       \
++                     "or $14, $14, $14\n\t"                         \
++                     "move %0, $11"     /*result*/                  \
++                     : "=r" (__addr)                                \
++                     :                                              \
++                     : "$11");                                      \
++    _zzq_orig->nraddr = __addr;                                     \
++  }
++
++#define VALGRIND_CALL_NOREDIR_T9                                    \
++                     __SPECIAL_INSTRUCTION_PREAMBLE                 \
++                     /* call-noredir $25 */                         \
++                     "or $15, $15, $15\n\t"
++
++#define VALGRIND_VEX_INJECT_IR()                                    \
++ do {                                                               \
++    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE                 \
++                     "or $11, $11, $11\n\t"                         \
++                    );                                              \
++ } while (0)
++
++#endif /* PLAT_mips64_linux */
++
+ /* Insert assembly code for other platforms here... */
+ 
+ #endif /* NVALGRIND */
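For orientation, the per-platform VALGRIND_DO_CLIENT_REQUEST_EXPR definitions above (including the mips32/mips64 ones this patch adds) are what the public client-request macros in this header expand to: the magic preamble is a sequence of no-op instructions on real hardware, so a request costs almost nothing natively, while Valgrind recognises the sequence and services the request array. A minimal sketch of how application code consumes this, using the standard RUNNING_ON_VALGRIND macro defined elsewhere in valgrind.h (the file name is illustrative):

    /* vg_check.c -- illustrative only. */
    #include <stdio.h>
    #include "valgrind.h"

    int main(void)
    {
       /* RUNNING_ON_VALGRIND expands to a VALGRIND_DO_CLIENT_REQUEST_EXPR
          invocation, i.e. to the magic preamble defined above for the
          current platform.  It evaluates to 0 when run natively and to a
          non-zero value when run under Valgrind. */
       if (RUNNING_ON_VALGRIND)
          printf("running under valgrind\n");
       else
          printf("running natively\n");
       return 0;
    }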
+@@ -797,17 +1064,20 @@
+ */
+ 
+ /* Use these to write the name of your wrapper.  NOTE: duplicates
+-   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
++   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h.  NOTE also: inserts
++   the default behaviour equivalence class tag "0000" into the name.
++   See pub_tool_redir.h for details -- normally you don't need to
++   think about this, though. */
+ 
+ /* Use an extra level of macroisation so as to ensure the soname/fnname
+    args are fully macro-expanded before pasting them together. */
+ #define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd
+ 
+ #define I_WRAP_SONAME_FNNAME_ZU(soname,fnname)                    \
+-   VG_CONCAT4(_vgwZU_,soname,_,fnname)
++   VG_CONCAT4(_vgw00000ZU_,soname,_,fnname)
+ 
+ #define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname)                    \
+-   VG_CONCAT4(_vgwZZ_,soname,_,fnname)
++   VG_CONCAT4(_vgw00000ZZ_,soname,_,fnname)
+ 
+ /* Use this macro from within a wrapper function to collect the
+    context (address and possibly other info) of the original function.
+@@ -815,6 +1085,18 @@
+    macros.  The type of the argument _lval is OrigFn. */
+ #define VALGRIND_GET_ORIG_FN(_lval)  VALGRIND_GET_NR_CONTEXT(_lval)
+ 
++/* Also provide end-user facilities for function replacement, rather
++   than wrapping.  A replacement function differs from a wrapper in
++   that it has no way to get hold of the original function being
++   called, and hence no way to call onwards to it.  In a replacement
++   function, VALGRIND_GET_ORIG_FN always returns zero. */
++
++#define I_REPLACE_SONAME_FNNAME_ZU(soname,fnname)                 \
++   VG_CONCAT4(_vgr00000ZU_,soname,_,fnname)
++
++#define I_REPLACE_SONAME_FNNAME_ZZ(soname,fnname)                 \
++   VG_CONCAT4(_vgr00000ZZ_,soname,_,fnname)
++
+ /* Derivatives of the main macros below, for calling functions
+    returning void. */
+ 
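To make the wrapping scheme above concrete: a wrapper is an ordinary C function whose exported name is built with I_WRAP_SONAME_FNNAME_ZU/ZZ, and which fetches the original function via VALGRIND_GET_ORIG_FN and then calls onwards through the matching CALL_FN_W_* macro defined below. A minimal sketch, in which the library libfoo.so.0 and the function foo are hypothetical; "Zd" Z-encodes a dot in the soname:

    /* Illustrative wrapper for int foo(int) from a hypothetical libfoo.so.0.
       "libfooZdsoZd0" is the Z-encoded soname ("Zd" stands for '.'). */
    #include <stdio.h>
    #include "valgrind.h"

    int I_WRAP_SONAME_FNNAME_ZU(libfooZdsoZd0, foo)(int x)
    {
       int    result;
       OrigFn fn;
       VALGRIND_GET_ORIG_FN(fn);     /* context of the real foo */
       printf("foo(%d) intercepted\n", x);
       CALL_FN_W_W(result, fn, x);   /* call onwards to the real foo */
       return result;
    }

A replacement function would use I_REPLACE_SONAME_FNNAME_ZU instead, in which case VALGRIND_GET_ORIG_FN yields zero and there is no original to call onwards to.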
+@@ -858,6 +1140,17 @@
+    as gcc can already see that, plus causes gcc to bomb. */
+ #define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
+ 
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++#define VALGRIND_ALIGN_STACK               \
++      "movl %%esp,%%edi\n\t"               \
++      "andl $0xfffffff0,%%esp\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "movl %%edi,%%esp\n\t"
++
+ /* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
+    long) == 4. */
+ 
+@@ -868,11 +1161,13 @@
+       volatile unsigned long _res;                                \
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -885,14 +1180,15 @@
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       _argvec[1] = (unsigned long)(arg1);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $12, %%esp\n\t"                                    \
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $16, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -906,15 +1202,16 @@
+       _argvec[1] = (unsigned long)(arg1);                         \
+       _argvec[2] = (unsigned long)(arg2);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $8, %%esp\n\t"                                     \
+          "pushl 8(%%eax)\n\t"                                     \
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $16, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -929,16 +1226,17 @@
+       _argvec[2] = (unsigned long)(arg2);                         \
+       _argvec[3] = (unsigned long)(arg3);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $4, %%esp\n\t"                                     \
+          "pushl 12(%%eax)\n\t"                                    \
+          "pushl 8(%%eax)\n\t"                                     \
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $16, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -954,16 +1252,17 @@
+       _argvec[3] = (unsigned long)(arg3);                         \
+       _argvec[4] = (unsigned long)(arg4);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "pushl 16(%%eax)\n\t"                                    \
+          "pushl 12(%%eax)\n\t"                                    \
+          "pushl 8(%%eax)\n\t"                                     \
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $16, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -980,6 +1279,7 @@
+       _argvec[4] = (unsigned long)(arg4);                         \
+       _argvec[5] = (unsigned long)(arg5);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $12, %%esp\n\t"                                    \
+          "pushl 20(%%eax)\n\t"                                    \
+          "pushl 16(%%eax)\n\t"                                    \
+@@ -988,10 +1288,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $32, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1009,6 +1309,7 @@
+       _argvec[5] = (unsigned long)(arg5);                         \
+       _argvec[6] = (unsigned long)(arg6);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $8, %%esp\n\t"                                     \
+          "pushl 24(%%eax)\n\t"                                    \
+          "pushl 20(%%eax)\n\t"                                    \
+@@ -1018,10 +1319,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $32, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1041,6 +1342,7 @@
+       _argvec[6] = (unsigned long)(arg6);                         \
+       _argvec[7] = (unsigned long)(arg7);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $4, %%esp\n\t"                                     \
+          "pushl 28(%%eax)\n\t"                                    \
+          "pushl 24(%%eax)\n\t"                                    \
+@@ -1051,10 +1353,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $32, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1075,6 +1377,7 @@
+       _argvec[7] = (unsigned long)(arg7);                         \
+       _argvec[8] = (unsigned long)(arg8);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "pushl 32(%%eax)\n\t"                                    \
+          "pushl 28(%%eax)\n\t"                                    \
+          "pushl 24(%%eax)\n\t"                                    \
+@@ -1085,10 +1388,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $32, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1110,6 +1413,7 @@
+       _argvec[8] = (unsigned long)(arg8);                         \
+       _argvec[9] = (unsigned long)(arg9);                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $12, %%esp\n\t"                                    \
+          "pushl 36(%%eax)\n\t"                                    \
+          "pushl 32(%%eax)\n\t"                                    \
+@@ -1122,10 +1426,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $48, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1148,6 +1452,7 @@
+       _argvec[9] = (unsigned long)(arg9);                         \
+       _argvec[10] = (unsigned long)(arg10);                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $8, %%esp\n\t"                                     \
+          "pushl 40(%%eax)\n\t"                                    \
+          "pushl 36(%%eax)\n\t"                                    \
+@@ -1161,10 +1466,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $48, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1189,6 +1494,7 @@
+       _argvec[10] = (unsigned long)(arg10);                       \
+       _argvec[11] = (unsigned long)(arg11);                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "subl $4, %%esp\n\t"                                     \
+          "pushl 44(%%eax)\n\t"                                    \
+          "pushl 40(%%eax)\n\t"                                    \
+@@ -1203,10 +1509,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $48, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1232,6 +1538,7 @@
+       _argvec[11] = (unsigned long)(arg11);                       \
+       _argvec[12] = (unsigned long)(arg12);                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "pushl 48(%%eax)\n\t"                                    \
+          "pushl 44(%%eax)\n\t"                                    \
+          "pushl 40(%%eax)\n\t"                                    \
+@@ -1246,10 +1553,10 @@
+          "pushl 4(%%eax)\n\t"                                     \
+          "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+          VALGRIND_CALL_NOREDIR_EAX                                \
+-         "addl $48, %%esp\n"                                      \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=a" (_res)                                  \
+          : /*in*/    "a" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1337,6 +1644,16 @@
+ #  define VALGRIND_CFI_EPILOGUE
+ #endif
+ 
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++#define VALGRIND_ALIGN_STACK               \
++      "movq %%rsp,%%r14\n\t"               \
++      "andq $0xfffffffffffffff0,%%rsp\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "movq %%r14,%%rsp\n\t"
+ 
+ /* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+    long) == 8. */
+@@ -1357,437 +1674,444 @@
+    redzone, for the duration of the hidden call, to make it safe.
+ 
+    Probably the same problem afflicts the other redzone-style ABIs too
+-   (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
++   (ppc64-linux); but for those, the stack is
+    self describing (none of this CFI nonsense) so at least messing
+    with the stack pointer doesn't give a danger of non-unwindable
+    stack. */
+ 
+-#define CALL_FN_W_v(lval, orig)                                   \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[1];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_W(lval, orig, arg1)                             \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[2];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[4];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[5];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[6];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[7];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7)                            \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[8];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $136,%%rsp\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $8, %%rsp\n"                                       \
+-         "addq $136,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8)                       \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[9];                          \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "pushq 64(%%rax)\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $16, %%rsp\n"                                      \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8,arg9)                  \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[10];                         \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $136,%%rsp\n\t"                                    \
+-         "pushq 72(%%rax)\n\t"                                    \
+-         "pushq 64(%%rax)\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $24, %%rsp\n"                                      \
+-         "addq $136,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10)           \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[11];                         \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "pushq 80(%%rax)\n\t"                                    \
+-         "pushq 72(%%rax)\n\t"                                    \
+-         "pushq 64(%%rax)\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $32, %%rsp\n"                                      \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10,arg11)     \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[12];                         \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+-      _argvec[11] = (unsigned long)(arg11);                       \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $136,%%rsp\n\t"                                    \
+-         "pushq 88(%%rax)\n\t"                                    \
+-         "pushq 80(%%rax)\n\t"                                    \
+-         "pushq 72(%%rax)\n\t"                                    \
+-         "pushq 64(%%rax)\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $40, %%rsp\n"                                      \
+-         "addq $136,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                arg7,arg8,arg9,arg10,arg11,arg12) \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[13];                         \
+-      volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+-      _argvec[11] = (unsigned long)(arg11);                       \
+-      _argvec[12] = (unsigned long)(arg12);                       \
+-      __asm__ volatile(                                           \
+-         VALGRIND_CFI_PROLOGUE                                    \
+-         "subq $128,%%rsp\n\t"                                    \
+-         "pushq 96(%%rax)\n\t"                                    \
+-         "pushq 88(%%rax)\n\t"                                    \
+-         "pushq 80(%%rax)\n\t"                                    \
+-         "pushq 72(%%rax)\n\t"                                    \
+-         "pushq 64(%%rax)\n\t"                                    \
+-         "pushq 56(%%rax)\n\t"                                    \
+-         "movq 48(%%rax), %%r9\n\t"                               \
+-         "movq 40(%%rax), %%r8\n\t"                               \
+-         "movq 32(%%rax), %%rcx\n\t"                              \
+-         "movq 24(%%rax), %%rdx\n\t"                              \
+-         "movq 16(%%rax), %%rsi\n\t"                              \
+-         "movq 8(%%rax), %%rdi\n\t"                               \
+-         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+-         VALGRIND_CALL_NOREDIR_RAX                                \
+-         "addq $48, %%rsp\n"                                      \
+-         "addq $128,%%rsp\n\t"                                    \
+-         VALGRIND_CFI_EPILOGUE                                    \
+-         : /*out*/   "=a" (_res)                                  \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r15"   \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
++#define CALL_FN_W_v(lval, orig)                                        \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[1];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_W(lval, orig, arg1)                                  \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[2];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_WW(lval, orig, arg1,arg2)                            \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[3];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                      \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[4];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)                \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[5];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)             \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[6];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)        \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[7];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,        \
++                                 arg7)                                 \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[8];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $136,%%rsp\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,        \
++                                 arg7,arg8)                            \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[9];                               \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      _argvec[8] = (unsigned long)(arg8);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "pushq 64(%%rax)\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,        \
++                                 arg7,arg8,arg9)                       \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[10];                              \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      _argvec[8] = (unsigned long)(arg8);                              \
++      _argvec[9] = (unsigned long)(arg9);                              \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $136,%%rsp\n\t"                                         \
++         "pushq 72(%%rax)\n\t"                                         \
++         "pushq 64(%%rax)\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,       \
++                                  arg7,arg8,arg9,arg10)                \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[11];                              \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      _argvec[8] = (unsigned long)(arg8);                              \
++      _argvec[9] = (unsigned long)(arg9);                              \
++      _argvec[10] = (unsigned long)(arg10);                            \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "pushq 80(%%rax)\n\t"                                         \
++         "pushq 72(%%rax)\n\t"                                         \
++         "pushq 64(%%rax)\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,       \
++                                  arg7,arg8,arg9,arg10,arg11)          \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[12];                              \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      _argvec[8] = (unsigned long)(arg8);                              \
++      _argvec[9] = (unsigned long)(arg9);                              \
++      _argvec[10] = (unsigned long)(arg10);                            \
++      _argvec[11] = (unsigned long)(arg11);                            \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $136,%%rsp\n\t"                                         \
++         "pushq 88(%%rax)\n\t"                                         \
++         "pushq 80(%%rax)\n\t"                                         \
++         "pushq 72(%%rax)\n\t"                                         \
++         "pushq 64(%%rax)\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
++   } while (0)
++
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,       \
++                                arg7,arg8,arg9,arg10,arg11,arg12)      \
++   do {                                                                \
++      volatile OrigFn        _orig = (orig);                           \
++      volatile unsigned long _argvec[13];                              \
++      volatile unsigned long _res;                                     \
++      _argvec[0] = (unsigned long)_orig.nraddr;                        \
++      _argvec[1] = (unsigned long)(arg1);                              \
++      _argvec[2] = (unsigned long)(arg2);                              \
++      _argvec[3] = (unsigned long)(arg3);                              \
++      _argvec[4] = (unsigned long)(arg4);                              \
++      _argvec[5] = (unsigned long)(arg5);                              \
++      _argvec[6] = (unsigned long)(arg6);                              \
++      _argvec[7] = (unsigned long)(arg7);                              \
++      _argvec[8] = (unsigned long)(arg8);                              \
++      _argvec[9] = (unsigned long)(arg9);                              \
++      _argvec[10] = (unsigned long)(arg10);                            \
++      _argvec[11] = (unsigned long)(arg11);                            \
++      _argvec[12] = (unsigned long)(arg12);                            \
++      __asm__ volatile(                                                \
++         VALGRIND_CFI_PROLOGUE                                         \
++         VALGRIND_ALIGN_STACK                                          \
++         "subq $128,%%rsp\n\t"                                         \
++         "pushq 96(%%rax)\n\t"                                         \
++         "pushq 88(%%rax)\n\t"                                         \
++         "pushq 80(%%rax)\n\t"                                         \
++         "pushq 72(%%rax)\n\t"                                         \
++         "pushq 64(%%rax)\n\t"                                         \
++         "pushq 56(%%rax)\n\t"                                         \
++         "movq 48(%%rax), %%r9\n\t"                                    \
++         "movq 40(%%rax), %%r8\n\t"                                    \
++         "movq 32(%%rax), %%rcx\n\t"                                   \
++         "movq 24(%%rax), %%rdx\n\t"                                   \
++         "movq 16(%%rax), %%rsi\n\t"                                   \
++         "movq 8(%%rax), %%rdi\n\t"                                    \
++         "movq (%%rax), %%rax\n\t"  /* target->%rax */                 \
++         VALGRIND_CALL_NOREDIR_RAX                                     \
++         VALGRIND_RESTORE_STACK                                        \
++         VALGRIND_CFI_EPILOGUE                                         \
++         : /*out*/   "=a" (_res)                                       \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER                 \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \
++      );                                                               \
++      lval = (__typeof__(lval)) _res;                                  \
+    } while (0)
+ 
+ #endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
+@@ -1826,6 +2150,17 @@
+    "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+    "r11", "r12", "r13"
+ 
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++#define VALGRIND_ALIGN_STACK               \
++      "mr 28,1\n\t"                        \
++      "rlwinm 1,1,0,0,27\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "mr 1,28\n\t"
++
+ /* These CALL_FN_ macros assume that on ppc32-linux, 
+    sizeof(unsigned long) == 4. */
+ 
+@@ -1836,13 +2171,15 @@
+       volatile unsigned long _res;                                \
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1855,14 +2192,16 @@
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       _argvec[1] = (unsigned long)arg1;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1876,15 +2215,17 @@
+       _argvec[1] = (unsigned long)arg1;                           \
+       _argvec[2] = (unsigned long)arg2;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1899,16 +2240,18 @@
+       _argvec[2] = (unsigned long)arg2;                           \
+       _argvec[3] = (unsigned long)arg3;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+          "lwz 5,12(11)\n\t"                                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1924,6 +2267,7 @@
+       _argvec[3] = (unsigned long)arg3;                           \
+       _argvec[4] = (unsigned long)arg4;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+@@ -1931,10 +2275,11 @@
+          "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1951,6 +2296,7 @@
+       _argvec[4] = (unsigned long)arg4;                           \
+       _argvec[5] = (unsigned long)arg5;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+@@ -1959,10 +2305,11 @@
+          "lwz 7,20(11)\n\t"                                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -1980,6 +2327,7 @@
+       _argvec[5] = (unsigned long)arg5;                           \
+       _argvec[6] = (unsigned long)arg6;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+@@ -1989,10 +2337,11 @@
+          "lwz 8,24(11)\n\t"                                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2012,6 +2361,7 @@
+       _argvec[6] = (unsigned long)arg6;                           \
+       _argvec[7] = (unsigned long)arg7;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+@@ -2022,10 +2372,11 @@
+          "lwz 9,28(11)\n\t"                                       \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2046,6 +2397,7 @@
+       _argvec[7] = (unsigned long)arg7;                           \
+       _argvec[8] = (unsigned long)arg8;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+          "lwz 4,8(11)\n\t"                                        \
+@@ -2057,10 +2409,11 @@
+          "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2082,6 +2435,7 @@
+       _argvec[8] = (unsigned long)arg8;                           \
+       _argvec[9] = (unsigned long)arg9;                           \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "addi 1,1,-16\n\t"                                       \
+          /* arg9 */                                               \
+@@ -2098,11 +2452,11 @@
+          "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "addi 1,1,16\n\t"                                        \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2125,6 +2479,7 @@
+       _argvec[9] = (unsigned long)arg9;                           \
+       _argvec[10] = (unsigned long)arg10;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "addi 1,1,-16\n\t"                                       \
+          /* arg10 */                                              \
+@@ -2144,11 +2499,11 @@
+          "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "addi 1,1,16\n\t"                                        \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2172,6 +2527,7 @@
+       _argvec[10] = (unsigned long)arg10;                         \
+       _argvec[11] = (unsigned long)arg11;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "addi 1,1,-32\n\t"                                       \
+          /* arg11 */                                              \
+@@ -2194,11 +2550,11 @@
+          "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "addi 1,1,32\n\t"                                        \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2223,6 +2579,7 @@
+       _argvec[11] = (unsigned long)arg11;                         \
+       _argvec[12] = (unsigned long)arg12;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "addi 1,1,-32\n\t"                                       \
+          /* arg12 */                                              \
+@@ -2248,11 +2605,11 @@
+          "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+          "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "addi 1,1,32\n\t"                                        \
++         VALGRIND_RESTORE_STACK                                   \
+          "mr %0,3"                                                \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2261,7 +2618,7 @@
+ 
+ /* ------------------------ ppc64-linux ------------------------ */
+ 
+-#if defined(PLAT_ppc64_linux)
++#if defined(PLAT_ppc64be_linux)
+ 
+ /* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+ 
+@@ -2272,6 +2629,17 @@
+    "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+    "r11", "r12", "r13"
+ 
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++#define VALGRIND_ALIGN_STACK               \
++      "mr 28,1\n\t"                        \
++      "rldicr 1,1,0,59\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "mr 1,28\n\t"
++
+ /* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
+    long) == 8. */
+ 
+@@ -2284,6 +2652,7 @@
+       _argvec[1] = (unsigned long)_orig.r2;                       \
+       _argvec[2] = (unsigned long)_orig.nraddr;                   \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2291,10 +2660,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2309,6 +2679,7 @@
+       _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+       _argvec[2+1] = (unsigned long)arg1;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2317,10 +2688,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2336,6 +2708,7 @@
+       _argvec[2+1] = (unsigned long)arg1;                         \
+       _argvec[2+2] = (unsigned long)arg2;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2345,10 +2718,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2365,6 +2739,7 @@
+       _argvec[2+2] = (unsigned long)arg2;                         \
+       _argvec[2+3] = (unsigned long)arg3;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2375,10 +2750,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2396,6 +2772,7 @@
+       _argvec[2+3] = (unsigned long)arg3;                         \
+       _argvec[2+4] = (unsigned long)arg4;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2407,10 +2784,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2429,6 +2807,7 @@
+       _argvec[2+4] = (unsigned long)arg4;                         \
+       _argvec[2+5] = (unsigned long)arg5;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2441,10 +2820,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2464,6 +2844,7 @@
+       _argvec[2+5] = (unsigned long)arg5;                         \
+       _argvec[2+6] = (unsigned long)arg6;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2477,10 +2858,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2502,6 +2884,7 @@
+       _argvec[2+6] = (unsigned long)arg6;                         \
+       _argvec[2+7] = (unsigned long)arg7;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2516,10 +2899,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2542,6 +2926,7 @@
+       _argvec[2+7] = (unsigned long)arg7;                         \
+       _argvec[2+8] = (unsigned long)arg8;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2557,10 +2942,11 @@
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)" /* restore tocptr */                      \
++         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2584,6 +2970,7 @@
+       _argvec[2+8] = (unsigned long)arg8;                         \
+       _argvec[2+9] = (unsigned long)arg9;                         \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2605,10 +2992,10 @@
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+          "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         "addi 1,1,128"     /* restore frame */                   \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2633,6 +3020,7 @@
+       _argvec[2+9] = (unsigned long)arg9;                         \
+       _argvec[2+10] = (unsigned long)arg10;                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2657,10 +3045,10 @@
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+          "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         "addi 1,1,128"     /* restore frame */                   \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2686,6 +3074,7 @@
+       _argvec[2+10] = (unsigned long)arg10;                       \
+       _argvec[2+11] = (unsigned long)arg11;                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2713,10 +3102,10 @@
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+          "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         "addi 1,1,144"     /* restore frame */                   \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2743,6 +3132,7 @@
+       _argvec[2+11] = (unsigned long)arg11;                       \
+       _argvec[2+12] = (unsigned long)arg12;                       \
+       __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
+          "mr 11,%1\n\t"                                           \
+          "std 2,-16(11)\n\t"  /* save tocptr */                   \
+          "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+@@ -2773,39 +3163,64 @@
+          "mr 11,%1\n\t"                                           \
+          "mr %0,3\n\t"                                            \
+          "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         "addi 1,1,144"     /* restore frame */                   \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#endif /* PLAT_ppc64_linux */
++#endif /* PLAT_ppc64be_linux */
+ 
+-/* ------------------------- arm-linux ------------------------- */
++/* ------------------------- ppc64le-linux ----------------------- */
++#if defined(PLAT_ppc64le_linux)
+ 
+-#if defined(PLAT_arm_linux)
++/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+ 
+ /* These regs are trashed by the hidden call. */
+-#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14"
++#define __CALLER_SAVED_REGS                                       \
++   "lr", "ctr", "xer",                                            \
++   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
++   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
++   "r11", "r12", "r13"
+ 
+-/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned
+-   long) == 4. */
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++#define VALGRIND_ALIGN_STACK               \
++      "mr 28,1\n\t"                        \
++      "rldicr 1,1,0,59\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "mr 1,28\n\t"
++
++/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
++   long) == 8. */
+ 
+ #define CALL_FN_W_v(lval, orig)                                   \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[1];                          \
++      volatile unsigned long _argvec[3+0];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1] = (unsigned long)_orig.r2;                       \
++      _argvec[2] = (unsigned long)_orig.nraddr;                   \
+       __asm__ volatile(                                           \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "mov %0, r0\n"                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2813,18 +3228,27 @@
+ #define CALL_FN_W_W(lval, orig, arg1)                             \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[2];                          \
++      volatile unsigned long _argvec[3+1];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "mov %0, r0\n"                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory",  __CALLER_SAVED_REGS         \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2832,20 +3256,29 @@
+ #define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3];                          \
++      volatile unsigned long _argvec[3+2];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "mov %0, r0\n"                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2853,22 +3286,31 @@
+ #define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[4];                          \
++      volatile unsigned long _argvec[3+3];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "mov %0, r0\n"                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2876,24 +3318,33 @@
+ #define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[5];                          \
++      volatile unsigned long _argvec[3+4];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2901,28 +3352,35 @@
+ #define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[6];                          \
++      volatile unsigned long _argvec[3+5];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "push {r0} \n\t"                                         \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #4 \n\t"                                    \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2930,30 +3388,37 @@
+ #define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[7];                          \
++      volatile unsigned long _argvec[3+6];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "push {r0, r1} \n\t"                                     \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #8 \n\t"                                    \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2962,32 +3427,39 @@
+                                  arg7)                            \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[8];                          \
++      volatile unsigned long _argvec[3+7];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "push {r0, r1, r2} \n\t"                                 \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #12 \n\t"                                   \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -2996,34 +3468,41 @@
+                                  arg7,arg8)                       \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[9];                          \
++      volatile unsigned long _argvec[3+8];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[2+8] = (unsigned long)arg8;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "ldr r3, [%1, #32] \n\t"                                 \
+-         "push {r0, r1, r2, r3} \n\t"                             \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #16 \n\t"                                   \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -3032,36 +3511,47 @@
+                                  arg7,arg8,arg9)                  \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[10];                         \
++      volatile unsigned long _argvec[3+9];                        \
+       volatile unsigned long _res;                                \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+-      _argvec[1] = (unsigned long)(arg1);                         \
+-      _argvec[2] = (unsigned long)(arg2);                         \
+-      _argvec[3] = (unsigned long)(arg3);                         \
+-      _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[2+8] = (unsigned long)arg8;                         \
++      _argvec[2+9] = (unsigned long)arg9;                         \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "ldr r3, [%1, #32] \n\t"                                 \
+-         "ldr r4, [%1, #36] \n\t"                                 \
+-         "push {r0, r1, r2, r3, r4} \n\t"                         \
+-         "ldr r0, [%1, #4] \n\t"                                  \
+-         "ldr r1, [%1, #8] \n\t"                                  \
+-         "ldr r2, [%1, #12] \n\t"                                 \
+-         "ldr r3, [%1, #16] \n\t"                                 \
+-         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #20 \n\t"                                   \
+-         "mov %0, r0"                                             \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "addi 1,1,-128\n\t"  /* expand stack frame */            \
++         /* arg9 */                                               \
++         "ld  3,72(12)\n\t"                                       \
++         "std 3,96(1)\n\t"                                        \
++         /* args1-8 */                                            \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+@@ -3070,49 +3560,325 @@
+                                   arg7,arg8,arg9,arg10)           \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[11];                         \
++      volatile unsigned long _argvec[3+10];                       \
++      volatile unsigned long _res;                                \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[2+8] = (unsigned long)arg8;                         \
++      _argvec[2+9] = (unsigned long)arg9;                         \
++      _argvec[2+10] = (unsigned long)arg10;                       \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "addi 1,1,-128\n\t"  /* expand stack frame */            \
++         /* arg10 */                                              \
++         "ld  3,80(12)\n\t"                                       \
++         "std 3,104(1)\n\t"                                       \
++         /* arg9 */                                               \
++         "ld  3,72(12)\n\t"                                       \
++         "std 3,96(1)\n\t"                                        \
++         /* args1-8 */                                            \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10,arg11)     \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[3+11];                       \
++      volatile unsigned long _res;                                \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[2+8] = (unsigned long)arg8;                         \
++      _argvec[2+9] = (unsigned long)arg9;                         \
++      _argvec[2+10] = (unsigned long)arg10;                       \
++      _argvec[2+11] = (unsigned long)arg11;                       \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "addi 1,1,-144\n\t"  /* expand stack frame */            \
++         /* arg11 */                                              \
++         "ld  3,88(12)\n\t"                                       \
++         "std 3,112(1)\n\t"                                       \
++         /* arg10 */                                              \
++         "ld  3,80(12)\n\t"                                       \
++         "std 3,104(1)\n\t"                                       \
++         /* arg9 */                                               \
++         "ld  3,72(12)\n\t"                                       \
++         "std 3,96(1)\n\t"                                        \
++         /* args1-8 */                                            \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                arg7,arg8,arg9,arg10,arg11,arg12) \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[3+12];                       \
++      volatile unsigned long _res;                                \
++      /* _argvec[0] holds current r2 across the call */           \
++      _argvec[1]   = (unsigned long)_orig.r2;                     \
++      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
++      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[2+8] = (unsigned long)arg8;                         \
++      _argvec[2+9] = (unsigned long)arg9;                         \
++      _argvec[2+10] = (unsigned long)arg10;                       \
++      _argvec[2+11] = (unsigned long)arg11;                       \
++      _argvec[2+12] = (unsigned long)arg12;                       \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "mr 12,%1\n\t"                                           \
++         "std 2,-16(12)\n\t"  /* save tocptr */                   \
++         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
++         "addi 1,1,-144\n\t"  /* expand stack frame */            \
++         /* arg12 */                                              \
++         "ld  3,96(12)\n\t"                                       \
++         "std 3,120(1)\n\t"                                       \
++         /* arg11 */                                              \
++         "ld  3,88(12)\n\t"                                       \
++         "std 3,112(1)\n\t"                                       \
++         /* arg10 */                                              \
++         "ld  3,80(12)\n\t"                                       \
++         "std 3,104(1)\n\t"                                       \
++         /* arg9 */                                               \
++         "ld  3,72(12)\n\t"                                       \
++         "std 3,96(1)\n\t"                                        \
++         /* args1-8 */                                            \
++         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
++         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
++         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
++         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
++         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
++         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
++         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
++         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
++         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
++         "mr 12,%1\n\t"                                           \
++         "mr %0,3\n\t"                                            \
++         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
++         VALGRIND_RESTORE_STACK                                   \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[2])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#endif /* PLAT_ppc64le_linux */
++
++/* ------------------------- arm-linux ------------------------- */
++
++#if defined(PLAT_arm_linux)
++
++/* These regs are trashed by the hidden call. */
++#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14"
++
++/* Macros to save and align the stack before making a function
++   call and restore it afterwards as gcc may not keep the stack
++   pointer aligned if it doesn't realise calls are being made
++   to other functions. */
++
++/* This is a bit tricky.  We store the original stack pointer in r10
++   as it is callee-saves.  gcc doesn't allow the use of r11 for some
++   reason.  Also, we can't directly "bic" the stack pointer in thumb
++   mode since r13 isn't an allowed register number in that context.
++   So use r4 as a temporary, since that is about to get trashed
++   anyway, just after each use of this macro.  Side effect is we need
++   to be very careful about any future changes, since
++   VALGRIND_ALIGN_STACK simply assumes r4 is usable. */
++#define VALGRIND_ALIGN_STACK               \
++      "mov r10, sp\n\t"                    \
++      "mov r4,  sp\n\t"                    \
++      "bic r4,  r4, #7\n\t"                \
++      "mov sp,  r4\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "mov sp,  r10\n\t"
++
++/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned
++   long) == 4. */
++
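++/* A minimal usage sketch (illustrative only; the wrapped function
++   "foo" is hypothetical): wrappers built on these CALL_FN_ macros
++   fetch the original function with VALGRIND_GET_ORIG_FN and then
++   call it through the macro matching its arity, e.g.
++
++      int I_WRAP_SONAME_FNNAME_ZU(NONE, foo)( int x, int y )
++      {
++         int    r;
++         OrigFn fn;
++         VALGRIND_GET_ORIG_FN(fn);
++         CALL_FN_W_WW(r, fn, x, y);
++         return r;
++      }
++*/
++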
++#define CALL_FN_W_v(lval, orig)                                   \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[1];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0\n"                                           \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_W(lval, orig, arg1)                             \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[2];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0\n"                                           \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[3];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0\n"                                           \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[4];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      __asm__ volatile(                                           \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0\n"                                           \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[5];                          \
+       volatile unsigned long _res;                                \
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       _argvec[1] = (unsigned long)(arg1);                         \
+       _argvec[2] = (unsigned long)(arg2);                         \
+       _argvec[3] = (unsigned long)(arg3);                         \
+       _argvec[4] = (unsigned long)(arg4);                         \
+-      _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #40] \n\t"                                 \
+-         "push {r0} \n\t"                                         \
+-         "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "ldr r3, [%1, #32] \n\t"                                 \
+-         "ldr r4, [%1, #36] \n\t"                                 \
+-         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         VALGRIND_ALIGN_STACK                                     \
+          "ldr r0, [%1, #4] \n\t"                                  \
+          "ldr r1, [%1, #8] \n\t"                                  \
+          "ldr r2, [%1, #12] \n\t"                                 \
+          "ldr r3, [%1, #16] \n\t"                                 \
+          "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #24 \n\t"                                   \
++         VALGRIND_RESTORE_STACK                                   \
+          "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+-                                  arg6,arg7,arg8,arg9,arg10,      \
+-                                  arg11)                          \
++#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[12];                         \
++      volatile unsigned long _argvec[6];                          \
+       volatile unsigned long _res;                                \
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       _argvec[1] = (unsigned long)(arg1);                         \
+@@ -3120,43 +3886,30 @@
+       _argvec[3] = (unsigned long)(arg3);                         \
+       _argvec[4] = (unsigned long)(arg4);                         \
+       _argvec[5] = (unsigned long)(arg5);                         \
+-      _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+-      _argvec[11] = (unsigned long)(arg11);                       \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #40] \n\t"                                 \
+-         "ldr r1, [%1, #44] \n\t"                                 \
+-         "push {r0, r1} \n\t"                                     \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #4 \n\t"                                    \
+          "ldr r0, [%1, #20] \n\t"                                 \
+-         "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "ldr r3, [%1, #32] \n\t"                                 \
+-         "ldr r4, [%1, #36] \n\t"                                 \
+-         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "push {r0} \n\t"                                         \
+          "ldr r0, [%1, #4] \n\t"                                  \
+          "ldr r1, [%1, #8] \n\t"                                  \
+          "ldr r2, [%1, #12] \n\t"                                 \
+          "ldr r3, [%1, #16] \n\t"                                 \
+          "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #28 \n\t"                                   \
++         VALGRIND_RESTORE_STACK                                   \
+          "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS           \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+-                                  arg6,arg7,arg8,arg9,arg10,      \
+-                                  arg11,arg12)                    \
++#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[13];                         \
++      volatile unsigned long _argvec[7];                          \
+       volatile unsigned long _res;                                \
+       _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       _argvec[1] = (unsigned long)(arg1);                         \
+@@ -3165,1167 +3918,732 @@
+       _argvec[4] = (unsigned long)(arg4);                         \
+       _argvec[5] = (unsigned long)(arg5);                         \
+       _argvec[6] = (unsigned long)(arg6);                         \
+-      _argvec[7] = (unsigned long)(arg7);                         \
+-      _argvec[8] = (unsigned long)(arg8);                         \
+-      _argvec[9] = (unsigned long)(arg9);                         \
+-      _argvec[10] = (unsigned long)(arg10);                       \
+-      _argvec[11] = (unsigned long)(arg11);                       \
+-      _argvec[12] = (unsigned long)(arg12);                       \
+       __asm__ volatile(                                           \
+-         "ldr r0, [%1, #40] \n\t"                                 \
+-         "ldr r1, [%1, #44] \n\t"                                 \
+-         "ldr r2, [%1, #48] \n\t"                                 \
+-         "push {r0, r1, r2} \n\t"                                 \
++         VALGRIND_ALIGN_STACK                                     \
+          "ldr r0, [%1, #20] \n\t"                                 \
+          "ldr r1, [%1, #24] \n\t"                                 \
+-         "ldr r2, [%1, #28] \n\t"                                 \
+-         "ldr r3, [%1, #32] \n\t"                                 \
+-         "ldr r4, [%1, #36] \n\t"                                 \
+-         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "push {r0, r1} \n\t"                                     \
+          "ldr r0, [%1, #4] \n\t"                                  \
+          "ldr r1, [%1, #8] \n\t"                                  \
+          "ldr r2, [%1, #12] \n\t"                                 \
+          "ldr r3, [%1, #16] \n\t"                                 \
+          "ldr r4, [%1] \n\t"  /* target->r4 */                    \
+          VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
+-         "add sp, sp, #32 \n\t"                                   \
++         VALGRIND_RESTORE_STACK                                   \
+          "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+          : /*in*/    "0" (&_argvec[0])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#endif /* PLAT_arm_linux */
+-
+-/* ------------------------ ppc32-aix5 ------------------------- */
+-
+-#if defined(PLAT_ppc32_aix5)
+-
+-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+-
+-/* These regs are trashed by the hidden call. */
+-#define __CALLER_SAVED_REGS                                       \
+-   "lr", "ctr", "xer",                                            \
+-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+-   "r11", "r12", "r13"
+-
+-/* Expand the stack frame, copying enough info that unwinding
+-   still works.  Trashes r3. */
+-
+-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
+-         "addi 1,1,-" #_n_fr "\n\t"                               \
+-         "lwz  3," #_n_fr "(1)\n\t"                               \
+-         "stw  3,0(1)\n\t"
+-
+-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
+-         "addi 1,1," #_n_fr "\n\t"
+-
+-/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
+-   long) == 4. */
+-
+-#define CALL_FN_W_v(lval, orig)                                   \
++#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7)                            \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+0];                        \
++      volatile unsigned long _argvec[8];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1] = (unsigned long)_orig.r2;                       \
+-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #4 \n\t"                                    \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "push {r0, r1, r2} \n\t"                                 \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_W(lval, orig, arg1)                             \
++#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8)                       \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+1];                        \
++      volatile unsigned long _argvec[9];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "ldr r3, [%1, #32] \n\t"                                 \
++         "push {r0, r1, r2, r3} \n\t"                             \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
++#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8,arg9)                  \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+2];                        \
++      volatile unsigned long _argvec[10];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #4 \n\t"                                    \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "ldr r3, [%1, #32] \n\t"                                 \
++         "ldr r4, [%1, #36] \n\t"                                 \
++         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
++#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10)           \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+3];                        \
++      volatile unsigned long _argvec[11];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #40] \n\t"                                 \
++         "push {r0} \n\t"                                         \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "ldr r3, [%1, #32] \n\t"                                 \
++         "ldr r4, [%1, #36] \n\t"                                 \
++         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11)                          \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+4];                        \
++      volatile unsigned long _argvec[12];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #4 \n\t"                                    \
++         "ldr r0, [%1, #40] \n\t"                                 \
++         "ldr r1, [%1, #44] \n\t"                                 \
++         "push {r0, r1} \n\t"                                     \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "ldr r3, [%1, #32] \n\t"                                 \
++         "ldr r4, [%1, #36] \n\t"                                 \
++         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11,arg12)                    \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+5];                        \
++      volatile unsigned long _argvec[13];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      _argvec[12] = (unsigned long)(arg12);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t" /* arg2->r4 */                       \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr r0, [%1, #40] \n\t"                                 \
++         "ldr r1, [%1, #44] \n\t"                                 \
++         "ldr r2, [%1, #48] \n\t"                                 \
++         "push {r0, r1, r2} \n\t"                                 \
++         "ldr r0, [%1, #20] \n\t"                                 \
++         "ldr r1, [%1, #24] \n\t"                                 \
++         "ldr r2, [%1, #28] \n\t"                                 \
++         "ldr r3, [%1, #32] \n\t"                                 \
++         "ldr r4, [%1, #36] \n\t"                                 \
++         "push {r0, r1, r2, r3, r4} \n\t"                         \
++         "ldr r0, [%1, #4] \n\t"                                  \
++         "ldr r1, [%1, #8] \n\t"                                  \
++         "ldr r2, [%1, #12] \n\t"                                 \
++         "ldr r3, [%1, #16] \n\t"                                 \
++         "ldr r4, [%1] \n\t"  /* target->r4 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, r0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
++#endif /* PLAT_arm_linux */
++
++/* ------------------------ arm64-linux ------------------------ */
++
++#if defined(PLAT_arm64_linux)
++
++/* These regs are trashed by the hidden call. */
++#define __CALLER_SAVED_REGS \
++     "x0", "x1", "x2", "x3","x4", "x5", "x6", "x7", "x8", "x9",   \
++     "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17",      \
++     "x18", "x19", "x20", "x30",                                  \
++     "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",  \
++     "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",      \
++     "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",      \
++     "v26", "v27", "v28", "v29", "v30", "v31"
++
++/* x21 is callee-saved, so we can use it to save and restore SP around
++   the hidden call. */
++#define VALGRIND_ALIGN_STACK               \
++      "mov x21, sp\n\t"                    \
++      "bic sp, x21, #15\n\t"
++#define VALGRIND_RESTORE_STACK             \
++      "mov sp,  x21\n\t"
++
++/* These CALL_FN_ macros assume that on arm64-linux,
++   sizeof(unsigned long) == 8. */
++
++#define CALL_FN_W_v(lval, orig)                                   \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+6];                        \
++      volatile unsigned long _argvec[1];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0\n"                                           \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7)                            \
++#define CALL_FN_W_W(lval, orig, arg1)                             \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+7];                        \
++      volatile unsigned long _argvec[2];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0\n"                                           \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8)                       \
++#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+8];                        \
++      volatile unsigned long _argvec[3];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0\n"                                           \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8,arg9)                  \
++#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+9];                        \
++      volatile unsigned long _argvec[4];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
+-         /* arg9 */                                               \
+-         "lwz 3,36(11)\n\t"                                       \
+-         "stw 3,56(1)\n\t"                                        \
+-         /* args1-8 */                                            \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(64)                                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0\n"                                           \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10)           \
++#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+10];                       \
++      volatile unsigned long _argvec[5];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
+-         /* arg10 */                                              \
+-         "lwz 3,40(11)\n\t"                                       \
+-         "stw 3,60(1)\n\t"                                        \
+-         /* arg9 */                                               \
+-         "lwz 3,36(11)\n\t"                                       \
+-         "stw 3,56(1)\n\t"                                        \
+-         /* args1-8 */                                            \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(64)                                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10,arg11)     \
++#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+11];                       \
++      volatile unsigned long _argvec[6];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
+-      _argvec[2+11] = (unsigned long)arg11;                       \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
+-         /* arg11 */                                              \
+-         "lwz 3,44(11)\n\t"                                       \
+-         "stw 3,64(1)\n\t"                                        \
+-         /* arg10 */                                              \
+-         "lwz 3,40(11)\n\t"                                       \
+-         "stw 3,60(1)\n\t"                                        \
+-         /* arg9 */                                               \
+-         "lwz 3,36(11)\n\t"                                       \
+-         "stw 3,56(1)\n\t"                                        \
+-         /* args1-8 */                                            \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(72)                                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                arg7,arg8,arg9,arg10,arg11,arg12) \
++#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+12];                       \
++      volatile unsigned long _argvec[7];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
+-      _argvec[2+11] = (unsigned long)arg11;                       \
+-      _argvec[2+12] = (unsigned long)arg12;                       \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+-         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
+-         /* arg12 */                                              \
+-         "lwz 3,48(11)\n\t"                                       \
+-         "stw 3,68(1)\n\t"                                        \
+-         /* arg11 */                                              \
+-         "lwz 3,44(11)\n\t"                                       \
+-         "stw 3,64(1)\n\t"                                        \
+-         /* arg10 */                                              \
+-         "lwz 3,40(11)\n\t"                                       \
+-         "stw 3,60(1)\n\t"                                        \
+-         /* arg9 */                                               \
+-         "lwz 3,36(11)\n\t"                                       \
+-         "stw 3,56(1)\n\t"                                        \
+-         /* args1-8 */                                            \
+-         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+-         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+-         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+-         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+-         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+-         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+-         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+-         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+-         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(72)                                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#endif /* PLAT_ppc32_aix5 */
+-
+-/* ------------------------ ppc64-aix5 ------------------------- */
+-
+-#if defined(PLAT_ppc64_aix5)
+-
+-/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+-
+-/* These regs are trashed by the hidden call. */
+-#define __CALLER_SAVED_REGS                                       \
+-   "lr", "ctr", "xer",                                            \
+-   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+-   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+-   "r11", "r12", "r13"
+-
+-/* Expand the stack frame, copying enough info that unwinding
+-   still works.  Trashes r3. */
+-
+-#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
+-         "addi 1,1,-" #_n_fr "\n\t"                               \
+-         "ld   3," #_n_fr "(1)\n\t"                               \
+-         "std  3,0(1)\n\t"
+-
+-#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
+-         "addi 1,1," #_n_fr "\n\t"
+-
+-/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
+-   long) == 8. */
+-
+-#define CALL_FN_W_v(lval, orig)                                   \
++#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7)                            \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+0];                        \
++      volatile unsigned long _argvec[8];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1] = (unsigned long)_orig.r2;                       \
+-      _argvec[2] = (unsigned long)_orig.nraddr;                   \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_W(lval, orig, arg1)                             \
++#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8)                       \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+1];                        \
++      volatile unsigned long _argvec[9];                          \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x7, [%1, #64] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
++#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8,arg9)                  \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+2];                        \
++      volatile unsigned long _argvec[10];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #0x20 \n\t"                                 \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x7, [%1, #64] \n\t"                                 \
++         "ldr x8, [%1, #72] \n\t"                                 \
++         "str x8, [sp, #0]  \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
++#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10)           \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+3];                        \
++      volatile unsigned long _argvec[11];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #0x20 \n\t"                                 \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x7, [%1, #64] \n\t"                                 \
++         "ldr x8, [%1, #72] \n\t"                                 \
++         "str x8, [sp, #0]  \n\t"                                 \
++         "ldr x8, [%1, #80] \n\t"                                 \
++         "str x8, [sp, #8]  \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10,arg11)     \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+4];                        \
++      volatile unsigned long _argvec[12];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #0x30 \n\t"                                 \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x7, [%1, #64] \n\t"                                 \
++         "ldr x8, [%1, #72] \n\t"                                 \
++         "str x8, [sp, #0]  \n\t"                                 \
++         "ldr x8, [%1, #80] \n\t"                                 \
++         "str x8, [sp, #8]  \n\t"                                 \
++         "ldr x8, [%1, #88] \n\t"                                 \
++         "str x8, [sp, #16] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10,arg11,     \
++                                  arg12)                          \
+    do {                                                           \
+       volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+5];                        \
++      volatile unsigned long _argvec[13];                         \
+       volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      _argvec[12] = (unsigned long)(arg12);                       \
+       __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
++         VALGRIND_ALIGN_STACK                                     \
++         "sub sp, sp, #0x30 \n\t"                                 \
++         "ldr x0, [%1, #8] \n\t"                                  \
++         "ldr x1, [%1, #16] \n\t"                                 \
++         "ldr x2, [%1, #24] \n\t"                                 \
++         "ldr x3, [%1, #32] \n\t"                                 \
++         "ldr x4, [%1, #40] \n\t"                                 \
++         "ldr x5, [%1, #48] \n\t"                                 \
++         "ldr x6, [%1, #56] \n\t"                                 \
++         "ldr x7, [%1, #64] \n\t"                                 \
++         "ldr x8, [%1, #72] \n\t"                                 \
++         "str x8, [sp, #0]  \n\t"                                 \
++         "ldr x8, [%1, #80] \n\t"                                 \
++         "str x8, [sp, #8]  \n\t"                                 \
++         "ldr x8, [%1, #88] \n\t"                                 \
++         "str x8, [sp, #16] \n\t"                                 \
++         "ldr x8, [%1, #96] \n\t"                                 \
++         "str x8, [sp, #24] \n\t"                                 \
++         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
++         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
++         VALGRIND_RESTORE_STACK                                   \
++         "mov %0, x0"                                             \
+          : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+       );                                                          \
+       lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+6];                        \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
++#endif /* PLAT_arm64_linux */
+ 
+-#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7)                            \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+7];                        \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8)                       \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+8];                        \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+-                                 arg7,arg8,arg9)                  \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+9];                        \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
+-         /* arg9 */                                               \
+-         "ld  3,72(11)\n\t"                                       \
+-         "std 3,112(1)\n\t"                                       \
+-         /* args1-8 */                                            \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(128)                                \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10)           \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+10];                       \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
+-         /* arg10 */                                              \
+-         "ld  3,80(11)\n\t"                                       \
+-         "std 3,120(1)\n\t"                                       \
+-         /* arg9 */                                               \
+-         "ld  3,72(11)\n\t"                                       \
+-         "std 3,112(1)\n\t"                                       \
+-         /* args1-8 */                                            \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(128)                                \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                  arg7,arg8,arg9,arg10,arg11)     \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+11];                       \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
+-      _argvec[2+11] = (unsigned long)arg11;                       \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
+-         /* arg11 */                                              \
+-         "ld  3,88(11)\n\t"                                       \
+-         "std 3,128(1)\n\t"                                       \
+-         /* arg10 */                                              \
+-         "ld  3,80(11)\n\t"                                       \
+-         "std 3,120(1)\n\t"                                       \
+-         /* arg9 */                                               \
+-         "ld  3,72(11)\n\t"                                       \
+-         "std 3,112(1)\n\t"                                       \
+-         /* args1-8 */                                            \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(144)                                \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+-                                arg7,arg8,arg9,arg10,arg11,arg12) \
+-   do {                                                           \
+-      volatile OrigFn        _orig = (orig);                      \
+-      volatile unsigned long _argvec[3+12];                       \
+-      volatile unsigned long _res;                                \
+-      /* _argvec[0] holds current r2 across the call */           \
+-      _argvec[1]   = (unsigned long)_orig.r2;                     \
+-      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+-      _argvec[2+1] = (unsigned long)arg1;                         \
+-      _argvec[2+2] = (unsigned long)arg2;                         \
+-      _argvec[2+3] = (unsigned long)arg3;                         \
+-      _argvec[2+4] = (unsigned long)arg4;                         \
+-      _argvec[2+5] = (unsigned long)arg5;                         \
+-      _argvec[2+6] = (unsigned long)arg6;                         \
+-      _argvec[2+7] = (unsigned long)arg7;                         \
+-      _argvec[2+8] = (unsigned long)arg8;                         \
+-      _argvec[2+9] = (unsigned long)arg9;                         \
+-      _argvec[2+10] = (unsigned long)arg10;                       \
+-      _argvec[2+11] = (unsigned long)arg11;                       \
+-      _argvec[2+12] = (unsigned long)arg12;                       \
+-      __asm__ volatile(                                           \
+-         "mr 11,%1\n\t"                                           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+-         "std  2,-16(11)\n\t" /* save tocptr */                   \
+-         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+-         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
+-         /* arg12 */                                              \
+-         "ld  3,96(11)\n\t"                                       \
+-         "std 3,136(1)\n\t"                                       \
+-         /* arg11 */                                              \
+-         "ld  3,88(11)\n\t"                                       \
+-         "std 3,128(1)\n\t"                                       \
+-         /* arg10 */                                              \
+-         "ld  3,80(11)\n\t"                                       \
+-         "std 3,120(1)\n\t"                                       \
+-         /* arg9 */                                               \
+-         "ld  3,72(11)\n\t"                                       \
+-         "std 3,112(1)\n\t"                                       \
+-         /* args1-8 */                                            \
+-         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+-         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+-         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+-         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+-         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+-         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+-         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+-         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+-         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+-         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+-         "mr 11,%1\n\t"                                           \
+-         "mr %0,3\n\t"                                            \
+-         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+-         VG_CONTRACT_FRAME_BY(144)                                \
+-         VG_CONTRACT_FRAME_BY(512)                                \
+-         : /*out*/   "=r" (_res)                                  \
+-         : /*in*/    "r" (&_argvec[2])                            \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+-      );                                                          \
+-      lval = (__typeof__(lval)) _res;                             \
+-   } while (0)
+-
+-#endif /* PLAT_ppc64_aix5 */
+-
+-/* ------------------------- s390x-linux ------------------------- */
++/* ------------------------- s390x-linux ------------------------- */
+ 
+ #if defined(PLAT_s390x_linux)
+ 
+-/* Similar craziness as x86 (see above), but we use r11 as frame
++/* Similar workaround as amd64 (see above), but we use r11 as frame
+    pointer and save the old r11 in r7. r11 might be used for
+    argvec, therefore we copy argvec in r1 since r1 is clobbered
+    after the call anyway.  */
+@@ -4348,8 +4666,10 @@
+ #  define VALGRIND_CFI_EPILOGUE
+ #endif
+ 
+-
+-
++/* Nb: On s390 the stack pointer is properly aligned *at all times*
++   according to the s390 GCC maintainer. (The ABI specification is not
++   precise in this regard.) Therefore, VALGRIND_ALIGN_STACK and
++   VALGRIND_RESTORE_STACK are not defined here. */
+ 
+ /* These regs are trashed by the hidden call. Note that we overwrite
+    r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the
+@@ -4358,6 +4678,14 @@
+ #define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \
+                            "f0","f1","f2","f3","f4","f5","f6","f7"
+ 
++/* Nb: Although r11 is modified in the asm snippets below (inside 
++   VALGRIND_CFI_PROLOGUE) it is not listed in the clobber section, for
++   two reasons:
++   (1) r11 is restored in VALGRIND_CFI_EPILOGUE, so effectively it is not
++       modified
++   (2) GCC will complain that r11 cannot appear inside a clobber section,
++       when compiled with -O -fno-omit-frame-pointer
++ */
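A minimal standalone sketch of that save and restore pattern (illustrative only, not part of the patch; the helper name is hypothetical and r7 is chosen to match the comment at the top of this section):

static inline void scratch_r11(void)
{
   /* Illustrative only: r11 is parked in r7 (which *is* declared as
      clobbered), used as scratch, and restored before the asm ends, so
      from GCC's point of view r11 is never modified and does not need
      to appear in the clobber list. */
   __asm__ volatile(
      "lgr  7,11\n\t"   /* save caller's r11 in r7            */
      "lghi 11,0\n\t"   /* stand-in for the real use of r11   */
      "lgr  11,7"       /* restore r11 before leaving the asm */
      : /* no outputs */ : /* no inputs */ : "7");
}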
+ 
+ #define CALL_FN_W_v(lval, orig)                                  \
+    do {                                                          \
+@@ -4706,98 +5034,1054 @@
+       lval = (__typeof__(lval)) _res;                            \
+    } while (0)
+ 
+-#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
+-                     arg6, arg7 ,arg8, arg9, arg10, arg11)       \
+-   do {                                                          \
+-      volatile OrigFn        _orig = (orig);                     \
+-      volatile unsigned long _argvec[12];                        \
+-      volatile unsigned long _res;                               \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+-      _argvec[1] = (unsigned long)arg1;                          \
+-      _argvec[2] = (unsigned long)arg2;                          \
+-      _argvec[3] = (unsigned long)arg3;                          \
+-      _argvec[4] = (unsigned long)arg4;                          \
+-      _argvec[5] = (unsigned long)arg5;                          \
+-      _argvec[6] = (unsigned long)arg6;                          \
+-      _argvec[7] = (unsigned long)arg7;                          \
+-      _argvec[8] = (unsigned long)arg8;                          \
+-      _argvec[9] = (unsigned long)arg9;                          \
+-      _argvec[10] = (unsigned long)arg10;                        \
+-      _argvec[11] = (unsigned long)arg11;                        \
+-      __asm__ volatile(                                          \
+-         VALGRIND_CFI_PROLOGUE                                   \
+-         "aghi 15,-208\n\t"                                      \
+-         "lg 2, 8(1)\n\t"                                        \
+-         "lg 3,16(1)\n\t"                                        \
+-         "lg 4,24(1)\n\t"                                        \
+-         "lg 5,32(1)\n\t"                                        \
+-         "lg 6,40(1)\n\t"                                        \
+-         "mvc 160(8,15), 48(1)\n\t"                              \
+-         "mvc 168(8,15), 56(1)\n\t"                              \
+-         "mvc 176(8,15), 64(1)\n\t"                              \
+-         "mvc 184(8,15), 72(1)\n\t"                              \
+-         "mvc 192(8,15), 80(1)\n\t"                              \
+-         "mvc 200(8,15), 88(1)\n\t"                              \
+-         "lg 1, 0(1)\n\t"                                        \
+-         VALGRIND_CALL_NOREDIR_R1                                \
+-         "lgr %0, 2\n\t"                                         \
+-         "aghi 15,208\n\t"                                       \
+-         VALGRIND_CFI_EPILOGUE                                   \
+-         : /*out*/   "=d" (_res)                                 \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
+-      );                                                         \
+-      lval = (__typeof__(lval)) _res;                            \
++#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
++                     arg6, arg7 ,arg8, arg9, arg10, arg11)       \
++   do {                                                          \
++      volatile OrigFn        _orig = (orig);                     \
++      volatile unsigned long _argvec[12];                        \
++      volatile unsigned long _res;                               \
++      _argvec[0] = (unsigned long)_orig.nraddr;                  \
++      _argvec[1] = (unsigned long)arg1;                          \
++      _argvec[2] = (unsigned long)arg2;                          \
++      _argvec[3] = (unsigned long)arg3;                          \
++      _argvec[4] = (unsigned long)arg4;                          \
++      _argvec[5] = (unsigned long)arg5;                          \
++      _argvec[6] = (unsigned long)arg6;                          \
++      _argvec[7] = (unsigned long)arg7;                          \
++      _argvec[8] = (unsigned long)arg8;                          \
++      _argvec[9] = (unsigned long)arg9;                          \
++      _argvec[10] = (unsigned long)arg10;                        \
++      _argvec[11] = (unsigned long)arg11;                        \
++      __asm__ volatile(                                          \
++         VALGRIND_CFI_PROLOGUE                                   \
++         "aghi 15,-208\n\t"                                      \
++         "lg 2, 8(1)\n\t"                                        \
++         "lg 3,16(1)\n\t"                                        \
++         "lg 4,24(1)\n\t"                                        \
++         "lg 5,32(1)\n\t"                                        \
++         "lg 6,40(1)\n\t"                                        \
++         "mvc 160(8,15), 48(1)\n\t"                              \
++         "mvc 168(8,15), 56(1)\n\t"                              \
++         "mvc 176(8,15), 64(1)\n\t"                              \
++         "mvc 184(8,15), 72(1)\n\t"                              \
++         "mvc 192(8,15), 80(1)\n\t"                              \
++         "mvc 200(8,15), 88(1)\n\t"                              \
++         "lg 1, 0(1)\n\t"                                        \
++         VALGRIND_CALL_NOREDIR_R1                                \
++         "lgr %0, 2\n\t"                                         \
++         "aghi 15,208\n\t"                                       \
++         VALGRIND_CFI_EPILOGUE                                   \
++         : /*out*/   "=d" (_res)                                 \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
++      );                                                         \
++      lval = (__typeof__(lval)) _res;                            \
++   } while (0)
++
++#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
++                     arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\
++   do {                                                          \
++      volatile OrigFn        _orig = (orig);                     \
++      volatile unsigned long _argvec[13];                        \
++      volatile unsigned long _res;                               \
++      _argvec[0] = (unsigned long)_orig.nraddr;                  \
++      _argvec[1] = (unsigned long)arg1;                          \
++      _argvec[2] = (unsigned long)arg2;                          \
++      _argvec[3] = (unsigned long)arg3;                          \
++      _argvec[4] = (unsigned long)arg4;                          \
++      _argvec[5] = (unsigned long)arg5;                          \
++      _argvec[6] = (unsigned long)arg6;                          \
++      _argvec[7] = (unsigned long)arg7;                          \
++      _argvec[8] = (unsigned long)arg8;                          \
++      _argvec[9] = (unsigned long)arg9;                          \
++      _argvec[10] = (unsigned long)arg10;                        \
++      _argvec[11] = (unsigned long)arg11;                        \
++      _argvec[12] = (unsigned long)arg12;                        \
++      __asm__ volatile(                                          \
++         VALGRIND_CFI_PROLOGUE                                   \
++         "aghi 15,-216\n\t"                                      \
++         "lg 2, 8(1)\n\t"                                        \
++         "lg 3,16(1)\n\t"                                        \
++         "lg 4,24(1)\n\t"                                        \
++         "lg 5,32(1)\n\t"                                        \
++         "lg 6,40(1)\n\t"                                        \
++         "mvc 160(8,15), 48(1)\n\t"                              \
++         "mvc 168(8,15), 56(1)\n\t"                              \
++         "mvc 176(8,15), 64(1)\n\t"                              \
++         "mvc 184(8,15), 72(1)\n\t"                              \
++         "mvc 192(8,15), 80(1)\n\t"                              \
++         "mvc 200(8,15), 88(1)\n\t"                              \
++         "mvc 208(8,15), 96(1)\n\t"                              \
++         "lg 1, 0(1)\n\t"                                        \
++         VALGRIND_CALL_NOREDIR_R1                                \
++         "lgr %0, 2\n\t"                                         \
++         "aghi 15,216\n\t"                                       \
++         VALGRIND_CFI_EPILOGUE                                   \
++         : /*out*/   "=d" (_res)                                 \
++         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
++         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
++      );                                                         \
++      lval = (__typeof__(lval)) _res;                            \
++   } while (0)
++
++
++#endif /* PLAT_s390x_linux */
++
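Whichever platform block above gets selected, the CALL_FN_W_* macros are consumed the same way, through the function-wrapping interface defined elsewhere in valgrind.h (I_WRAP_SONAME_FN_ZU, OrigFn, VALGRIND_GET_ORIG_FN). A minimal sketch, assuming a hypothetical library whose soname matches "libfoo.so*" (Z-encoded with Zd for '.' and Za for '*') and which exports a one-argument function foo:

#include "valgrind.h"

/* Hypothetical wrapper, not part of this patch: CALL_FN_W_W expands to
   the matching platform block above and forwards the single word-sized
   argument to the original foo. */
int I_WRAP_SONAME_FN_ZU(libfooZdsoZa, foo)(int x)
{
   OrigFn fn;
   int    result;
   VALGRIND_GET_ORIG_FN(fn);
   CALL_FN_W_W(result, fn, x);
   return result;
}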
++/* ------------------------- mips32-linux ----------------------- */
++ 
++#if defined(PLAT_mips32_linux)
++
++/* These regs are trashed by the hidden call. */
++#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6",       \
++"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \
++"$25", "$31"
++
++/* These CALL_FN_ macros assume that on mips-linux, sizeof(unsigned
++   long) == 4. */
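That size assumption is what makes the fixed byte offsets in the lw/sw sequences below line up (arg1 at 4(%1), arg2 at 8(%1), and so on). Stated in code, illustrative only and not part of the header:

/* Each _argvec slot on the o32 ABI is one 4-byte word, so argN lives at
   byte offset 4*N from &_argvec[0]. */
_Static_assert(sizeof(unsigned long) == 4,
               "mips32 o32: _argvec slots are 4 bytes");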
++
++#define CALL_FN_W_v(lval, orig)                                   \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[1];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "subu $29, $29, 16 \n\t"                                 \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 16\n\t"                                  \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_W(lval, orig, arg1)                             \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[2];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "subu $29, $29, 16 \n\t"                                 \
++         "lw $4, 4(%1) \n\t"   /* arg1*/                          \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 16 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory",  __CALLER_SAVED_REGS               \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[3];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "subu $29, $29, 16 \n\t"                                 \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 16 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[4];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "subu $29, $29, 16 \n\t"                                 \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 16 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[5];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "subu $29, $29, 16 \n\t"                                 \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 16 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[6];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 24\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 24 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[7];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 32\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "nop\n\t"                                                \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 32 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7)                            \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[8];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 32\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 32 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8)                       \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[9];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 40\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 32(%1) \n\t"                                     \
++         "sw $4, 28($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 40 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8,arg9)                  \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[10];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 40\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 32(%1) \n\t"                                     \
++         "sw $4, 28($29) \n\t"                                    \
++         "lw $4, 36(%1) \n\t"                                     \
++         "sw $4, 32($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 40 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10)           \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[11];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 48\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 32(%1) \n\t"                                     \
++         "sw $4, 28($29) \n\t"                                    \
++         "lw $4, 36(%1) \n\t"                                     \
++         "sw $4, 32($29) \n\t"                                    \
++         "lw $4, 40(%1) \n\t"                                     \
++         "sw $4, 36($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 48 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11)                          \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[12];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 48\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 32(%1) \n\t"                                     \
++         "sw $4, 28($29) \n\t"                                    \
++         "lw $4, 36(%1) \n\t"                                     \
++         "sw $4, 32($29) \n\t"                                    \
++         "lw $4, 40(%1) \n\t"                                     \
++         "sw $4, 36($29) \n\t"                                    \
++         "lw $4, 44(%1) \n\t"                                     \
++         "sw $4, 40($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 48 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11,arg12)                    \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[13];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      _argvec[12] = (unsigned long)(arg12);                       \
++      __asm__ volatile(                                           \
++         "subu $29, $29, 8 \n\t"                                  \
++         "sw $28, 0($29) \n\t"                                    \
++         "sw $31, 4($29) \n\t"                                    \
++         "lw $4, 20(%1) \n\t"                                     \
++         "subu $29, $29, 56\n\t"                                  \
++         "sw $4, 16($29) \n\t"                                    \
++         "lw $4, 24(%1) \n\t"                                     \
++         "sw $4, 20($29) \n\t"                                    \
++         "lw $4, 28(%1) \n\t"                                     \
++         "sw $4, 24($29) \n\t"                                    \
++         "lw $4, 32(%1) \n\t"                                     \
++         "sw $4, 28($29) \n\t"                                    \
++         "lw $4, 36(%1) \n\t"                                     \
++         "sw $4, 32($29) \n\t"                                    \
++         "lw $4, 40(%1) \n\t"                                     \
++         "sw $4, 36($29) \n\t"                                    \
++         "lw $4, 44(%1) \n\t"                                     \
++         "sw $4, 40($29) \n\t"                                    \
++         "lw $4, 48(%1) \n\t"                                     \
++         "sw $4, 44($29) \n\t"                                    \
++         "lw $4, 4(%1) \n\t"                                      \
++         "lw $5, 8(%1) \n\t"                                      \
++         "lw $6, 12(%1) \n\t"                                     \
++         "lw $7, 16(%1) \n\t"                                     \
++         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "addu $29, $29, 56 \n\t"                                 \
++         "lw $28, 0($29) \n\t"                                    \
++         "lw $31, 4($29) \n\t"                                    \
++         "addu $29, $29, 8 \n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#endif /* PLAT_mips32_linux */
++
++/* ------------------------- mips64-linux ------------------------- */
++
++#if defined(PLAT_mips64_linux)
++
++/* These regs are trashed by the hidden call. */
++#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6",       \
++"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \
++"$25", "$31"
++
++/* These CALL_FN_ macros assume that on mips64-linux, sizeof(unsigned
++   long) == 8. */
++
++#define CALL_FN_W_v(lval, orig)                                   \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[1];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      __asm__ volatile(                                           \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "0" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_W(lval, orig, arg1)                             \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[2];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"   /* arg1*/                           \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[3];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[4];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[5];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[6];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[7];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7)                            \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[8];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8)                       \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[9];                          \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      __asm__ volatile(                                           \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $11, 64(%1)\n\t"                                     \
++         "ld $25, 0(%1) \n\t"  /* target->t9 */                   \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
++                                 arg7,arg8,arg9)                  \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[10];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      __asm__ volatile(                                           \
++         "dsubu $29, $29, 8\n\t"                                  \
++         "ld $4, 72(%1)\n\t"                                      \
++         "sd $4, 0($29)\n\t"                                      \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $11, 64(%1)\n\t"                                     \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "daddu $29, $29, 8\n\t"                                  \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
+-#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
+-                     arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\
+-   do {                                                          \
+-      volatile OrigFn        _orig = (orig);                     \
+-      volatile unsigned long _argvec[13];                        \
+-      volatile unsigned long _res;                               \
+-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+-      _argvec[1] = (unsigned long)arg1;                          \
+-      _argvec[2] = (unsigned long)arg2;                          \
+-      _argvec[3] = (unsigned long)arg3;                          \
+-      _argvec[4] = (unsigned long)arg4;                          \
+-      _argvec[5] = (unsigned long)arg5;                          \
+-      _argvec[6] = (unsigned long)arg6;                          \
+-      _argvec[7] = (unsigned long)arg7;                          \
+-      _argvec[8] = (unsigned long)arg8;                          \
+-      _argvec[9] = (unsigned long)arg9;                          \
+-      _argvec[10] = (unsigned long)arg10;                        \
+-      _argvec[11] = (unsigned long)arg11;                        \
+-      _argvec[12] = (unsigned long)arg12;                        \
+-      __asm__ volatile(                                          \
+-         VALGRIND_CFI_PROLOGUE                                   \
+-         "aghi 15,-216\n\t"                                      \
+-         "lg 2, 8(1)\n\t"                                        \
+-         "lg 3,16(1)\n\t"                                        \
+-         "lg 4,24(1)\n\t"                                        \
+-         "lg 5,32(1)\n\t"                                        \
+-         "lg 6,40(1)\n\t"                                        \
+-         "mvc 160(8,15), 48(1)\n\t"                              \
+-         "mvc 168(8,15), 56(1)\n\t"                              \
+-         "mvc 176(8,15), 64(1)\n\t"                              \
+-         "mvc 184(8,15), 72(1)\n\t"                              \
+-         "mvc 192(8,15), 80(1)\n\t"                              \
+-         "mvc 200(8,15), 88(1)\n\t"                              \
+-         "mvc 208(8,15), 96(1)\n\t"                              \
+-         "lg 1, 0(1)\n\t"                                        \
+-         VALGRIND_CALL_NOREDIR_R1                                \
+-         "lgr %0, 2\n\t"                                         \
+-         "aghi 15,216\n\t"                                       \
+-         VALGRIND_CFI_EPILOGUE                                   \
+-         : /*out*/   "=d" (_res)                                 \
+-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
+-      );                                                         \
+-      lval = (__typeof__(lval)) _res;                            \
++#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
++                                  arg7,arg8,arg9,arg10)           \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[11];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      __asm__ volatile(                                           \
++         "dsubu $29, $29, 16\n\t"                                 \
++         "ld $4, 72(%1)\n\t"                                      \
++         "sd $4, 0($29)\n\t"                                      \
++         "ld $4, 80(%1)\n\t"                                      \
++         "sd $4, 8($29)\n\t"                                      \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $11, 64(%1)\n\t"                                     \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "daddu $29, $29, 16\n\t"                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
+    } while (0)
+ 
++#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11)                          \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[12];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      __asm__ volatile(                                           \
++         "dsubu $29, $29, 24\n\t"                                 \
++         "ld $4, 72(%1)\n\t"                                      \
++         "sd $4, 0($29)\n\t"                                      \
++         "ld $4, 80(%1)\n\t"                                      \
++         "sd $4, 8($29)\n\t"                                      \
++         "ld $4, 88(%1)\n\t"                                      \
++         "sd $4, 16($29)\n\t"                                     \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $11, 64(%1)\n\t"                                     \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "daddu $29, $29, 24\n\t"                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
+ 
+-#endif /* PLAT_s390x_linux */
++#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
++                                  arg6,arg7,arg8,arg9,arg10,      \
++                                  arg11,arg12)                    \
++   do {                                                           \
++      volatile OrigFn        _orig = (orig);                      \
++      volatile unsigned long _argvec[13];                         \
++      volatile unsigned long _res;                                \
++      _argvec[0] = (unsigned long)_orig.nraddr;                   \
++      _argvec[1] = (unsigned long)(arg1);                         \
++      _argvec[2] = (unsigned long)(arg2);                         \
++      _argvec[3] = (unsigned long)(arg3);                         \
++      _argvec[4] = (unsigned long)(arg4);                         \
++      _argvec[5] = (unsigned long)(arg5);                         \
++      _argvec[6] = (unsigned long)(arg6);                         \
++      _argvec[7] = (unsigned long)(arg7);                         \
++      _argvec[8] = (unsigned long)(arg8);                         \
++      _argvec[9] = (unsigned long)(arg9);                         \
++      _argvec[10] = (unsigned long)(arg10);                       \
++      _argvec[11] = (unsigned long)(arg11);                       \
++      _argvec[12] = (unsigned long)(arg12);                       \
++      __asm__ volatile(                                           \
++         "dsubu $29, $29, 32\n\t"                                 \
++         "ld $4, 72(%1)\n\t"                                      \
++         "sd $4, 0($29)\n\t"                                      \
++         "ld $4, 80(%1)\n\t"                                      \
++         "sd $4, 8($29)\n\t"                                      \
++         "ld $4, 88(%1)\n\t"                                      \
++         "sd $4, 16($29)\n\t"                                     \
++         "ld $4, 96(%1)\n\t"                                      \
++         "sd $4, 24($29)\n\t"                                     \
++         "ld $4, 8(%1)\n\t"                                       \
++         "ld $5, 16(%1)\n\t"                                      \
++         "ld $6, 24(%1)\n\t"                                      \
++         "ld $7, 32(%1)\n\t"                                      \
++         "ld $8, 40(%1)\n\t"                                      \
++         "ld $9, 48(%1)\n\t"                                      \
++         "ld $10, 56(%1)\n\t"                                     \
++         "ld $11, 64(%1)\n\t"                                     \
++         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
++         VALGRIND_CALL_NOREDIR_T9                                 \
++         "daddu $29, $29, 32\n\t"                                 \
++         "move %0, $2\n"                                          \
++         : /*out*/   "=r" (_res)                                  \
++         : /*in*/    "r" (&_argvec[0])                            \
++         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
++      );                                                          \
++      lval = (__typeof__(lval)) _res;                             \
++   } while (0)
++
++#endif /* PLAT_mips64_linux */
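The CALL_FN_W_* families added for mips32 and mips64 above are the machinery behind Valgrind's function-wrapping support: a wrapper fetches the original function with VALGRIND_GET_ORIG_FN and then forwards its arguments through the CALL_FN macro matching the arity. A minimal sketch of that pattern, assuming a hypothetical int foo(int) exported by an object with no soname:

    #include <valgrind.h>

    /* Hypothetical wrapper for int foo(int); "NONE" matches objects
       without a soname, such as the main executable. */
    int I_WRAP_SONAME_FNNAME_ZU(NONE, foo)(int x)
    {
       int    result;
       OrigFn fn;
       VALGRIND_GET_ORIG_FN(fn);     /* locate the real foo */
       CALL_FN_W_W(result, fn, x);   /* one word arg, word result */
       return result;
    }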
+ 
+ 
+ /* ------------------------------------------------------------------ */
+@@ -4843,9 +6127,14 @@
+              errors. */
+           VG_USERREQ__COUNT_ERRORS = 0x1201,
+ 
++          /* Allows the client program and/or gdbserver to execute a monitor
++             command. */
++          VG_USERREQ__GDB_MONITOR_COMMAND = 0x1202,
++
+           /* These are useful and can be interpreted by any tool that
+              tracks malloc() et al, by using vg_replace_malloc.c. */
+           VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
++          VG_USERREQ__RESIZEINPLACE_BLOCK = 0x130b,
+           VG_USERREQ__FREELIKE_BLOCK   = 0x1302,
+           /* Memory pool support. */
+           VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
+@@ -4879,7 +6168,17 @@
+           VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601,
+ 
+           /* Querying of debug info. */
+-          VG_USERREQ__MAP_IP_TO_SRCLOC = 0x1701
++          VG_USERREQ__MAP_IP_TO_SRCLOC = 0x1701,
++
++          /* Disable/enable error reporting level.  Takes a single
++             Word arg which is the delta to this thread's error
++             disablement indicator.  Hence 1 disables or further
++             disables errors, and -1 moves back towards enablement.
++             Other values are not allowed. */
++          VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801,
++
++          /* Initialise IR injection */
++          VG_USERREQ__VEX_INIT_FOR_IRI = 0x1901
+    } Vg_ClientRequest;
+ 
+ #if !defined(__GNUC__)
+@@ -4887,64 +6186,12 @@
+ #endif
+ 
+ 
+-/*
+- * VALGRIND_DO_CLIENT_REQUEST_EXPR(): a C expression that invokes a Valgrind
+- * client request and whose value equals the client request result.
+- */
+-
+-#if defined(NVALGRIND)
+-
+-#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                               \
+-        _zzq_default, _zzq_request,                                    \
+-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)         \
+-   (_zzq_default)
+-
+-#else /*defined(NVALGRIND)*/
+-
+-#if defined(_MSC_VER)
+-
+-#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                                \
+-        _zzq_default, _zzq_request,                                     \
+-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)          \
+-   (vg_VALGRIND_DO_CLIENT_REQUEST_EXPR((uintptr_t)(_zzq_default),       \
+-        (_zzq_request), (uintptr_t)(_zzq_arg1), (uintptr_t)(_zzq_arg2), \
+-        (uintptr_t)(_zzq_arg3), (uintptr_t)(_zzq_arg4),                 \
+-        (uintptr_t)(_zzq_arg5)))
+-
+-static __inline unsigned
+-vg_VALGRIND_DO_CLIENT_REQUEST_EXPR(uintptr_t _zzq_default,
+-                                   unsigned _zzq_request, uintptr_t _zzq_arg1,
+-                                   uintptr_t _zzq_arg2, uintptr_t _zzq_arg3,
+-                                   uintptr_t _zzq_arg4, uintptr_t _zzq_arg5)
+-{
+-    unsigned _zzq_rlval;
+-    VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request,
+-                      _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5);
+-    return _zzq_rlval;
+-}
+-
+-#else /*defined(_MSC_VER)*/
+-
+-#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                               \
+-        _zzq_default, _zzq_request,                                    \
+-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)         \
+-   (__extension__({unsigned int _zzq_rlval;                            \
+-    VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, _zzq_request, \
+-                _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+-    _zzq_rlval;                                                        \
+-   }))
+-
+-#endif /*defined(_MSC_VER)*/
+-
+-#endif /*defined(NVALGRIND)*/
+-
+-
+ /* Returns the number of Valgrinds this code is running under.  That
+    is, 0 if running natively, 1 if running under Valgrind, 2 if
+    running under Valgrind which is running under another Valgrind,
+    etc. */
+ #define RUNNING_ON_VALGRIND                                           \
+-    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* if not */,                   \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* if not */,         \
+                                     VG_USERREQ__RUNNING_ON_VALGRIND,  \
+                                     0, 0, 0, 0, 0)                    \
+ 
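RUNNING_ON_VALGRIND, now cast to unsigned, is commonly used to scale down expensive work when the program notices it is being simulated. A minimal sketch; the iteration counts are arbitrary:

    #include <stddef.h>
    #include <valgrind.h>

    static size_t pick_iterations(void)
    {
       /* 0 when running natively, >= 1 under Valgrind. */
       return RUNNING_ON_VALGRIND ? 1000u : 1000000u;
    }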
+@@ -4953,12 +6200,9 @@
+    _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+    since it provides a way to make sure valgrind will retranslate the
+    invalidated area.  Returns no value. */
+-#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
+-   {unsigned int _qzz_res VG_UNUSED;                                        \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
+-                               _qzz_addr, _qzz_len, 0, 0, 0);     \
+-   }
++#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)              \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DISCARD_TRANSLATIONS,  \
++                                    _qzz_addr, _qzz_len, 0, 0, 0)
+ 
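VALGRIND_DISCARD_TRANSLATIONS is meant for the JIT scenario the comment describes: after rewriting code in place, drop any cached translations of that range. A minimal sketch; the buffer and its contents are hypothetical:

    #include <string.h>
    #include <valgrind.h>

    static void patch_code(unsigned char *code,
                           const unsigned char *newbytes, size_t len)
    {
       memcpy(code, newbytes, len);
       /* Force Valgrind to retranslate the modified range. */
       VALGRIND_DISCARD_TRANSLATIONS(code, len);
    }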
+ 
+ /* These requests are for getting Valgrind itself to print something.
+@@ -4966,14 +6210,7 @@
+    is the number of characters printed, excluding the "**<pid>** " part at the
+    start and the backtrace (if present). */
+ 
+-#if defined(NVALGRIND)
+-
+-#  define VALGRIND_PRINTF(...)
+-#  define VALGRIND_PRINTF_BACKTRACE(...)
+-
+-#else /* NVALGRIND */
+-
+-#if !defined(_MSC_VER)
++#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER)
+ /* Modern GCC will optimize the static routine out if unused,
+    and unused attribute will shut down warnings about it.  */
+ static int VALGRIND_PRINTF(const char *format, ...)
+@@ -4985,17 +6222,24 @@
+ #endif
+ VALGRIND_PRINTF(const char *format, ...)
+ {
++#if defined(NVALGRIND)
++   return 0;
++#else /* NVALGRIND */
++#if defined(_MSC_VER) || defined(__MINGW64__)
++   uintptr_t _qzz_res;
++#else
+    unsigned long _qzz_res;
++#endif
+    va_list vargs;
+    va_start(vargs, format);
+-#if defined(_MSC_VER)
+-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
++#if defined(_MSC_VER) || defined(__MINGW64__)
++   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                               VG_USERREQ__PRINTF_VALIST_BY_REF,
+                               (uintptr_t)format,
+                               (uintptr_t)&vargs,
+                               0, 0, 0);
+ #else
+-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
++   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                               VG_USERREQ__PRINTF_VALIST_BY_REF,
+                               (unsigned long)format,
+                               (unsigned long)&vargs, 
+@@ -5003,9 +6247,10 @@
+ #endif
+    va_end(vargs);
+    return (int)_qzz_res;
++#endif /* NVALGRIND */
+ }
+ 
+-#if !defined(_MSC_VER)
++#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER)
+ static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+    __attribute__((format(__printf__, 1, 2), __unused__));
+ #endif
+@@ -5015,17 +6260,24 @@
+ #endif
+ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+ {
++#if defined(NVALGRIND)
++   return 0;
++#else /* NVALGRIND */
++#if defined(_MSC_VER) || defined(__MINGW64__)
++   uintptr_t _qzz_res;
++#else
+    unsigned long _qzz_res;
++#endif
+    va_list vargs;
+    va_start(vargs, format);
+-#if defined(_MSC_VER)
+-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
++#if defined(_MSC_VER) || defined(__MINGW64__)
++   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                               VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
+                               (uintptr_t)format,
+                               (uintptr_t)&vargs,
+                               0, 0, 0);
+ #else
+-   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,
++   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                               VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
+                               (unsigned long)format,
+                               (unsigned long)&vargs, 
+@@ -5033,9 +6285,8 @@
+ #endif
+    va_end(vargs);
+    return (int)_qzz_res;
+-}
+-
+ #endif /* NVALGRIND */
++}
+ 
+ 
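Both printf-style helpers now compile to real functions that return 0 under NVALGRIND instead of being defined away, so callers can use them unconditionally. A minimal sketch; the messages are arbitrary:

    #include <valgrind.h>

    static void report_phase(int phase)
    {
       VALGRIND_PRINTF("entering phase %d\n", phase);
       if (phase < 0)
          VALGRIND_PRINTF_BACKTRACE("unexpected phase %d\n", phase);
    }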
+ /* These requests allow control to move from the simulated CPU to the
+@@ -5062,58 +6313,39 @@
+    with a lot in the past.
+ */
+ #define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                          \
+-   __extension__                                                  \
+-   ({unsigned long _qyy_res;                                      \
+-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+-                               VG_USERREQ__CLIENT_CALL0,          \
+-                               _qyy_fn,                           \
+-                               0, 0, 0, 0);                       \
+-    _qyy_res;                                                     \
+-   })
+-
+-#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)               \
+-   __extension__                                                  \
+-   ({unsigned long _qyy_res;                                      \
+-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+-                               VG_USERREQ__CLIENT_CALL1,          \
+-                               _qyy_fn,                           \
+-                               _qyy_arg1, 0, 0, 0);               \
+-    _qyy_res;                                                     \
+-   })
+-
+-#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)    \
+-   __extension__                                                  \
+-   ({unsigned long _qyy_res;                                      \
+-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+-                               VG_USERREQ__CLIENT_CALL2,          \
+-                               _qyy_fn,                           \
+-                               _qyy_arg1, _qyy_arg2, 0, 0);       \
+-    _qyy_res;                                                     \
+-   })
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,       \
++                                    VG_USERREQ__CLIENT_CALL0,     \
++                                    _qyy_fn,                      \
++                                    0, 0, 0, 0)
++
++#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)                    \
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,            \
++                                    VG_USERREQ__CLIENT_CALL1,          \
++                                    _qyy_fn,                           \
++                                    _qyy_arg1, 0, 0, 0)
++
++#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)         \
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,            \
++                                    VG_USERREQ__CLIENT_CALL2,          \
++                                    _qyy_fn,                           \
++                                    _qyy_arg1, _qyy_arg2, 0, 0)
+ 
+ #define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+-   __extension__                                                  \
+-   ({unsigned long _qyy_res;                                      \
+-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+-                               VG_USERREQ__CLIENT_CALL3,          \
+-                               _qyy_fn,                           \
+-                               _qyy_arg1, _qyy_arg2,              \
+-                               _qyy_arg3, 0);                     \
+-    _qyy_res;                                                     \
+-   })
++    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */,             \
++                                    VG_USERREQ__CLIENT_CALL3,           \
++                                    _qyy_fn,                            \
++                                    _qyy_arg1, _qyy_arg2,               \
++                                    _qyy_arg3, 0)
+ 
+ 
+ /* Counts the number of errors that have been recorded by a tool.  Nb:
+    the tool must record the errors with VG_(maybe_record_error)() or
+    VG_(unique_error)() for them to be counted. */
+ #define VALGRIND_COUNT_ERRORS                                     \
+-   __extension__                                                  \
+-   ({unsigned int _qyy_res;                                       \
+-    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(                    \
++                               0 /* default return */,            \
+                                VG_USERREQ__COUNT_ERRORS,          \
+-                               0, 0, 0, 0, 0);                    \
+-    _qyy_res;                                                     \
+-   })
++                               0, 0, 0, 0, 0)
+ 
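VALGRIND_COUNT_ERRORS is mostly useful in test harnesses that want to fail as soon as the tool records an error. A minimal sketch; the snapshot-and-compare policy is an assumption, not something the header prescribes:

    #include <valgrind.h>

    /* Returns non-zero if no new errors were recorded since 'before'. */
    static int no_new_errors(unsigned before)
    {
       return VALGRIND_COUNT_ERRORS == before;
    }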
+ /* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing
+    when heap blocks are allocated in order to give accurate results.  This
+@@ -5179,7 +6411,24 @@
+    VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a
+    heap block is deallocated.
+ 
+-   In many cases, these two client requests will not be enough to get your
++   VALGRIND_RESIZEINPLACE_BLOCK informs a tool about reallocation. For
++   Memcheck, it does four things:
++
++   - It records that the size of a block has been changed.  This assumes that
++     the block was annotated as having been allocated via
++     VALGRIND_MALLOCLIKE_BLOCK.  Otherwise, an error will be issued.
++
++   - If the block shrunk, it marks the freed memory as being unaddressable.
++
++   - If the block grew, it marks the new area as undefined and defines a red
++     zone past the end of the new block.
++
++   - The V-bits of the overlap between the old and the new block are preserved.
++
++   VALGRIND_RESIZEINPLACE_BLOCK should be put after allocation of the new block
++   and before deallocation of the old block.
++
++   In many cases, these three client requests will not be enough to get your
+    allocator working well with Memcheck.  More specifically, if your allocator
+    writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call
+    will be necessary to mark the memory as addressable just before the zeroing
+@@ -5197,148 +6446,142 @@
+    understand the distinction between the allocator and the rest of the
+    program.
+ 
+-   Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request;  it
+-   has to be emulated with MALLOCLIKE/FREELIKE and memory copying.
+-   
+    Ignored if addr == 0.
+ */
+-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)    \
+-   {unsigned int _qzz_res VG_UNUSED;                                        \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MALLOCLIKE_BLOCK,      \
+-                               addr, sizeB, rzB, is_zeroed, 0);   \
+-   }
++#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)          \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MALLOCLIKE_BLOCK,       \
++                                    addr, sizeB, rzB, is_zeroed, 0)
+ 
+ /* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
+    Ignored if addr == 0.
+ */
+-#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                        \
+-   {unsigned int _qzz_res VG_UNUSED;                                        \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__FREELIKE_BLOCK,        \
+-                               addr, rzB, 0, 0, 0);               \
+-   }
++#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB)     \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__RESIZEINPLACE_BLOCK,    \
++                                    addr, oldSizeB, newSizeB, rzB, 0)
++
++/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
++   Ignored if addr == 0.
++*/
++#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                              \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__FREELIKE_BLOCK,         \
++                                    addr, rzB, 0, 0, 0)
+ 
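A minimal sketch of the allocator annotation described above, using a static bump arena so the example stays self-contained; the arena size, redzone width and the shrink-only resize are all arbitrary choices, and a real allocator would also need the VALGRIND_MAKE_MEM_* requests from memcheck.h, as the comment notes:

    #include <stddef.h>
    #include <valgrind.h>

    #define ARENA_SIZE 4096
    #define REDZONE    16

    static unsigned char arena[ARENA_SIZE];
    static size_t        arena_top;

    void *my_alloc(size_t size)
    {
       if (arena_top + size + REDZONE > ARENA_SIZE)
          return NULL;
       void *p = arena + arena_top;
       arena_top += size + REDZONE;
       /* Memcheck now treats p as a heap block of 'size' bytes. */
       VALGRIND_MALLOCLIKE_BLOCK(p, size, REDZONE, /*is_zeroed=*/0);
       return p;
    }

    void my_shrink(void *p, size_t oldsize, size_t newsize)
    {
       /* Block stays at the same address; only its recorded size changes. */
       VALGRIND_RESIZEINPLACE_BLOCK(p, oldsize, newsize, REDZONE);
    }

    void my_free(void *p)
    {
       /* The bump arena never recycles memory; only the annotation is retired. */
       VALGRIND_FREELIKE_BLOCK(p, REDZONE);
    }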
+ /* Create a memory pool. */
+ #define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed)             \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__CREATE_MEMPOOL,        \
+-                               pool, rzB, is_zeroed, 0, 0);       \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL,   \
++                                    pool, rzB, is_zeroed, 0, 0)
+ 
+ /* Destroy a memory pool. */
+ #define VALGRIND_DESTROY_MEMPOOL(pool)                            \
+-   {unsigned int _qzz_res VG_UNUSED;                                        \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__DESTROY_MEMPOOL,       \
+-                               pool, 0, 0, 0, 0);                 \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DESTROY_MEMPOOL,  \
++                                    pool, 0, 0, 0, 0)
+ 
+ /* Associate a piece of memory with a memory pool. */
+ #define VALGRIND_MEMPOOL_ALLOC(pool, addr, size)                  \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MEMPOOL_ALLOC,         \
+-                               pool, addr, size, 0, 0);           \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_ALLOC,    \
++                                    pool, addr, size, 0, 0)
+ 
+ /* Disassociate a piece of memory from a memory pool. */
+ #define VALGRIND_MEMPOOL_FREE(pool, addr)                         \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MEMPOOL_FREE,          \
+-                               pool, addr, 0, 0, 0);              \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_FREE,     \
++                                    pool, addr, 0, 0, 0)
+ 
+ /* Disassociate any pieces outside a particular range. */
+ #define VALGRIND_MEMPOOL_TRIM(pool, addr, size)                   \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MEMPOOL_TRIM,          \
+-                               pool, addr, size, 0, 0);           \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_TRIM,     \
++                                    pool, addr, size, 0, 0)
+ 
+ /* Resize and/or move a piece associated with a memory pool. */
+ #define VALGRIND_MOVE_MEMPOOL(poolA, poolB)                       \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MOVE_MEMPOOL,          \
+-                               poolA, poolB, 0, 0, 0);            \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MOVE_MEMPOOL,     \
++                                    poolA, poolB, 0, 0, 0)
+ 
+ /* Resize and/or move a piece associated with a memory pool. */
+ #define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size)         \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__MEMPOOL_CHANGE,        \
+-                               pool, addrA, addrB, size, 0);      \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_CHANGE,   \
++                                    pool, addrA, addrB, size, 0)
+ 
+ /* Return 1 if a mempool exists, else 0. */
+ #define VALGRIND_MEMPOOL_EXISTS(pool)                             \
+-   __extension__                                                  \
+-   ({unsigned int _qzz_res;                                       \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                  \
+                                VG_USERREQ__MEMPOOL_EXISTS,        \
+-                               pool, 0, 0, 0, 0);                 \
+-    _qzz_res;                                                     \
+-   })
++                               pool, 0, 0, 0, 0)
+ 
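The mempool requests above describe a pool to Memcheck in three steps: create the pool, report each chunk handed out of it, and report frees or destruction. A minimal sketch with a static backing buffer and arbitrary chunk sizes:

    #include <valgrind.h>

    static unsigned char pool_mem[1024];

    void mempool_demo(void)
    {
       VALGRIND_CREATE_MEMPOOL(pool_mem, /*rzB=*/0, /*is_zeroed=*/0);

       VALGRIND_MEMPOOL_ALLOC(pool_mem, pool_mem,       64);
       VALGRIND_MEMPOOL_ALLOC(pool_mem, pool_mem + 64, 128);

       VALGRIND_MEMPOOL_FREE(pool_mem, pool_mem);
       VALGRIND_DESTROY_MEMPOOL(pool_mem);
    }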
+-/* Mark a piece of memory as being a stack. Returns a stack id. */
++/* Mark a piece of memory as being a stack. Returns a stack id.
++   start is the lowest addressable stack byte, end is the highest
++   addressable stack byte. */
+ #define VALGRIND_STACK_REGISTER(start, end)                       \
+-   __extension__                                                  \
+-   ({unsigned int _qzz_res;                                       \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                  \
+                                VG_USERREQ__STACK_REGISTER,        \
+-                               start, end, 0, 0, 0);              \
+-    _qzz_res;                                                     \
+-   })
++                               start, end, 0, 0, 0)
+ 
+ /* Unmark the piece of memory associated with a stack id as being a
+    stack. */
+ #define VALGRIND_STACK_DEREGISTER(id)                             \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__STACK_DEREGISTER,      \
+-                               id, 0, 0, 0, 0);                   \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_DEREGISTER, \
++                                    id, 0, 0, 0, 0)
+ 
+-/* Change the start and end address of the stack id. */
++/* Change the start and end address of the stack id.
++   start is the new lowest addressable stack byte, end is the new highest
++   addressable stack byte. */
+ #define VALGRIND_STACK_CHANGE(id, start, end)                     \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__STACK_CHANGE,          \
+-                               id, start, end, 0, 0);             \
+-   }
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_CHANGE,     \
++                                    id, start, end, 0, 0)
+ 
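The stack requests are for code that runs on stacks it allocated itself, such as makecontext-style coroutines. A minimal sketch; the stack size is arbitrary and error handling is reduced to a bail-out:

    #include <stdlib.h>
    #include <valgrind.h>

    #define CO_STACK_SIZE (64 * 1024)

    void coroutine_stack_demo(void)
    {
       char *stack = malloc(CO_STACK_SIZE);
       if (!stack)
          return;
       /* start = lowest addressable byte, end = highest, as documented above. */
       unsigned id = VALGRIND_STACK_REGISTER(stack, stack + CO_STACK_SIZE - 1);

       /* ... run the coroutine on this stack ... */

       VALGRIND_STACK_DEREGISTER(id);
       free(stack);
    }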
+ /* Load PDB debug info for Wine PE image_map. */
+-#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta)   \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+-                               VG_USERREQ__LOAD_PDB_DEBUGINFO,    \
+-                               fd, ptr, total_size, delta, 0);    \
+-   }
++#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta)     \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__LOAD_PDB_DEBUGINFO, \
++                                    fd, ptr, total_size, delta, 0)
+ 
+ /* Map a code address to a source file name and line number.  buf64
+    must point to a 64-byte buffer in the caller's address space.  The
+    result will be dumped in there and is guaranteed to be zero
+    terminated.  If no info is found, the first byte is set to zero. */
+ #define VALGRIND_MAP_IP_TO_SRCLOC(addr, buf64)                    \
+-   {unsigned int _qzz_res VG_UNUSED;                              \
+-    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
++    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0,                  \
+                                VG_USERREQ__MAP_IP_TO_SRCLOC,      \
+-                               addr, buf64, 0, 0, 0);             \
+-   }
++                               addr, buf64, 0, 0, 0)
+ 
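A minimal sketch of VALGRIND_MAP_IP_TO_SRCLOC, querying the location of a function in the program itself; the cast to unsigned long is an assumption about how the caller represents code addresses:

    #include <stdio.h>
    #include <valgrind.h>

    static void where_am_i(void)
    {
       char buf[64] = "";   /* must be at least 64 bytes */
       VALGRIND_MAP_IP_TO_SRCLOC((unsigned long)&where_am_i, buf);
       if (buf[0] != '\0')
          printf("defined at %s\n", buf);
    }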
++/* Disable error reporting for this thread.  Behaves in a stack like
++   way, so you can safely call this multiple times provided that
++   VALGRIND_ENABLE_ERROR_REPORTING is called the same number of times
++   to re-enable reporting.  The first call of this macro disables
++   reporting.  Subsequent calls have no effect except to increase the
++   number of VALGRIND_ENABLE_ERROR_REPORTING calls needed to re-enable
++   reporting.  Child threads do not inherit this setting from their
++   parents -- they are always created with reporting enabled. */
++#define VALGRIND_DISABLE_ERROR_REPORTING                                \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \
++                                    1, 0, 0, 0, 0)
++
++/* Re-enable error reporting, as per comments on
++   VALGRIND_DISABLE_ERROR_REPORTING. */
++#define VALGRIND_ENABLE_ERROR_REPORTING                                 \
++    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \
++                                    -1, 0, 0, 0, 0)
++
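The intended pattern for the error-reporting requests is to bracket a region known to trigger harmless reports, keeping the calls strictly balanced. A minimal sketch; noisy_library_call is a hypothetical external function:

    #include <valgrind.h>

    extern void noisy_library_call(void);   /* hypothetical */

    void call_quietly(void)
    {
       VALGRIND_DISABLE_ERROR_REPORTING;
       noisy_library_call();                /* reports here are suppressed */
       VALGRIND_ENABLE_ERROR_REPORTING;     /* must balance the disable */
    }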
++/* Execute a monitor command from the client program.
++   If a connection is opened with GDB, the output will be sent
++   according to the output mode set for vgdb.
++   If no connection is opened, output will go to the log output.
++   Returns 1 if command not recognised, 0 otherwise. */
++#define VALGRIND_MONITOR_COMMAND(command)                               \
++   VALGRIND_DO_CLIENT_REQUEST_EXPR(0, VG_USERREQ__GDB_MONITOR_COMMAND, \
++                                   command, 0, 0, 0, 0)
+ 
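A minimal sketch of VALGRIND_MONITOR_COMMAND; "leak_check summary" is one of Memcheck's monitor commands, and other tools are expected to report it as unrecognised (return value 1):

    #include <valgrind.h>

    static void dump_leak_summary(void)
    {
       if (VALGRIND_MONITOR_COMMAND("leak_check summary"))
          VALGRIND_PRINTF("monitor command not recognised\n");
    }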
++
++#undef PLAT_x86_darwin
++#undef PLAT_amd64_darwin
++#undef PLAT_x86_win32
++#undef PLAT_amd64_win64
+ #undef PLAT_x86_linux
+ #undef PLAT_amd64_linux
+ #undef PLAT_ppc32_linux
+-#undef PLAT_ppc64_linux
++#undef PLAT_ppc64be_linux
++#undef PLAT_ppc64le_linux
+ #undef PLAT_arm_linux
+ #undef PLAT_s390x_linux
+-#undef PLAT_ppc32_aix5
+-#undef PLAT_ppc64_aix5
++#undef PLAT_mips32_linux
++#undef PLAT_mips64_linux
+ 
+ #endif   /* __VALGRIND_H */
diff --git a/SPECS/libsecret.spec b/SPECS/libsecret.spec
index dc1d8aa..1e46ac5 100644
--- a/SPECS/libsecret.spec
+++ b/SPECS/libsecret.spec
@@ -2,13 +2,15 @@
 %global release_version %%(echo %{version} | awk -F. '{print $1"."$2}')
 
 Name:           libsecret
-Version:        0.15
-Release:        3%{?dist}
+Version:        0.18.2
+Release:        2%{?dist}
 Summary:        Library for storing and retrieving passwords and other secrets
 
 License:        LGPLv2+
 URL:            https://live.gnome.org/Libsecret
 Source0:        http://download.gnome.org/sources/libsecret/%{release_version}/libsecret-%{version}.tar.xz
+# https://bugzilla.redhat.com/show_bug.cgi?id=1142140
+Patch0:         libsecret-0.18-update-valgrind.h-and-memcheck.h.patch
 
 BuildRequires:  glib2-devel
 BuildRequires:  gobject-introspection-devel
@@ -39,6 +41,7 @@ developing applications that use %{name}.
 
 %prep
 %setup -q
+%patch0 -p1
 
 
 %build
@@ -64,7 +67,6 @@ find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';'
 %{_bindir}/secret-tool
 %{_libdir}/libsecret-1.so.*
 %{_libdir}/girepository-1.0/Secret-1.typelib
-%{_libdir}/girepository-1.0/SecretUnstable-0.typelib
 %doc %{_mandir}/man1/secret-tool.1.gz
 
 %files devel
@@ -73,16 +75,19 @@ find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';'
 %{_libdir}/pkgconfig/libsecret-1.pc
 %{_libdir}/pkgconfig/libsecret-unstable.pc
 %{_datadir}/gir-1.0/Secret-1.gir
-%{_datadir}/gir-1.0/SecretUnstable-0.gir
 %{_datadir}/vala/vapi/libsecret-1.deps
 %{_datadir}/vala/vapi/libsecret-1.vapi
-%{_datadir}/vala/vapi/libsecret-unstable.deps
-%{_datadir}/vala/vapi/libsecret-unstable.vapi
-%{_datadir}/vala/vapi/mock-service-0.vapi
 %doc %{_datadir}/gtk-doc/
 
 
 %changelog
+* Mon May 18 2015 David King <dking@redhat.com> - 0.18.2-2
+- Update valgrind.h and memcheck.h (#1142140)
+
+* Thu Apr 30 2015 Richard Hughes <rhughes@redhat.com> - 0.18.2-1
+- Update to 0.18.2
+- Resolves: #1174539
+
 * Fri Jan 24 2014 Daniel Mach <dmach@redhat.com> - 0.15-3
 - Mass rebuild 2014-01-24