Blob Blame History Raw
From cb1fa6b1a906fdb06fbeeb1ce813e8cdcd5a604a Mon Sep 17 00:00:00 2001
From: Carl Love <carll@us.ibm.com>
Date: Fri, 20 Jun 2014 12:33:52 -0500
Subject: [PATCH 2/3] POWER PC, add the functional Little Endian support

The IBM POWER processor now supports both Big Endian and Little Endian.
The ABI for Little Endian also changes.  Specifically, the function
descriptor is not used, the stack size changed, and the way the TOC is
accessed changed.  Functions now have a local and a global entry point.
Register r2 contains the TOC for local calls and register r12 contains
the TOC for global calls.  This patch makes the functional changes to
the Valgrind tool.  It also makes the changes needed for the
none/tests/ppc32 and none/tests/ppc64 Makefile.am.  A number of the
ppc specific tests have Endian dependencies that are not fixed in
this patch.  They are fixed in the next patch.

Signed-off-by: Carl Love <carll@us.ibm.com>
---
 VEX/priv/guest_amd64_defs.h                 |    1 +
 VEX/priv/guest_amd64_toIR.c                 |    1 +
 VEX/priv/guest_arm64_defs.h                 |    1 +
 VEX/priv/guest_arm64_toIR.c                 |    1 +
 VEX/priv/guest_arm_defs.h                   |    1 +
 VEX/priv/guest_arm_toIR.c                   |    1 +
 VEX/priv/guest_generic_bb_to_IR.c           |   15 +
 VEX/priv/guest_generic_bb_to_IR.h           |    3 +
 VEX/priv/guest_mips_defs.h                  |    1 +
 VEX/priv/guest_mips_toIR.c                  |    1 +
 VEX/priv/guest_ppc_defs.h                   |    4 +-
 VEX/priv/guest_ppc_helpers.c                |   20 +-
 VEX/priv/guest_ppc_toIR.c                   | 1288 ++++++++++++++++-----------
 VEX/priv/guest_s390_defs.h                  |    1 +
 VEX/priv/guest_s390_toIR.c                  |    1 +
 VEX/priv/guest_x86_defs.h                   |    1 +
 VEX/priv/guest_x86_toIR.c                   |    1 +
 VEX/priv/host_ppc_defs.c                    |   22 +-
 VEX/priv/host_ppc_isel.c                    |  171 ++--
 VEX/priv/main_main.c                        |    8 +
 coregrind/launcher-darwin.c                 |    1 +
 coregrind/launcher-linux.c                  |    5 +
 coregrind/m_coredump/coredump-elf.c         |   21 +
 coregrind/m_debuginfo/debuginfo.c           |    3 +
 coregrind/m_debuginfo/priv_storage.h        |    5 +-
 coregrind/m_debuginfo/readelf.c             |   80 +-
 coregrind/m_debuginfo/readmacho.c           |    2 +
 coregrind/m_dispatch/dispatch-ppc64-linux.S |  115 ++-
 coregrind/m_initimg/initimg-linux.c         |    5 +-
 coregrind/m_libcsetjmp.c                    |  108 ++-
 coregrind/m_main.c                          |   49 +
 coregrind/m_redir.c                         |   45 +-
 coregrind/m_sigframe/sigframe-ppc64-linux.c |   12 +-
 coregrind/m_signals.c                       |   17 +
 coregrind/m_syscall.c                       |   39 +
 coregrind/m_syswrap/syscall-ppc64-linux.S   |   23 +-
 coregrind/m_syswrap/syswrap-main.c          |   19 +
 coregrind/m_syswrap/syswrap-ppc64-linux.c   |  125 ++-
 coregrind/m_trampoline.S                    |   41 +-
 coregrind/m_translate.c                     |   73 +-
 coregrind/m_ume/elf.c                       |   16 +
 coregrind/m_ume/macho.c                     |    2 +
 coregrind/pub_core_machine.h                |    5 +
 coregrind/vgdb-invoker-ptrace.c             |   10 +
 helgrind/hg_main.c                          |    1 +
 include/pub_tool_debuginfo.h                |    1 +
 include/pub_tool_redir.h                    |    1 +
 include/valgrind.h                          |  637 +++++++++++++
 include/vki/vki-ppc64-linux.h               |    6 +-
 memcheck/mc_machine.c                       |   10 +-
 memcheck/tests/atomic_incs.c                |   51 ++
 tests/arch_test.c                           |    4 +
 52 files changed, 2449 insertions(+), 626 deletions(-)

diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
index 42a6a37..0f565c1 100644
--- a/VEX/priv/guest_amd64_defs.h
+++ b/VEX/priv/guest_amd64_defs.h
@@ -61,6 +61,7 @@ DisResult disInstr_AMD64 ( IRSB*        irbb,
                            VexArchInfo* archinfo,
                            VexAbiInfo*  abiinfo,
                            Bool         host_bigendian,
+                           Bool         guest_bigendian,
                            Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 563e957..0bd6221 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -31746,6 +31746,7 @@ DisResult disInstr_AMD64 ( IRSB*        irsb_IN,
                            VexArchInfo* archinfo,
                            VexAbiInfo*  abiinfo,
                            Bool         host_bigendian_IN,
+                           Bool         guest_bigendian_IN,
                            Bool         sigill_diag_IN )
 {
    Int       i, x1, x2;
diff --git a/VEX/priv/guest_arm64_defs.h b/VEX/priv/guest_arm64_defs.h
index 342312e..707a411 100644
--- a/VEX/priv/guest_arm64_defs.h
+++ b/VEX/priv/guest_arm64_defs.h
@@ -51,6 +51,7 @@ DisResult disInstr_ARM64 ( IRSB*        irbb,
                            VexArchInfo* archinfo,
                            VexAbiInfo*  abiinfo,
                            Bool         host_bigendian,
+                           Bool         guest_bigendian,
                            Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 07d19c9..9faec24 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -8496,6 +8496,7 @@ DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                            VexArchInfo* archinfo,
                            VexAbiInfo*  abiinfo,
                            Bool         host_bigendian_IN,
+                           Bool         guest_bigendian_IN,
                            Bool         sigill_diag_IN )
 {
    DisResult dres;
diff --git a/VEX/priv/guest_arm_defs.h b/VEX/priv/guest_arm_defs.h
index 776abb7..c3c38f7 100644
--- a/VEX/priv/guest_arm_defs.h
+++ b/VEX/priv/guest_arm_defs.h
@@ -53,6 +53,7 @@ DisResult disInstr_ARM ( IRSB*        irbb,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian,
+                         Bool         guest_bigendian,
                          Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
index 7ab367c..2c42bf7 100644
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -21975,6 +21975,7 @@ DisResult disInstr_ARM ( IRSB*        irsb_IN,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian_IN,
+                         Bool         guest_bigendian_IN,
                          Bool         sigill_diag_IN )
 {
    DisResult dres;
diff --git a/VEX/priv/guest_generic_bb_to_IR.c b/VEX/priv/guest_generic_bb_to_IR.c
index 8bba8de..225b745 100644
--- a/VEX/priv/guest_generic_bb_to_IR.c
+++ b/VEX/priv/guest_generic_bb_to_IR.c
@@ -203,6 +203,7 @@ IRSB* bb_to_IR (
    Long       delta;
    Int        i, n_instrs, first_stmt_idx;
    Bool       resteerOK, debug_print;
+   Bool       guest_bigendian;
    DisResult  dres;
    IRStmt*    imark;
    IRStmt*    nop;
@@ -216,6 +217,19 @@ IRSB* bb_to_IR (
 
    Bool (*resteerOKfn)(void*,Addr64) = NULL;
 
+   /* WORK IN PROGRESS:  Need to know the endianness of the guest code.
+    * The goal is to remove the #defines in the VEX code for the
+    * guest code endianness on the supported architectures and
+    * instead pass in the guest and host endiannesses via the
+    * VexTranslateArgs structure to LibVEX_Translate().  LibVEX_Translate()
+    * will then pass the endianness args to this function.   June 27, 2014
+    *
+    * For now, just assume the host and guest have the same endianness.
+    *
+    * Currently PPC64 uses the guest_bigendian parameter to dis_instr_fn().
+    */
+   guest_bigendian = host_bigendian;
+
    debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
 
    /* check sanity .. */
@@ -363,6 +377,7 @@ IRSB* bb_to_IR (
                             archinfo_guest,
                             abiinfo_both,
                             host_bigendian,
+                            guest_bigendian,
                             sigill_diag );
 
       /* stay sane ... */
diff --git a/VEX/priv/guest_generic_bb_to_IR.h b/VEX/priv/guest_generic_bb_to_IR.h
index 30e216d..0763032 100644
--- a/VEX/priv/guest_generic_bb_to_IR.h
+++ b/VEX/priv/guest_generic_bb_to_IR.h
@@ -155,6 +155,9 @@ typedef
       /* Is the host bigendian? */
       /*IN*/  Bool         host_bigendian,
 
+      /* Is the guest bigendian? */
+      /*IN*/  Bool         guest_bigendian,
+
       /* Should diagnostics be printed for illegal instructions? */
       /*IN*/  Bool         sigill_diag
 
diff --git a/VEX/priv/guest_mips_defs.h b/VEX/priv/guest_mips_defs.h
index da112c5..27e467a 100644
--- a/VEX/priv/guest_mips_defs.h
+++ b/VEX/priv/guest_mips_defs.h
@@ -52,6 +52,7 @@ extern DisResult disInstr_MIPS ( IRSB*        irbb,
                                  VexArchInfo* archinfo,
                                  VexAbiInfo*  abiinfo,
                                  Bool         host_bigendian,
+                                 Bool         guest_bigendian,
                                  Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_mips_toIR.c b/VEX/priv/guest_mips_toIR.c
index 0e9f160..1589263 100644
--- a/VEX/priv/guest_mips_toIR.c
+++ b/VEX/priv/guest_mips_toIR.c
@@ -17203,6 +17203,7 @@ DisResult disInstr_MIPS( IRSB*        irsb_IN,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian_IN,
+                         Bool         guest_bigendian_IN,
                          Bool         sigill_diag_IN )
 {
    DisResult dres;
diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h
index a27c96f..342b043 100644
--- a/VEX/priv/guest_ppc_defs.h
+++ b/VEX/priv/guest_ppc_defs.h
@@ -62,6 +62,7 @@ DisResult disInstr_PPC ( IRSB*        irbb,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian,
+                         Bool         guest_bigendian,
                          Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
@@ -161,7 +162,8 @@ extern void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst,
 
 extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
                                      UInt vD_idx, UInt sh,
-                                     UInt shift_right );
+                                     UInt shift_right,
+                                     Bool guest_is_BE );
 
 #endif /* ndef __VEX_GUEST_PPC_DEFS_H */
 
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c
index 2db109b..21c699c 100644
--- a/VEX/priv/guest_ppc_helpers.c
+++ b/VEX/priv/guest_ppc_helpers.c
@@ -153,10 +153,12 @@ void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst,
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (reads guest state, writes guest mem) */
 void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
-                              UInt vD_off, UInt sh, UInt shift_right )
+                              UInt vD_off, UInt sh, UInt shift_right,
+                              Bool guest_is_BE )
 {
   UChar ref[32];
   ULong i;
+  Int k;
   /* ref[] used to be a static const array, but this doesn't work on
      ppc64 because VEX doesn't load the TOC pointer for the call here,
      and so we wind up picking up some totally random other data.
@@ -179,10 +181,18 @@ void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
   pU128_src = (U128*)&ref[sh];
   pU128_dst = (U128*)( ((UChar*)gst) + vD_off );
 
-  (*pU128_dst)[0] = (*pU128_src)[0];
-  (*pU128_dst)[1] = (*pU128_src)[1];
-  (*pU128_dst)[2] = (*pU128_src)[2];
-  (*pU128_dst)[3] = (*pU128_src)[3];
+  if (!guest_is_BE) {
+     unsigned char *srcp, *dstp;
+     srcp = (unsigned char *)pU128_src;
+     dstp = (unsigned char *)pU128_dst;
+     for (k = 15; k >= 0; k--, srcp++)
+        dstp[k] = *srcp;
+  } else {
+     (*pU128_dst)[0] = (*pU128_src)[0];
+     (*pU128_dst)[1] = (*pU128_src)[1];
+     (*pU128_dst)[2] = (*pU128_src)[2];
+     (*pU128_dst)[3] = (*pU128_src)[3];
+  }
 }
 
 
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 81c2ccf..080773a 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -97,7 +97,8 @@
 
       7C210B78 (or 1,1,1)   %R3 = client_request ( %R4 )
       7C421378 (or 2,2,2)   %R3 = guest_NRADDR
-      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R11
+      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R11  Big endian
+      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R12  Little endian
       7C842378 (or 4,4,4)   %R3 = guest_NRADDR_GPR2
       7CA52B78 (or 5,5,5)   IR injection
 
@@ -107,7 +108,55 @@
    fragments designed for Valgrind to catch.
 */
 
-
+/*  Little Endian notes  */
+/*
+ * Vector operations in little Endian mode behave in non-obvious ways at times.
+ * Below is an attempt at explaining this.
+ *
+ * LE/BE vector example
+ *   With a vector of unsigned ints declared as follows:
+ *     vector unsigned int vec_inA =
+                            { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
+ *   The '0x11111111' word is word zero in both LE and BE format.  But the
+ *   loaded vector register will have word zero on the far left in BE mode and
+ *   on the far right in LE mode. The lvx and stvx instructions work naturally
+ *   for whatever endianness is in effect.  For example, in LE mode, the stvx
+ *   stores word zero (far right word) of the vector at the lowest memory
+ *   address of the EA; in BE mode, stvx still stores word zero at the lowest
+ *   memory address, but with word zero interpreted as the one at the far left
+ *   of the register.
+ *
+ *   The lxvd2x and stxvd2x instructions are not so well suited for LE mode.
+ *   When the compiler generates an lxvd2x instruction to load the
+ *   above-declared vector of unsigned integers, it loads the vector as two
+ *   double words, but they are in BE word-wise format.  To put the vector in
+ *   the right order for LE, the compiler also generates an xxswapd after the
+ *   load, which puts it in proper LE format.  Similarly, the stxvd2x
+ *   instruction has a BE bias, storing the vector in BE word-wise format. But
+ *   the compiler also generates an xxswapd prior to the store, thus ensuring
+ *   the vector is stored in memory in the correct LE order.
+ *
+ *   Vector-flavored Iops, such as Iop_V128Hito64, reference the hi and lo
+ *   parts of double words and words within a vector.  Because of the reverse
+ *   order of numbering for LE as described above, the high part refers to word
+ *   1 in LE format. When input data is saved to a guest state vector register
+ *   (e.g., via Iop_64HLtoV128), it is first saved to memory and then the
+ *   register is loaded via PPCInstr_AvLdSt, which does an lvx instruction.
+ *   The saving of the data to memory must be done in proper LE order.  For the
+ *   inverse operation of extracting data from a vector register (e.g.,
+ *   Iop_V128Hito64), the register is first saved (by PPCInstr_AvLdSt resulting
+ *   in stvx), and then integer registers are loaded from the memory location
+ *   from where the vector register was saved.  Again, this must be done in
+ *   proper LE order.  So for these various vector Iops, we have LE-specific
+ *   code in host_ppc_isel.c
+ *
+ *   Another unique behavior of vectors in LE mode is with the vector scalar
+ *   (VSX) operations that operate on "double word 0" of the source register,
+ *   storing the result in "double word 0" of the output vector register.  For
+ *   these operations, "double word 0" is interpreted as "high half of the
+ *   register" (i.e., the part on the left side).
+ *
+ */
 /* Translates PPC32/64 code to IR. */
 
 /* References
@@ -143,8 +192,6 @@
 #include "guest_generic_bb_to_IR.h"
 #include "guest_ppc_defs.h"
 
-
-#define IENDIANESS   Iend_BE
 /*------------------------------------------------------------*/
 /*--- Globals                                              ---*/
 /*------------------------------------------------------------*/
@@ -506,13 +553,20 @@ static ULong extend_s_32to64 ( UInt x )
 
 /* Do a proper-endian load of a 32-bit word, regardless of the endianness
    of the underlying host. */
-static UInt getUIntPPCendianly ( UChar* p )
+static UInt getUIntPPCendianly ( UChar* p, Bool guest_is_BE )
 {
    UInt w = 0;
-   w = (w << 8) | p[0];
-   w = (w << 8) | p[1];
-   w = (w << 8) | p[2];
-   w = (w << 8) | p[3];
+   if (guest_is_BE) {
+       w = (w << 8) | p[0];
+       w = (w << 8) | p[1];
+       w = (w << 8) | p[2];
+       w = (w << 8) | p[3];
+   } else {
+       w = (w << 8) | p[3];
+       w = (w << 8) | p[2];
+       w = (w << 8) | p[1];
+       w = (w << 8) | p[0];
+   }
    return w;
 }
 
@@ -527,11 +581,13 @@ static void assign ( IRTemp dst, IRExpr* e )
 }
 
 /* This generates a normal (non store-conditional) store. */
-static void store ( IRExpr* addr, IRExpr* data )
+static void store ( IRExpr* addr, IRExpr* data, Bool guest_is_BE )
 {
    IRType tyA = typeOfIRExpr(irsb->tyenv, addr);
+   IREndness endian = guest_is_BE ? Iend_BE: Iend_LE;
+
    vassert(tyA == Ity_I32 || tyA == Ity_I64);
-   stmt( IRStmt_Store(IENDIANESS, addr, data) );
+   stmt( IRStmt_Store(endian, addr, data) );
 }
 
 static IRExpr* unop ( IROp op, IRExpr* a )
@@ -587,9 +643,19 @@ static IRExpr* mkV128 ( UShort i )
 }
 
 /* This generates a normal (non load-linked) load. */
-static IRExpr* load ( IRType ty, IRExpr* addr )
+static IRExpr* load ( IRType ty, IRExpr* addr, Bool guest_is_BE )
+{
+   IREndness endian = guest_is_BE ? Iend_BE: Iend_LE;
+
+   return IRExpr_Load(endian, ty, addr);
+}
+
+static IRStmt* stmt_load ( Bool guest_is_BE, IRTemp result,
+                           IRExpr* addr, IRExpr* storedata )
 {
-   return IRExpr_Load(IENDIANESS, ty, addr);
+   IREndness endian = guest_is_BE ? Iend_BE: Iend_LE;
+
+   return IRStmt_LLSC(endian, result, addr, storedata);
 }
 
 static IRExpr* mkOR1 ( IRExpr* arg1, IRExpr* arg2 )
@@ -1040,7 +1106,6 @@ static Int integerGuestRegOffset ( UInt archreg )
    // jrs: probably not necessary; only matters if we reference sub-parts
    // of the ppc registers, but that isn't the case
    // later: this might affect Altivec though?
-   vassert(host_is_bigendian);
 
    switch (archreg) {
    case  0: return offsetofPPCGuestState(guest_GPR0);
@@ -1098,100 +1163,138 @@ static void putIReg ( UInt archreg, IRExpr* e )
 
 
 /* Floating point egisters are mapped to VSX registers[0..31]. */
-static Int floatGuestRegOffset ( UInt archreg )
+static Int floatGuestRegOffset ( UInt archreg, Bool guest_is_BE )
 {
    vassert(archreg < 32);
    
-   switch (archreg) {
-   case  0: return offsetofPPCGuestState(guest_VSR0);
-   case  1: return offsetofPPCGuestState(guest_VSR1);
-   case  2: return offsetofPPCGuestState(guest_VSR2);
-   case  3: return offsetofPPCGuestState(guest_VSR3);
-   case  4: return offsetofPPCGuestState(guest_VSR4);
-   case  5: return offsetofPPCGuestState(guest_VSR5);
-   case  6: return offsetofPPCGuestState(guest_VSR6);
-   case  7: return offsetofPPCGuestState(guest_VSR7);
-   case  8: return offsetofPPCGuestState(guest_VSR8);
-   case  9: return offsetofPPCGuestState(guest_VSR9);
-   case 10: return offsetofPPCGuestState(guest_VSR10);
-   case 11: return offsetofPPCGuestState(guest_VSR11);
-   case 12: return offsetofPPCGuestState(guest_VSR12);
-   case 13: return offsetofPPCGuestState(guest_VSR13);
-   case 14: return offsetofPPCGuestState(guest_VSR14);
-   case 15: return offsetofPPCGuestState(guest_VSR15);
-   case 16: return offsetofPPCGuestState(guest_VSR16);
-   case 17: return offsetofPPCGuestState(guest_VSR17);
-   case 18: return offsetofPPCGuestState(guest_VSR18);
-   case 19: return offsetofPPCGuestState(guest_VSR19);
-   case 20: return offsetofPPCGuestState(guest_VSR20);
-   case 21: return offsetofPPCGuestState(guest_VSR21);
-   case 22: return offsetofPPCGuestState(guest_VSR22);
-   case 23: return offsetofPPCGuestState(guest_VSR23);
-   case 24: return offsetofPPCGuestState(guest_VSR24);
-   case 25: return offsetofPPCGuestState(guest_VSR25);
-   case 26: return offsetofPPCGuestState(guest_VSR26);
-   case 27: return offsetofPPCGuestState(guest_VSR27);
-   case 28: return offsetofPPCGuestState(guest_VSR28);
-   case 29: return offsetofPPCGuestState(guest_VSR29);
-   case 30: return offsetofPPCGuestState(guest_VSR30);
-   case 31: return offsetofPPCGuestState(guest_VSR31);
-   default: break;
+   if (!guest_is_BE) {
+      switch (archreg) {
+         case  0: return offsetofPPCGuestState(guest_VSR0 + 8);
+         case  1: return offsetofPPCGuestState(guest_VSR1 + 8);
+         case  2: return offsetofPPCGuestState(guest_VSR2 + 8);
+         case  3: return offsetofPPCGuestState(guest_VSR3 + 8);
+         case  4: return offsetofPPCGuestState(guest_VSR4 + 8);
+         case  5: return offsetofPPCGuestState(guest_VSR5 + 8);
+         case  6: return offsetofPPCGuestState(guest_VSR6 + 8);
+         case  7: return offsetofPPCGuestState(guest_VSR7 + 8);
+         case  8: return offsetofPPCGuestState(guest_VSR8 + 8);
+         case  9: return offsetofPPCGuestState(guest_VSR9 + 8);
+         case 10: return offsetofPPCGuestState(guest_VSR10 + 8);
+         case 11: return offsetofPPCGuestState(guest_VSR11 + 8);
+         case 12: return offsetofPPCGuestState(guest_VSR12 + 8);
+         case 13: return offsetofPPCGuestState(guest_VSR13 + 8);
+         case 14: return offsetofPPCGuestState(guest_VSR14 + 8);
+         case 15: return offsetofPPCGuestState(guest_VSR15 + 8);
+         case 16: return offsetofPPCGuestState(guest_VSR16 + 8);
+         case 17: return offsetofPPCGuestState(guest_VSR17 + 8);
+         case 18: return offsetofPPCGuestState(guest_VSR18 + 8);
+         case 19: return offsetofPPCGuestState(guest_VSR19 + 8);
+         case 20: return offsetofPPCGuestState(guest_VSR20 + 8);
+         case 21: return offsetofPPCGuestState(guest_VSR21 + 8);
+         case 22: return offsetofPPCGuestState(guest_VSR22 + 8);
+         case 23: return offsetofPPCGuestState(guest_VSR23 + 8);
+         case 24: return offsetofPPCGuestState(guest_VSR24 + 8);
+         case 25: return offsetofPPCGuestState(guest_VSR25 + 8);
+         case 26: return offsetofPPCGuestState(guest_VSR26 + 8);
+         case 27: return offsetofPPCGuestState(guest_VSR27 + 8);
+         case 28: return offsetofPPCGuestState(guest_VSR28 + 8);
+         case 29: return offsetofPPCGuestState(guest_VSR29 + 8);
+         case 30: return offsetofPPCGuestState(guest_VSR30 + 8);
+         case 31: return offsetofPPCGuestState(guest_VSR31 + 8);
+         default: break;
+      }
+   } else {
+      switch (archreg) {
+         case  0: return offsetofPPCGuestState(guest_VSR0);
+         case  1: return offsetofPPCGuestState(guest_VSR1);
+         case  2: return offsetofPPCGuestState(guest_VSR2);
+         case  3: return offsetofPPCGuestState(guest_VSR3);
+         case  4: return offsetofPPCGuestState(guest_VSR4);
+         case  5: return offsetofPPCGuestState(guest_VSR5);
+         case  6: return offsetofPPCGuestState(guest_VSR6);
+         case  7: return offsetofPPCGuestState(guest_VSR7);
+         case  8: return offsetofPPCGuestState(guest_VSR8);
+         case  9: return offsetofPPCGuestState(guest_VSR9);
+         case 10: return offsetofPPCGuestState(guest_VSR10);
+         case 11: return offsetofPPCGuestState(guest_VSR11);
+         case 12: return offsetofPPCGuestState(guest_VSR12);
+         case 13: return offsetofPPCGuestState(guest_VSR13);
+         case 14: return offsetofPPCGuestState(guest_VSR14);
+         case 15: return offsetofPPCGuestState(guest_VSR15);
+         case 16: return offsetofPPCGuestState(guest_VSR16);
+         case 17: return offsetofPPCGuestState(guest_VSR17);
+         case 18: return offsetofPPCGuestState(guest_VSR18);
+         case 19: return offsetofPPCGuestState(guest_VSR19);
+         case 20: return offsetofPPCGuestState(guest_VSR20);
+         case 21: return offsetofPPCGuestState(guest_VSR21);
+         case 22: return offsetofPPCGuestState(guest_VSR22);
+         case 23: return offsetofPPCGuestState(guest_VSR23);
+         case 24: return offsetofPPCGuestState(guest_VSR24);
+         case 25: return offsetofPPCGuestState(guest_VSR25);
+         case 26: return offsetofPPCGuestState(guest_VSR26);
+         case 27: return offsetofPPCGuestState(guest_VSR27);
+         case 28: return offsetofPPCGuestState(guest_VSR28);
+         case 29: return offsetofPPCGuestState(guest_VSR29);
+         case 30: return offsetofPPCGuestState(guest_VSR30);
+         case 31: return offsetofPPCGuestState(guest_VSR31);
+         default: break;
+      }
    }
    vpanic("floatGuestRegOffset(ppc)"); /*notreached*/
 }
 
-static IRExpr* getFReg ( UInt archreg )
+static IRExpr* getFReg ( UInt archreg, Bool guest_is_BE )
 {
    vassert(archreg < 32);
-   return IRExpr_Get( floatGuestRegOffset(archreg), Ity_F64 );
+   return IRExpr_Get( floatGuestRegOffset(archreg, guest_is_BE), Ity_F64 );
 }
 
 /* Ditto, but write to a reg instead. */
-static void putFReg ( UInt archreg, IRExpr* e )
+static void putFReg ( UInt archreg, IRExpr* e, Bool guest_is_BE )
 {
    vassert(archreg < 32);
    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
-   stmt( IRStmt_Put(floatGuestRegOffset(archreg), e) );
+   stmt( IRStmt_Put(floatGuestRegOffset(archreg, guest_is_BE), e) );
 }
 
 /* get Decimal float value.  Note, they share floating point register file. */
-static IRExpr* getDReg(UInt archreg) {
+static IRExpr* getDReg(UInt archreg, Bool guest_is_BE) {
    IRExpr *e;
    vassert( archreg < 32 );
-   e = IRExpr_Get( floatGuestRegOffset( archreg ), Ity_D64 );
+   e = IRExpr_Get( floatGuestRegOffset( archreg, guest_is_BE ), Ity_D64 );
    return e;
 }
-static IRExpr* getDReg32(UInt archreg) {
+static IRExpr* getDReg32(UInt archreg, Bool guest_is_BE) {
    IRExpr *e;
    vassert( archreg < 32 );
-   e = IRExpr_Get( floatGuestRegOffset( archreg ), Ity_D32 );
+   e = IRExpr_Get( floatGuestRegOffset( archreg, guest_is_BE ), Ity_D32 );
    return e;
 }
 
 /* Read a floating point register pair and combine their contents into a
  128-bit value */
-static IRExpr *getDReg_pair(UInt archreg) {
-   IRExpr *high = getDReg( archreg );
-   IRExpr *low = getDReg( archreg + 1 );
+static IRExpr *getDReg_pair(UInt archreg, Bool guest_is_BE ) {
+   IRExpr *high = getDReg( archreg, guest_is_BE );
+   IRExpr *low = getDReg( archreg + 1, guest_is_BE );
 
    return binop( Iop_D64HLtoD128, high, low );
 }
 
 /* Ditto, but write to a reg instead. */
-static void putDReg32(UInt archreg, IRExpr* e) {
+static void putDReg32(UInt archreg, IRExpr* e, Bool guest_is_BE) {
    vassert( archreg < 32 );
    vassert( typeOfIRExpr(irsb->tyenv, e) == Ity_D32 );
-   stmt( IRStmt_Put( floatGuestRegOffset( archreg ), e ) );
+   stmt( IRStmt_Put( floatGuestRegOffset( archreg, guest_is_BE ), e ) );
 }
 
-static void putDReg(UInt archreg, IRExpr* e) {
+static void putDReg(UInt archreg, IRExpr* e, Bool guest_is_BE) {
    vassert( archreg < 32 );
    vassert( typeOfIRExpr(irsb->tyenv, e) == Ity_D64 );
-   stmt( IRStmt_Put( floatGuestRegOffset( archreg ), e ) );
+   stmt( IRStmt_Put( floatGuestRegOffset( archreg, guest_is_BE), e ) );
 }
 
 /* Write a 128-bit floating point value into a register pair. */
-static void putDReg_pair(UInt archreg, IRExpr *e) {
+static void putDReg_pair(UInt archreg, IRExpr *e, Bool guest_is_BE) {
    IRTemp low = newTemp( Ity_D64 );
    IRTemp high = newTemp( Ity_D64 );
 
@@ -1201,8 +1304,10 @@ static void putDReg_pair(UInt archreg, IRExpr *e) {
    assign( low, unop( Iop_D128LOtoD64, e ) );
    assign( high, unop( Iop_D128HItoD64, e ) );
 
-   stmt( IRStmt_Put( floatGuestRegOffset( archreg ), mkexpr( high ) ) );
-   stmt( IRStmt_Put( floatGuestRegOffset( archreg + 1 ), mkexpr( low ) ) );
+   stmt( IRStmt_Put( floatGuestRegOffset( archreg, guest_is_BE ),
+                     mkexpr( high ) ) );
+   stmt( IRStmt_Put( floatGuestRegOffset( archreg + 1, guest_is_BE ),
+                     mkexpr( low ) ) );
 }
 
 static Int vsxGuestRegOffset ( UInt archreg )
@@ -3963,7 +4068,7 @@ static Bool dis_int_cmp ( UInt theInstr )
 /*
   Integer Logical Instructions
 */
-static Bool dis_int_logic ( UInt theInstr )
+static Bool dis_int_logic ( UInt theInstr, Bool guest_is_BE )
 {
    /* D-Form, X-Form */
    UChar opc1    = ifieldOPC(theInstr);
@@ -4194,7 +4299,7 @@ static Bool dis_int_logic ( UInt theInstr )
          IRTemp frB = newTemp(Ity_F64);
          DIP("mftgpr r%u,fr%u\n", rS_addr, rB_addr);
 
-         assign( frB, getFReg(rB_addr));  // always F64
+         assign( frB, getFReg(rB_addr, guest_is_BE ));  // always F64
          if (mode64)
             assign( rA, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
          else
@@ -4213,7 +4318,7 @@ static Bool dis_int_logic ( UInt theInstr )
          else
             assign( frA, unop( Iop_ReinterpI64asF64, unop( Iop_32Uto64, mkexpr(rB))) );
 
-         putFReg( rS_addr, mkexpr(frA));
+         putFReg( rS_addr, mkexpr(frA), guest_is_BE );
          return True;
       }
       case 0x1FA: // popcntd (population count doubleword
@@ -4720,7 +4825,7 @@ static Bool dis_int_rot ( UInt theInstr )
 /*
   Integer Load Instructions
 */
-static Bool dis_int_load ( UInt theInstr )
+static Bool dis_int_load ( UInt theInstr, Bool guest_is_BE )
 {
    /* D-Form, X-Form, DS-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -4759,7 +4864,7 @@ static Bool dis_int_load ( UInt theInstr )
    switch (opc1) {
    case 0x22: // lbz (Load B & Zero, PPC32 p433)
       DIP("lbz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I8, mkexpr(EA));
+      val = load(Ity_I8, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
       break;
       
@@ -4769,14 +4874,14 @@ static Bool dis_int_load ( UInt theInstr )
          return False;
       }
       DIP("lbzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I8, mkexpr(EA));
+      val = load(Ity_I8, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x2A: // lha (Load HW Alg, PPC32 p445)
       DIP("lha r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
       break;
 
@@ -4786,14 +4891,14 @@ static Bool dis_int_load ( UInt theInstr )
          return False;
       }
       DIP("lhau r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x28: // lhz (Load HW & Zero, PPC32 p450)
       DIP("lhz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
       break;
       
@@ -4803,14 +4908,14 @@ static Bool dis_int_load ( UInt theInstr )
          return False;
       }
       DIP("lhzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
    case 0x20: // lwz (Load W & Zero, PPC32 p460)
       DIP("lwz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I32, mkexpr(EA));
+      val = load(Ity_I32, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
       break;
       
@@ -4820,7 +4925,7 @@ static Bool dis_int_load ( UInt theInstr )
          return False;
       }
       DIP("lwzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = load(Ity_I32, mkexpr(EA));
+      val = load(Ity_I32, mkexpr(EA), guest_is_BE );
       putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
@@ -4839,14 +4944,14 @@ static Bool dis_int_load ( UInt theInstr )
             vex_printf("dis_int_load(ppc)(lwzux,rA_addr|rD_addr)\n");
             return False;
          }
-         val = load(Ity_I8, mkexpr(EA));
+         val = load(Ity_I8, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x057: // lbzx (Load B & Zero, Indexed, PPC32 p436)
          DIP("lbzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I8, mkexpr(EA));
+         val = load(Ity_I8, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
          break;
          
@@ -4856,14 +4961,14 @@ static Bool dis_int_load ( UInt theInstr )
             return False;
          }
          DIP("lhaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x157: // lhax (Load HW Alg, Indexed, PPC32 p448)
          DIP("lhax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
          break;
          
@@ -4873,14 +4978,14 @@ static Bool dis_int_load ( UInt theInstr )
             return False;
          }
          DIP("lhzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x117: // lhzx (Load HW & Zero, Indexed, PPC32 p453)
          DIP("lhzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
          break;
 
@@ -4890,14 +4995,14 @@ static Bool dis_int_load ( UInt theInstr )
             return False;
          }
          DIP("lwzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I32, mkexpr(EA));
+         val = load(Ity_I32, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x017: // lwzx (Load W & Zero, Indexed, PPC32 p463)
          DIP("lwzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = load(Ity_I32, mkexpr(EA));
+         val = load(Ity_I32, mkexpr(EA), guest_is_BE );
          putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
          break;
 
@@ -4909,13 +5014,13 @@ static Bool dis_int_load ( UInt theInstr )
             return False;
          }
          DIP("ldux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA), guest_is_BE ) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x015: // ldx (Load DWord, Indexed, PPC64 p476)
          DIP("ldx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA), guest_is_BE ) );
          break;
 
       case 0x175: // lwaux (Load W Alg, Update Indexed, PPC64 p501)
@@ -4925,14 +5030,14 @@ static Bool dis_int_load ( UInt theInstr )
          }
          DIP("lwaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA), guest_is_BE )) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x155: // lwax (Load W Alg, Indexed, PPC64 p502)
          DIP("lwax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA), guest_is_BE )) );
          break;
 
       default:
@@ -4947,7 +5052,7 @@ static Bool dis_int_load ( UInt theInstr )
       switch ((b1<<1) | b0) {
       case 0x0: // ld (Load DWord, PPC64 p472)
          DIP("ld r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
-         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA), guest_is_BE ) );
          break;
 
       case 0x1: // ldu (Load DWord, Update, PPC64 p474)
@@ -4956,14 +5061,14 @@ static Bool dis_int_load ( UInt theInstr )
             return False;
          }
          DIP("ldu r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
-         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA), guest_is_BE ) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x2: // lwa (Load Word Alg, PPC64 p499)
          DIP("lwa r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA), guest_is_BE )) );
          break;
 
       default:
@@ -4982,17 +5087,19 @@ static Bool dis_int_load ( UInt theInstr )
        */
       // trap if EA misaligned on 16 byte address
       if (mode64) {
-         assign(high, load(ty, mkexpr( EA ) ) );
+         assign(high, load(ty, mkexpr( EA ), guest_is_BE ) );
          assign(low, load(ty, binop( Iop_Add64,
                                      mkexpr( EA ),
-                                     mkU64( 8 ) ) ) );
+                                     mkU64( 8 ) ), guest_is_BE ));
       } else {
          assign(high, load(ty, binop( Iop_Add32,
                                       mkexpr( EA ),
-                                      mkU32( 4 ) ) ) );
+                                      mkU32( 4 ) ),
+                           guest_is_BE ) );
          assign(low, load(ty, binop( Iop_Add32,
                                       mkexpr( EA ),
-                                      mkU32( 12 ) ) ) );
+                                      mkU32( 12 ) ),
+                          guest_is_BE ) );
       }
       gen_SIGBUS_if_misaligned( EA, 16 );
       putIReg( rD_addr,  mkexpr( high) );
@@ -5011,7 +5118,7 @@ static Bool dis_int_load ( UInt theInstr )
 /*
   Integer Store Instructions
 */
-static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
+static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi, Bool guest_is_BE )
 {
    /* D-Form, X-Form, DS-Form */
    UChar opc1    = ifieldOPC(theInstr);
@@ -5047,7 +5154,7 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
    switch (opc1) {
    case 0x26: // stb (Store B, PPC32 p509)
       DIP("stb r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)), guest_is_BE );
       break;
        
    case 0x27: // stbu (Store B, Update, PPC32 p510)
@@ -5057,12 +5164,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
       }
       DIP("stbu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)), guest_is_BE );
       break;
 
    case 0x2C: // sth (Store HW, PPC32 p522)
       DIP("sth r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)), guest_is_BE );
       break;
       
    case 0x2D: // sthu (Store HW, Update, PPC32 p524)
@@ -5072,12 +5179,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
       }
       DIP("sthu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)), guest_is_BE );
       break;
 
    case 0x24: // stw (Store W, PPC32 p530)
       DIP("stw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)), guest_is_BE );
       break;
 
    case 0x25: // stwu (Store W, Update, PPC32 p534)
@@ -5087,7 +5194,7 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
       }
       DIP("stwu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)), guest_is_BE );
       break;
       
    /* X Form : all these use EA_indexed */
@@ -5105,12 +5212,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
          }
          DIP("stbux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)), guest_is_BE );
          break;
          
       case 0x0D7: // stbx (Store B Indexed, PPC32 p512)
          DIP("stbx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)), guest_is_BE );
          break;
          
       case 0x1B7: // sthux (Store HW, Update Indexed, PPC32 p525)
@@ -5120,12 +5227,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
          }
          DIP("sthux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)), guest_is_BE );
          break;
          
       case 0x197: // sthx (Store HW Indexed, PPC32 p526)
          DIP("sthx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)), guest_is_BE );
          break;
          
       case 0x0B7: // stwux (Store W, Update Indexed, PPC32 p535)
@@ -5135,12 +5242,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
          }
          DIP("stwux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)), guest_is_BE );
          break;
 
       case 0x097: // stwx (Store W Indexed, PPC32 p536)
          DIP("stwx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)), guest_is_BE );
          break;
          
 
@@ -5152,12 +5259,12 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
          }
          DIP("stdux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         store( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS), guest_is_BE );
          break;
 
       case 0x095: // stdx (Store DWord Indexed, PPC64 p585)
          DIP("stdx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         store( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS), guest_is_BE );
          break;
 
       default:
@@ -5175,7 +5282,7 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
             return False;
 
          DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-         store( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS), guest_is_BE );
          break;
 
       case 0x1: // stdu (Store DWord, Update, PPC64 p583)
@@ -5184,7 +5291,7 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
 
          DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         store( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS), guest_is_BE );
          break;
 
       case 0x2: { // stq (Store QuadWord, Update, PPC64 p583)
@@ -5206,9 +5313,9 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
             assign( EA_lo, ea_rAor0_simm( rA_addr, simm16+12 ) );
          }
          putIReg( rA_addr, mkexpr(EA_hi) );
-         store( mkexpr(EA_hi), mkexpr(rS) );
+         store( mkexpr(EA_hi), mkexpr(rS), guest_is_BE );
          putIReg( rA_addr, mkexpr( EA_lo) );
-         store( mkexpr(EA_lo), getIReg( rS_addr+1 ) );
+         store( mkexpr(EA_lo), getIReg( rS_addr+1 ), guest_is_BE );
          break;
       }
       default:
@@ -5229,7 +5336,7 @@ static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
 /*
   Integer Load/Store Multiple Instructions
 */
-static Bool dis_int_ldst_mult ( UInt theInstr )
+static Bool dis_int_ldst_mult ( UInt theInstr, Bool guest_is_BE )
 {
    /* D-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -5257,7 +5364,7 @@ static Bool dis_int_ldst_mult ( UInt theInstr )
       DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
       for (r = rD_addr; r <= 31; r++) {
          irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off));
-         putIReg( r, mkWidenFrom32(ty, load(Ity_I32, irx_addr ),
+         putIReg( r, mkWidenFrom32(ty, load(Ity_I32, irx_addr, guest_is_BE ),
                                        False) );
          ea_off += 4;
       }
@@ -5267,7 +5374,7 @@ static Bool dis_int_ldst_mult ( UInt theInstr )
       DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       for (r = rS_addr; r <= 31; r++) {
          irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off));
-         store( irx_addr, mkNarrowTo32(ty, getIReg(r)) );
+         store( irx_addr, mkNarrowTo32(ty, getIReg(r)), guest_is_BE );
          ea_off += 4;
       }
       break;
@@ -5288,7 +5395,8 @@ static
 void generate_lsw_sequence ( IRTemp tNBytes,   // # bytes, :: Ity_I32
                              IRTemp EA,        // EA
                              Int    rD,        // first dst register
-                             Int    maxBytes ) // 32 or 128
+                             Int    maxBytes,     // 32 or 128
+                             Bool   guest_is_BE )
 {
    Int     i, shift = 24;
    IRExpr* e_nbytes = mkexpr(tNBytes);
@@ -5324,7 +5432,8 @@ void generate_lsw_sequence ( IRTemp tNBytes,   // # bytes, :: Ity_I32
                      Iop_8Uto32, 
                      load( Ity_I8,
                            binop( mkSzOp(ty,Iop_Add8),
-                                  e_EA, mkSzImm(ty,i)))
+                                  e_EA, mkSzImm(ty,i)),
+                           guest_is_BE )
                   ), 
                   mkU8(toUChar(shift))
                )
@@ -5340,7 +5449,8 @@ static
 void generate_stsw_sequence ( IRTemp tNBytes,   // # bytes, :: Ity_I32
                               IRTemp EA,        // EA
                               Int    rS,        // first src register
-                              Int    maxBytes ) // 32 or 128
+                              Int    maxBytes,  // 32 or 128
+                              Bool   guest_is_BE)
 {
    Int     i, shift = 24;
    IRExpr* e_nbytes = mkexpr(tNBytes);
@@ -5367,13 +5477,14 @@ void generate_stsw_sequence ( IRTemp tNBytes,   // # bytes, :: Ity_I32
             unop( Iop_32to8,
                   binop( Iop_Shr32,
                          mkNarrowTo32( ty, getIReg(rS) ),
-                         mkU8( toUChar(shift) )))
-      );
+                         mkU8( toUChar(shift) ))),
+            guest_is_BE);
       shift -= 8;
    }
 }
 
-static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
+static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere,
+                               Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -5406,14 +5517,15 @@ static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
          /* Special case hack */
          /* rD = Mem[EA]; (rD+1)%32 = Mem[EA+4] */
          putIReg( rD_addr,          
-                  load(Ity_I32, mkexpr(t_EA)) );
+                  load(Ity_I32, mkexpr(t_EA), guest_is_BE ) );
          putIReg( (rD_addr+1) % 32, 
                   load(Ity_I32,
-                       binop(Iop_Add32, mkexpr(t_EA), mkU32(4))) );
+                       binop(Iop_Add32, mkexpr(t_EA), mkU32(4)),
+                       guest_is_BE ));
       } else {
          t_nbytes = newTemp(Ity_I32);
          assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
-         generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 32 );
+         generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 32, guest_is_BE );
          *stopHere = True;
       }
       return True;
@@ -5431,7 +5543,7 @@ static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
       t_nbytes = newTemp(Ity_I32);
       assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) );
       assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) );
-      generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 128 );
+      generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 128, guest_is_BE );
       *stopHere = True;
       return True;
 
@@ -5441,14 +5553,13 @@ static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
       if (NumBytes == 8 && !mode64) {
          /* Special case hack */
          /* Mem[EA] = rD; Mem[EA+4] = (rD+1)%32 */
-         store( mkexpr(t_EA),
-                getIReg(rD_addr) );
+         store( mkexpr(t_EA), getIReg(rD_addr), guest_is_BE );
          store( binop(Iop_Add32, mkexpr(t_EA), mkU32(4)),
-                getIReg((rD_addr+1) % 32) );
+                getIReg((rD_addr+1) % 32), guest_is_BE );
       } else {
          t_nbytes = newTemp(Ity_I32);
          assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
-         generate_stsw_sequence( t_nbytes, t_EA, rD_addr, 32 );
+         generate_stsw_sequence( t_nbytes, t_EA, rD_addr, 32, guest_is_BE );
          *stopHere = True;
       }
       return True;
@@ -5458,7 +5569,7 @@ static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
       t_nbytes = newTemp(Ity_I32);
       assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) );
       assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) );
-      generate_stsw_sequence( t_nbytes, t_EA, rS_addr, 128 );
+      generate_stsw_sequence( t_nbytes, t_EA, rS_addr, 128, guest_is_BE );
       *stopHere = True;
       return True;
 
@@ -6087,7 +6198,7 @@ static Bool dis_syslink ( UInt theInstr,
   check any stores it does.  Instead, the reservation is cancelled when
   the scheduler switches to another thread (run_thread_for_a_while()).
 */
-static Bool dis_memsync ( UInt theInstr )
+static Bool dis_memsync ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form, XL-Form */
    UChar opc1    = ifieldOPC(theInstr);
@@ -6147,7 +6258,7 @@ static Bool dis_memsync ( UInt theInstr )
 
          // and actually do the load
          res = newTemp(Ity_I32);
-         stmt( IRStmt_LLSC(IENDIANESS, res, mkexpr(EA), NULL/*this is a load*/) );
+         stmt( stmt_load(guest_is_BE, res, mkexpr(EA), NULL/*this is a load*/) );
 
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(res), False) );
          break;
@@ -6173,7 +6284,7 @@ static Bool dis_memsync ( UInt theInstr )
 
          // Do the store, and get success/failure bit into resSC
          resSC = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(IENDIANESS, resSC, mkexpr(EA), mkexpr(rS)) );
+         stmt( stmt_load(guest_is_BE, resSC, mkexpr(EA), mkexpr(rS)) );
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
          // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]  on success
@@ -6240,7 +6351,7 @@ static Bool dis_memsync ( UInt theInstr )
 
          // and actually do the load
          res = newTemp(Ity_I64);
-         stmt( IRStmt_LLSC(IENDIANESS, res, mkexpr(EA), NULL/*this is a load*/) );
+         stmt( stmt_load(guest_is_BE, res, mkexpr(EA), NULL/*this is a load*/) );
 
          putIReg( rD_addr, mkexpr(res) );
          break;
@@ -6266,7 +6377,7 @@ static Bool dis_memsync ( UInt theInstr )
 
          // Do the store, and get success/failure bit into resSC
          resSC = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(IENDIANESS, resSC, mkexpr(EA), mkexpr(rS)) );
+         stmt( stmt_load(guest_is_BE, resSC, mkexpr(EA), mkexpr(rS)) );
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
          // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]  on success
@@ -6296,18 +6407,18 @@ static Bool dis_memsync ( UInt theInstr )
 
          // and actually do the load
          if (mode64) {
-            stmt( IRStmt_LLSC( IENDIANESS, res_hi,
-                               mkexpr(EA), NULL/*this is a load*/) );
-            stmt( IRStmt_LLSC( IENDIANESS, res_lo,
-                               binop(Iop_Add64, mkexpr(EA), mkU64(8) ),
-                               NULL/*this is a load*/) );
+            stmt( stmt_load( guest_is_BE, res_hi,
+                             mkexpr(EA), NULL/*this is a load*/) );
+            stmt( stmt_load( guest_is_BE, res_lo,
+                             binop(Iop_Add64, mkexpr(EA), mkU64(8) ),
+                             NULL/*this is a load*/) );
          } else {
-            stmt( IRStmt_LLSC( IENDIANESS, res_hi,
-                               binop( Iop_Add32, mkexpr(EA), mkU32(4) ),
-                               NULL/*this is a load*/) );
-            stmt( IRStmt_LLSC( IENDIANESS, res_lo,
-                               binop( Iop_Add32, mkexpr(EA), mkU32(12) ),
-                               NULL/*this is a load*/) );
+            stmt( stmt_load( guest_is_BE, res_hi,
+                             binop( Iop_Add32, mkexpr(EA), mkU32(4) ),
+                             NULL/*this is a load*/) );
+            stmt( stmt_load( guest_is_BE, res_lo,
+                             binop( Iop_Add32, mkexpr(EA), mkU32(12) ),
+                             NULL/*this is a load*/) );
          }
          putIReg( rD_addr,   mkexpr(res_hi) );
          putIReg( rD_addr+1, mkexpr(res_lo) );
@@ -6336,14 +6447,16 @@ static Bool dis_memsync ( UInt theInstr )
          resSC = newTemp(Ity_I1);
 
          if (mode64) {
-            stmt( IRStmt_LLSC( IENDIANESS, resSC, mkexpr(EA), mkexpr(rS_hi) ) );
-            store( binop( Iop_Add64, mkexpr(EA), mkU64(8) ), mkexpr(rS_lo) );
+            stmt( stmt_load( guest_is_BE, resSC, mkexpr(EA), mkexpr(rS_hi) ) );
+            store( binop( Iop_Add64, mkexpr(EA), mkU64(8) ), mkexpr(rS_lo),
+                   guest_is_BE );
          } else {
-            stmt( IRStmt_LLSC( IENDIANESS, resSC, binop( Iop_Add32,
-                                                         mkexpr(EA),
-                                                         mkU32(4) ),
-                                                         mkexpr(rS_hi) ) );
-            store( binop(Iop_Add32, mkexpr(EA), mkU32(12) ), mkexpr(rS_lo) );
+            stmt( stmt_load( guest_is_BE, resSC, binop( Iop_Add32,
+                                                        mkexpr(EA),
+                                                        mkU32(4) ),
+                                                        mkexpr(rS_hi) ) );
+            store( binop(Iop_Add32, mkexpr(EA), mkU32(12) ), mkexpr(rS_lo),
+                   guest_is_BE );
          }
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
@@ -6639,7 +6752,7 @@ static IRExpr* /* :: Ity_I32 */ gen_byterev16 ( IRTemp t )
       );
 }
 
-static Bool dis_int_ldst_rev ( UInt theInstr )
+static Bool dis_int_ldst_rev ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1    = ifieldOPC(theInstr);
@@ -6666,7 +6779,8 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
 
       case 0x316: // lhbrx (Load Halfword Byte-Reverse Indexed, PPC32 p449)
          DIP("lhbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, unop(Iop_16Uto32, load(Ity_I16, mkexpr(EA))) );
+         assign( w1, unop(Iop_16Uto32, load(Ity_I16, mkexpr(EA),
+                                            guest_is_BE )) );
          assign( w2, gen_byterev16(w1) );
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
                                          /* Signed */False) );
@@ -6674,7 +6788,7 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
 
       case 0x216: // lwbrx (Load Word Byte-Reverse Indexed, PPC32 p459)
          DIP("lwbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, load(Ity_I32, mkexpr(EA)) );
+         assign( w1, load(Ity_I32, mkexpr(EA), guest_is_BE ));
          assign( w2, gen_byterev32(w1) );
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
                                          /* Signed */False) );
@@ -6686,26 +6800,30 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
          IRTemp w3 = newTemp( Ity_I32 );
          IRTemp w4 = newTemp( Ity_I32 );
          DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
+         assign( w1, load( Ity_I32, mkexpr( EA ), guest_is_BE ) );
          assign( w2, gen_byterev32( w1 ) );
          nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                            ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
-         assign( w3, load( Ity_I32, nextAddr ) );
+         assign( w3, load( Ity_I32, nextAddr, guest_is_BE ));
          assign( w4, gen_byterev32( w3 ) );
-         putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+         if (!guest_is_BE)
+            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+         else
+            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
          break;
       }
 
       case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523)
          DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
-         store( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) );
+         store( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)),
+                guest_is_BE );
          break;
       
       case 0x296: // stwbrx (Store Word Byte-Reverse Indxd, PPC32 p531)
          DIP("stwbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
-         store( mkexpr(EA), gen_byterev32(w1) );
+         store( mkexpr(EA), gen_byterev32(w1), guest_is_BE );
          break;
 
       case 0x294: // stdbrx (Store Doubleword Byte-Reverse Indexed)
@@ -6719,7 +6837,8 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
          assign(hi, unop(Iop_64to32, mkexpr(rS)));
          store( mkexpr( EA ),
                 binop( Iop_32HLto64, gen_byterev32( hi ),
-                       gen_byterev32( lo ) ) );
+                       gen_byterev32( lo ) ),
+                guest_is_BE );
          break;
       }
 
@@ -7138,7 +7257,8 @@ static Bool dis_proc_ctl ( VexAbiInfo* vbi, UInt theInstr )
 */
 static Bool dis_cache_manage ( UInt         theInstr, 
                                DisResult*   dres,
-                               VexArchInfo* guest_archinfo )
+                               VexArchInfo* guest_archinfo,
+                               Bool         guest_is_BE)
 {
    /* X-Form */
    UChar opc1    = ifieldOPC(theInstr);
@@ -7237,7 +7357,7 @@ static Bool dis_cache_manage ( UInt         theInstr,
          
          for (i = 0; i < clearszB / 8; i++) {
             irx_addr = binop( Iop_Add64, mkexpr(addr), mkU64(i*8) );
-            store( irx_addr, mkU64(0) );
+            store( irx_addr, mkU64(0), guest_is_BE );
          }
       } else {
          /* Round EA down to the start of the containing block. */
@@ -7247,7 +7367,7 @@ static Bool dis_cache_manage ( UInt         theInstr,
          
          for (i = 0; i < clearszB / 4; i++) {
             irx_addr = binop( Iop_Add32, mkexpr(addr), mkU32(i*4) );
-            store( irx_addr, mkU32(0) );
+            store( irx_addr, mkU32(0), guest_is_BE );
          }
       }
       break;
@@ -7436,7 +7556,7 @@ static IRExpr * Complement_non_NaN( IRExpr * value, IRExpr * nan_mask )
 /*
   Floating Point Load Instructions
 */
-static Bool dis_fp_load ( UInt theInstr )
+static Bool dis_fp_load ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form, D-Form */
    UChar opc1      = ifieldOPC(theInstr);
@@ -7467,7 +7587,8 @@ static Bool dis_fp_load ( UInt theInstr )
       DIP("lfs fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
       putFReg( frD_addr,
-               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
+               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA), guest_is_BE)),
+               guest_is_BE);
       break;
 
    case 0x31: // lfsu (Load Float Single, Update, PPC32 p442)
@@ -7476,14 +7597,16 @@ static Bool dis_fp_load ( UInt theInstr )
       DIP("lfsu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
       putFReg( frD_addr,
-               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
+               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA), guest_is_BE)),
+               guest_is_BE );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x32: // lfd (Load Float Double, PPC32 p437)
       DIP("lfd fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
-      putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
+      putFReg( frD_addr, load(Ity_F64, mkexpr(EA), guest_is_BE ),
+               guest_is_BE );
       break;
 
    case 0x33: // lfdu (Load Float Double, Update, PPC32 p438)
@@ -7491,7 +7614,8 @@ static Bool dis_fp_load ( UInt theInstr )
          return False;
       DIP("lfdu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
-      putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
+      putFReg( frD_addr, load(Ity_F64, mkexpr(EA), guest_is_BE),
+               guest_is_BE );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
@@ -7506,7 +7630,8 @@ static Bool dis_fp_load ( UInt theInstr )
          DIP("lfsx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
          putFReg( frD_addr, unop( Iop_F32toF64, 
-                                  load(Ity_F32, mkexpr(EA))) );
+                                  load(Ity_F32, mkexpr(EA), guest_is_BE)),
+                  guest_is_BE );
          break;
          
       case 0x237: // lfsux (Load Float Single, Update Indxd, PPC32 p443)
@@ -7515,14 +7640,16 @@ static Bool dis_fp_load ( UInt theInstr )
          DIP("lfsux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
          putFReg( frD_addr,
-                  unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
+                  unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA), guest_is_BE)),
+                  guest_is_BE );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x257: // lfdx (Load Float Double Indexed, PPC32 p440)
          DIP("lfdx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
+         putFReg( frD_addr, load(Ity_F64, mkexpr(EA), guest_is_BE),
+                  guest_is_BE );
          break;
          
       case 0x277: // lfdux (Load Float Double, Update Indxd, PPC32 p439)
@@ -7530,19 +7657,22 @@ static Bool dis_fp_load ( UInt theInstr )
             return False;
          DIP("lfdux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
-         putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
+         putFReg( frD_addr, load(Ity_F64, mkexpr(EA), guest_is_BE),
+                  guest_is_BE );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x357: // lfiwax (Load Float As Integer, Indxd, ISA 2.05 p120)
          DIP("lfiwax fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
-         assign( iLo, load(Ity_I32, mkexpr(EA)) );
+         assign( iLo, load(Ity_I32, mkexpr(EA), guest_is_BE) );
          assign( iHi, binop(Iop_Sub32,
                             mkU32(0),
                             binop(Iop_Shr32, mkexpr(iLo), mkU8(31)))  );
-         putFReg( frD_addr, unop(Iop_ReinterpI64asF64,
-                                 binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo))) );
+         putFReg( frD_addr,
+                  unop(Iop_ReinterpI64asF64,
+                       binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo))),
+                  guest_is_BE );
          break;
 
       case 0x377: // lfiwzx (Load floating-point as integer word, zero indexed
@@ -7550,9 +7680,10 @@ static Bool dis_fp_load ( UInt theInstr )
          IRTemp dw = newTemp( Ity_I64 );
          DIP("lfiwzx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
-         assign( iLo, load(Ity_I32, mkexpr(EA)) );
+         assign( iLo, load(Ity_I32, mkexpr(EA), guest_is_BE) );
          assign( dw, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( iLo ) ) );
-         putFReg( frD_addr, unop( Iop_ReinterpI64asF64, mkexpr( dw ) ) );
+         putFReg( frD_addr, unop( Iop_ReinterpI64asF64, mkexpr( dw ) ),
+                  guest_is_BE);
          break;
       }
 
@@ -7574,7 +7705,7 @@ static Bool dis_fp_load ( UInt theInstr )
 /*
   Floating Point Store Instructions
 */
-static Bool dis_fp_store ( UInt theInstr )
+static Bool dis_fp_store ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form, D-Form */
    UChar opc1      = ifieldOPC(theInstr);
@@ -7592,7 +7723,7 @@ static Bool dis_fp_store ( UInt theInstr )
    IRTemp rA     = newTemp(ty);
    IRTemp rB     = newTemp(ty);
 
-   assign( frS, getFReg(frS_addr) );
+   assign( frS, getFReg(frS_addr, guest_is_BE) );
    assign( rA,  getIReg(rA_addr) );
    assign( rB,  getIReg(rB_addr) );
 
@@ -7609,7 +7740,7 @@ static Bool dis_fp_store ( UInt theInstr )
       /* Use Iop_TruncF64asF32 to truncate and possible denormalise
          the value to be stored in the correct way, without any
          rounding. */
-      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
+      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)), guest_is_BE );
       break;
 
    case 0x35: // stfsu (Store Float Single, Update, PPC32 p519)
@@ -7618,14 +7749,14 @@ static Bool dis_fp_store ( UInt theInstr )
       DIP("stfsu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
       /* See comment for stfs */
-      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
+      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)), guest_is_BE );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
    case 0x36: // stfd (Store Float Double, PPC32 p513)
       DIP("stfd fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
-      store( mkexpr(EA), mkexpr(frS) );
+      store( mkexpr(EA), mkexpr(frS), guest_is_BE );
       break;
 
    case 0x37: // stfdu (Store Float Double, Update, PPC32 p514)
@@ -7633,7 +7764,7 @@ static Bool dis_fp_store ( UInt theInstr )
          return False;
       DIP("stfdu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
-      store( mkexpr(EA), mkexpr(frS) );
+      store( mkexpr(EA), mkexpr(frS), guest_is_BE );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
@@ -7648,7 +7779,8 @@ static Bool dis_fp_store ( UInt theInstr )
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
          /* See note for stfs */
          store( mkexpr(EA),
-                unop(Iop_TruncF64asF32, mkexpr(frS)) );
+                unop(Iop_TruncF64asF32, mkexpr(frS)),
+                guest_is_BE );
          break;
          
       case 0x2B7: // stfsux (Store Float Sgl, Update Indxd, PPC32 p520)
@@ -7657,14 +7789,15 @@ static Bool dis_fp_store ( UInt theInstr )
          DIP("stfsux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
          /* See note for stfs */
-         store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
+         store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)),
+                guest_is_BE );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x2D7: // stfdx (Store Float Double Indexed, PPC32 p516)
          DIP("stfdx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         store( mkexpr(EA), mkexpr(frS) );
+         store( mkexpr(EA), mkexpr(frS), guest_is_BE );
          break;
          
       case 0x2F7: // stfdux (Store Float Dbl, Update Indxd, PPC32 p515)
@@ -7672,7 +7805,7 @@ static Bool dis_fp_store ( UInt theInstr )
             return False;
          DIP("stfdux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
-         store( mkexpr(EA), mkexpr(frS) );
+         store( mkexpr(EA), mkexpr(frS), guest_is_BE );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
@@ -7681,7 +7814,8 @@ static Bool dis_fp_store ( UInt theInstr )
          DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
          store( mkexpr(EA),
-                unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))) );
+                unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))),
+                guest_is_BE );
          break;
 
       default:
@@ -7702,7 +7836,7 @@ static Bool dis_fp_store ( UInt theInstr )
 /*
   Floating Point Arith Instructions
 */
-static Bool dis_fp_arith ( UInt theInstr )
+static Bool dis_fp_arith ( UInt theInstr, Bool guest_is_BE )
 {
    /* A-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -7730,9 +7864,9 @@ static Bool dis_fp_arith ( UInt theInstr )
       zero.  Hence cr1 should be cleared if this is a . form insn. */
    Bool clear_CR1 = True;
 
-   assign( frA, getFReg(frA_addr));
-   assign( frB, getFReg(frB_addr));
-   assign( frC, getFReg(frC_addr));
+   assign( frA, getFReg(frA_addr, guest_is_BE));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
+   assign( frC, getFReg(frC_addr, guest_is_BE));
 
    switch (opc1) {
    case 0x3B:
@@ -7920,7 +8054,7 @@ static Bool dis_fp_arith ( UInt theInstr )
       return False;
    }
 
-   putFReg( frD_addr, mkexpr(frD) );
+   putFReg( frD_addr, mkexpr(frD), guest_is_BE );
 
    if (set_FPRF) {
       // XXX XXX XXX FIXME
@@ -7940,7 +8074,7 @@ static Bool dis_fp_arith ( UInt theInstr )
 /*
   Floating Point Mult-Add Instructions
 */
-static Bool dis_fp_multadd ( UInt theInstr )
+static Bool dis_fp_multadd ( UInt theInstr, Bool guest_is_BE )
 {
    /* A-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -7978,9 +8112,9 @@ static Bool dis_fp_multadd ( UInt theInstr )
    assign( rmt, get_IR_roundingmode() );
    rm = mkexpr(rmt);
 
-   assign( frA, getFReg(frA_addr));
-   assign( frB, getFReg(frB_addr));
-   assign( frC, getFReg(frC_addr));
+   assign( frA, getFReg(frA_addr, guest_is_BE));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
+   assign( frC, getFReg(frC_addr, guest_is_BE));
 
    /* The rounding in this is all a bit dodgy.  The idea is to only do
       one rounding.  That clearly isn't achieveable without dedicated
@@ -8084,7 +8218,7 @@ static Bool dis_fp_multadd ( UInt theInstr )
       return False;
    }
 
-   putFReg( frD_addr, mkexpr(frD) );
+   putFReg( frD_addr, mkexpr(frD), guest_is_BE );
 
    if (set_FPRF) {
       // XXX XXX XXX FIXME
@@ -8420,7 +8554,7 @@ static IRExpr * do_fp_tdiv(IRTemp frA_int, IRTemp frB_int)
                  binop( Iop_Shl32, mkexpr(fe_flag), mkU8( 1 ) ) );
 }
 
-static Bool dis_fp_tests ( UInt theInstr )
+static Bool dis_fp_tests ( UInt theInstr, Bool guest_is_BE )
 {
    UChar opc1     = ifieldOPC(theInstr);
    UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
@@ -8433,7 +8567,8 @@ static Bool dis_fp_tests ( UInt theInstr )
       vex_printf("dis_fp_tests(ppc)(ftdiv)\n");
       return False;
    }
-   assign( frB_I64, unop( Iop_ReinterpF64asI64, getFReg( frB_addr ) ) );
+   assign( frB_I64, unop( Iop_ReinterpF64asI64, getFReg( frB_addr,
+                                                         guest_is_BE ) ) );
 
    switch (opc2) {
       case 0x080: // ftdiv
@@ -8446,7 +8581,8 @@ static Bool dis_fp_tests ( UInt theInstr )
             return False;
          }
 
-         assign( frA_I64, unop( Iop_ReinterpF64asI64, getFReg( frA_addr ) ) );
+         assign( frA_I64, unop( Iop_ReinterpF64asI64,
+                                getFReg( frA_addr, guest_is_BE ) ) );
          putGST_field( PPC_GST_CR, do_fp_tdiv(frA_I64, frB_I64), crfD );
 
          DIP("ftdiv crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr);
@@ -8487,7 +8623,7 @@ static Bool dis_fp_tests ( UInt theInstr )
 /*
   Floating Point Compare Instructions
 */
-static Bool dis_fp_cmp ( UInt theInstr )
+static Bool dis_fp_cmp ( UInt theInstr, Bool guest_is_BE )
 {   
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -8509,8 +8645,8 @@ static Bool dis_fp_cmp ( UInt theInstr )
       return False;
    }
 
-   assign( frA, getFReg(frA_addr));
-   assign( frB, getFReg(frB_addr));
+   assign( frA, getFReg(frA_addr, guest_is_BE));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
 
    assign( ccIR, binop(Iop_CmpF64, mkexpr(frA), mkexpr(frB)) );
    
@@ -8586,7 +8722,7 @@ static Bool dis_fp_cmp ( UInt theInstr )
 /*
   Floating Point Rounding/Conversion Instructions
 */
-static Bool dis_fp_round ( UInt theInstr )
+static Bool dis_fp_round ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -8617,7 +8753,7 @@ static Bool dis_fp_round ( UInt theInstr )
       return False;
    }
 
-   assign( frB, getFReg(frB_addr));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
    if (opc1 == 0x3B) {
       /* The fcfid[u]s instructions (from ISA 2.06) are a bit odd because
        * they're very similar to the other instructions handled here, but have
@@ -8781,7 +8917,7 @@ static Bool dis_fp_round ( UInt theInstr )
       return False;
    }
 putFR:
-   putFReg( frD_addr, mkexpr(frD) );
+   putFReg( frD_addr, mkexpr(frD), guest_is_BE );
 
    if (set_FPRF) {
       // XXX XXX XXX FIXME
@@ -8799,7 +8935,7 @@ putFR:
 /*
   Floating Point Pair Instructions
 */
-static Bool dis_fp_pair ( UInt theInstr )
+static Bool dis_fp_pair ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form/DS-Form */
    UChar  opc1         = ifieldOPC(theInstr);
@@ -8863,15 +8999,17 @@ static Bool dis_fp_pair ( UInt theInstr )
    else
       assign( EA_lo, binop(Iop_Add32, mkexpr(EA_hi), mkU32(8)) );
 
-   assign( frT_hi, getFReg(frT_hi_addr) );
-   assign( frT_lo, getFReg(frT_lo_addr) );
+   assign( frT_hi, getFReg(frT_hi_addr, guest_is_BE) );
+   assign( frT_lo, getFReg(frT_lo_addr, guest_is_BE) );
 
    if (is_load) {
-      putFReg( frT_hi_addr, load(Ity_F64, mkexpr(EA_hi)) );
-      putFReg( frT_lo_addr, load(Ity_F64, mkexpr(EA_lo)) );
+      putFReg( frT_hi_addr, load(Ity_F64, mkexpr(EA_hi), guest_is_BE),
+               guest_is_BE );
+      putFReg( frT_lo_addr, load(Ity_F64, mkexpr(EA_lo), guest_is_BE),
+               guest_is_BE );
    } else {
-      store( mkexpr(EA_hi), mkexpr(frT_hi) );
-      store( mkexpr(EA_lo), mkexpr(frT_lo) );
+      store( mkexpr(EA_hi), mkexpr(frT_hi), guest_is_BE );
+      store( mkexpr(EA_lo), mkexpr(frT_lo), guest_is_BE );
    }
 
    return True;
@@ -8881,7 +9019,7 @@ static Bool dis_fp_pair ( UInt theInstr )
 /*
   Floating Point Merge Instructions
 */
-static Bool dis_fp_merge ( UInt theInstr )
+static Bool dis_fp_merge ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UInt  opc2     = ifieldOPClo10(theInstr);
@@ -8893,8 +9031,8 @@ static Bool dis_fp_merge ( UInt theInstr )
    IRTemp frA = newTemp(Ity_F64);
    IRTemp frB = newTemp(Ity_F64);
 
-   assign( frA, getFReg(frA_addr));
-   assign( frB, getFReg(frB_addr));
+   assign( frA, getFReg(frA_addr, guest_is_BE));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
 
    switch (opc2) {
    case 0x3c6: // fmrgew floating merge even word
@@ -8928,14 +9066,14 @@ static Bool dis_fp_merge ( UInt theInstr )
       return False;
    }
 
-   putFReg( frD_addr, mkexpr(frD) );
+   putFReg( frD_addr, mkexpr(frD), guest_is_BE );
    return True;
 }
 
 /*
   Floating Point Move Instructions
 */
-static Bool dis_fp_move ( UInt theInstr )
+static Bool dis_fp_move ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -8957,7 +9095,7 @@ static Bool dis_fp_move ( UInt theInstr )
       return False;
    }
 
-   assign( frB, getFReg(frB_addr));
+   assign( frB, getFReg(frB_addr, guest_is_BE));
 
    switch (opc2) {
    case 0x008: // fcpsgn (Floating Copy Sign, ISA_V2.05 p126)
@@ -8967,7 +9105,7 @@ static Bool dis_fp_move ( UInt theInstr )
       hiD = newTemp(Ity_I32);
       itmpB = newTemp(Ity_I64);
       frA = newTemp(Ity_F64);
-      assign( frA, getFReg(frA_addr) );
+      assign( frA, getFReg(frA_addr, guest_is_BE) );
 
       /* get A's sign bit */
       assign(signA, binop(Iop_And32,
@@ -9018,7 +9156,7 @@ static Bool dis_fp_move ( UInt theInstr )
       return False;
    }
 
-   putFReg( frD_addr, mkexpr(frD) );
+   putFReg( frD_addr, mkexpr(frD), guest_is_BE );
 
    /* None of these change FPRF.  cr1 is set in the usual way though,
       if flag_rC is set. */
@@ -9036,7 +9174,7 @@ static Bool dis_fp_move ( UInt theInstr )
 /*
   Floating Point Status/Control Register Instructions
 */
-static Bool dis_fp_scr ( UInt theInstr, Bool GX_level )
+static Bool dis_fp_scr ( UInt theInstr, Bool GX_level, Bool guest_is_BE )
 {
    /* Many forms - see each switch case */
    UChar opc1    = ifieldOPC(theInstr);
@@ -9138,8 +9276,9 @@ static Bool dis_fp_scr ( UInt theInstr, Bool GX_level )
       }
       DIP("mffs%s fr%u\n", flag_rC ? ".":"", frD_addr);
       putFReg( frD_addr,
-          unop( Iop_ReinterpI64asF64,
-                binop( Iop_32HLto64, fpscr_upper, fpscr_lower ) ) );
+               unop( Iop_ReinterpI64asF64,
+                     binop( Iop_32HLto64, fpscr_upper, fpscr_lower ) ),
+               guest_is_BE );
       break;
    }
 
@@ -9195,7 +9334,7 @@ static Bool dis_fp_scr ( UInt theInstr, Bool GX_level )
             }
          }
       }
-      assign( frB, getFReg(frB_addr));
+      assign( frB, getFReg(frB_addr, guest_is_BE));
       assign( rB_64, unop( Iop_ReinterpF64asI64, mkexpr( frB ) ) );
       putGST_masked( PPC_GST_FPSCR, mkexpr( rB_64 ), mask );
       break;
@@ -9855,7 +9994,7 @@ static IRExpr * Check_unordered(IRExpr * val)
 /*------------------------------------------------------------*/
 
 /* DFP Arithmetic instructions */
-static Bool dis_dfp_arith(UInt theInstr)
+static Bool dis_dfp_arith(UInt theInstr, Bool guest_is_BE)
 {
    UInt opc2 = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
@@ -9876,8 +10015,8 @@ static Bool dis_dfp_arith(UInt theInstr)
     */
    Bool clear_CR1 = True;
 
-   assign( frA, getDReg( frA_addr ) );
-   assign( frB, getDReg( frB_addr ) );
+   assign( frA, getDReg( frA_addr, guest_is_BE ) );
+   assign( frB, getDReg( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x2: // dadd
@@ -9902,7 +10041,7 @@ static Bool dis_dfp_arith(UInt theInstr)
       break;
    }
 
-   putDReg( frS_addr, mkexpr( frS ) );
+   putDReg( frS_addr, mkexpr( frS), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -9913,7 +10052,7 @@ static Bool dis_dfp_arith(UInt theInstr)
 }
 
 /* Quad DFP Arithmetic instructions */
-static Bool dis_dfp_arithq(UInt theInstr)
+static Bool dis_dfp_arithq(UInt theInstr, Bool guest_is_BE)
 {
    UInt opc2 = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
@@ -9934,8 +10073,8 @@ static Bool dis_dfp_arithq(UInt theInstr)
     */
    Bool clear_CR1 = True;
 
-   assign( frA, getDReg_pair( frA_addr ) );
-   assign( frB, getDReg_pair( frB_addr ) );
+   assign( frA, getDReg_pair( frA_addr, guest_is_BE ) );
+   assign( frB, getDReg_pair( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x2: // daddq
@@ -9960,7 +10099,7 @@ static Bool dis_dfp_arithq(UInt theInstr)
       break;
    }
 
-   putDReg_pair( frS_addr, mkexpr( frS ) );
+   putDReg_pair( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -9971,7 +10110,7 @@ static Bool dis_dfp_arithq(UInt theInstr)
 }
 
 /* DFP 64-bit logical shift instructions  */
-static Bool dis_dfp_shift(UInt theInstr) {
+static Bool dis_dfp_shift(UInt theInstr, Bool guest_is_BE) {
    UInt opc2       = ifieldOPClo9( theInstr );
    UChar frS_addr  = ifieldRegDS( theInstr );
    UChar frA_addr  = ifieldRegA( theInstr );
@@ -9982,7 +10121,7 @@ static Bool dis_dfp_shift(UInt theInstr) {
    IRTemp frS = newTemp( Ity_D64 );
    Bool clear_CR1 = True;
 
-   assign( frA, getDReg( frA_addr ) );
+   assign( frA, getDReg( frA_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x42: // dscli
@@ -9997,7 +10136,7 @@ static Bool dis_dfp_shift(UInt theInstr) {
       break;
    }
 
-   putDReg( frS_addr, mkexpr( frS ) );
+   putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -10008,7 +10147,7 @@ static Bool dis_dfp_shift(UInt theInstr) {
 }
 
 /* Quad DFP  logical shift instructions  */
-static Bool dis_dfp_shiftq(UInt theInstr) {
+static Bool dis_dfp_shiftq(UInt theInstr, Bool guest_is_BE) {
    UInt opc2       = ifieldOPClo9( theInstr );
    UChar frS_addr  = ifieldRegDS( theInstr );
    UChar frA_addr  = ifieldRegA( theInstr );
@@ -10019,7 +10158,7 @@ static Bool dis_dfp_shiftq(UInt theInstr) {
    IRTemp frS = newTemp( Ity_D128 );
    Bool clear_CR1 = True;
 
-   assign( frA, getDReg_pair( frA_addr ) );
+   assign( frA, getDReg_pair( frA_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x42: // dscliq
@@ -10034,7 +10173,7 @@ static Bool dis_dfp_shiftq(UInt theInstr) {
       break;
    }
 
-   putDReg_pair( frS_addr, mkexpr( frS ) );
+   putDReg_pair( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -10045,7 +10184,7 @@ static Bool dis_dfp_shiftq(UInt theInstr) {
 }
 
 /* DFP 64-bit format conversion instructions */
-static Bool dis_dfp_fmt_conv(UInt theInstr) {
+static Bool dis_dfp_fmt_conv(UInt theInstr, Bool guest_is_BE) {
    UInt opc2      = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frB_addr = ifieldRegB( theInstr );
@@ -10062,18 +10201,18 @@ static Bool dis_dfp_fmt_conv(UInt theInstr) {
 
       frB = newTemp( Ity_D32 );
       frS = newTemp( Ity_D64 );
-      assign( frB, getDReg32( frB_addr ) );
+      assign( frB, getDReg32( frB_addr, guest_is_BE ) );
       assign( frS, unop( Iop_D32toD64, mkexpr( frB ) ) );
-      putDReg( frS_addr, mkexpr( frS ) );
+      putDReg( frS_addr, mkexpr( frS), guest_is_BE );
       break;
    case 0x302: // drsp
       DIP( "drsp%s fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frB_addr );
       frB = newTemp( Ity_D64 );
       frS = newTemp( Ity_D32 );
-      assign( frB, getDReg( frB_addr ) );
+      assign( frB, getDReg( frB_addr, guest_is_BE ) );
       assign( frS, binop( Iop_D64toD32, round, mkexpr( frB ) ) );
-      putDReg32( frS_addr, mkexpr( frS ) );
+      putDReg32( frS_addr, mkexpr( frS ), guest_is_BE );
       break;
    case 0x122: // dctfix
       {
@@ -10083,10 +10222,10 @@ static Bool dis_dfp_fmt_conv(UInt theInstr) {
               flag_rC ? ".":"", frS_addr, frB_addr );
          frB = newTemp( Ity_D64 );
          frS = newTemp( Ity_D64 );
-         assign( frB, getDReg( frB_addr ) );
+         assign( frB, getDReg( frB_addr, guest_is_BE ) );
          assign( tmp, binop( Iop_D64toI64S, round, mkexpr( frB ) ) );
          assign( frS, unop( Iop_ReinterpI64asD64, mkexpr( tmp ) ) );
-         putDReg( frS_addr, mkexpr( frS ) );
+         putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
       }
       break;
    case 0x322: // dcffix
@@ -10094,11 +10233,11 @@ static Bool dis_dfp_fmt_conv(UInt theInstr) {
            flag_rC ? ".":"", frS_addr, frB_addr );
       frB = newTemp( Ity_D64 );
       frS = newTemp( Ity_D64 );
-      assign( frB, getDReg( frB_addr ) );
+      assign( frB, getDReg( frB_addr, guest_is_BE ) );
       assign( frS, binop( Iop_I64StoD64,
                           round,
                           unop( Iop_ReinterpD64asI64, mkexpr( frB ) ) ) );
-      putDReg( frS_addr, mkexpr( frS ) );
+      putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
       break;
    }
 
@@ -10111,7 +10250,7 @@ static Bool dis_dfp_fmt_conv(UInt theInstr) {
 }
 
 /* Quad DFP format conversion instructions */
-static Bool dis_dfp_fmt_convq(UInt theInstr) {
+static Bool dis_dfp_fmt_convq(UInt theInstr, Bool guest_is_BE) {
    UInt opc2      = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frB_addr = ifieldRegB( theInstr );
@@ -10127,9 +10266,9 @@ static Bool dis_dfp_fmt_convq(UInt theInstr) {
    case 0x102: // dctqpq
       DIP( "dctqpq%s fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frB_addr );
-      assign( frB64, getDReg( frB_addr ) );
+      assign( frB64, getDReg( frB_addr, guest_is_BE ) );
       assign( frS128, unop( Iop_D64toD128, mkexpr( frB64 ) ) );
-      putDReg_pair( frS_addr, mkexpr( frS128 ) );
+      putDReg_pair( frS_addr, mkexpr( frS128 ), guest_is_BE );
       break;
    case 0x122: // dctfixq
       {
@@ -10137,18 +10276,18 @@ static Bool dis_dfp_fmt_convq(UInt theInstr) {
 
          DIP( "dctfixq%s fr%u,fr%u\n",
               flag_rC ? ".":"", frS_addr, frB_addr );
-         assign( frB128, getDReg_pair( frB_addr ) );
+         assign( frB128, getDReg_pair( frB_addr, guest_is_BE ) );
          assign( tmp, binop( Iop_D128toI64S, round, mkexpr( frB128 ) ) );
          assign( frS64, unop( Iop_ReinterpI64asD64, mkexpr( tmp ) ) );
-         putDReg( frS_addr, mkexpr( frS64 ) );
+         putDReg( frS_addr, mkexpr( frS64 ), guest_is_BE );
       }
       break;
    case 0x302: //drdpq
       DIP( "drdpq%s fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frB_addr );
-      assign( frB128, getDReg_pair( frB_addr ) );
+      assign( frB128, getDReg_pair( frB_addr, guest_is_BE ) );
       assign( frS64, binop( Iop_D128toD64, round, mkexpr( frB128 ) ) );
-      putDReg( frS_addr, mkexpr( frS64 ) );
+      putDReg( frS_addr, mkexpr( frS64 ), guest_is_BE );
       break;
    case 0x322: // dcffixq
      {
@@ -10158,11 +10297,11 @@ static Bool dis_dfp_fmt_convq(UInt theInstr) {
        */
       DIP( "dcffixq%s fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frB_addr );
-      assign( frB64, getDReg( frB_addr ) );
+      assign( frB64, getDReg( frB_addr, guest_is_BE ) );
       assign( frS128, unop( Iop_I64StoD128,
                             unop( Iop_ReinterpD64asI64,
                                   mkexpr( frB64 ) ) ) );
-      putDReg_pair( frS_addr, mkexpr( frS128 ) );
+      putDReg_pair( frS_addr, mkexpr( frS128 ), guest_is_BE );
       break;
      }
    }
@@ -10175,7 +10314,7 @@ static Bool dis_dfp_fmt_convq(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_round( UInt theInstr ) {
+static Bool dis_dfp_round( UInt theInstr, Bool guest_is_BE ) {
    UChar frS_addr = ifieldRegDS(theInstr);
    UChar R        = IFIELD(theInstr, 16, 1);
    UChar RMC      = IFIELD(theInstr, 9, 2);
@@ -10201,11 +10340,11 @@ static Bool dis_dfp_round( UInt theInstr ) {
        * is zero.  The result is a floating point number.
        */
       /* pass the value of R and RMC in the same field */
-      assign( frB, getDReg( frB_addr ) );
+      assign( frB, getDReg( frB_addr, guest_is_BE ) );
       assign( frS, binop( Iop_RoundD64toInt,
                           mkU32( ( R << 3 ) | RMC ),
                           mkexpr( frB ) ) );
-      putDReg( frS_addr, mkexpr( frS ) );
+      putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
       break;
    default:
       vex_printf("dis_dfp_round(ppc)(opc2)\n");
@@ -10220,7 +10359,7 @@ static Bool dis_dfp_round( UInt theInstr ) {
    return True;
 }
 
-static Bool dis_dfp_roundq(UInt theInstr) {
+static Bool dis_dfp_roundq(UInt theInstr, Bool guest_is_BE) {
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frB_addr = ifieldRegB( theInstr );
    UChar R = IFIELD(theInstr, 16, 1);
@@ -10242,11 +10381,11 @@ static Bool dis_dfp_roundq(UInt theInstr) {
            flag_rC ? ".":"", frS_addr, frB_addr );
 
       /* pass the value of R and RMC in the same field */
-      assign( frB, getDReg_pair( frB_addr ) );
+      assign( frB, getDReg_pair( frB_addr, guest_is_BE ) );
       assign( frS, binop( Iop_RoundD128toInt,
                           mkU32( ( R << 3 ) | RMC ),
                           mkexpr( frB ) ) );
-      putDReg_pair( frS_addr, mkexpr( frS ) );
+      putDReg_pair( frS_addr, mkexpr( frS ), guest_is_BE );
       break;
    default:
       vex_printf("dis_dfp_roundq(ppc)(opc2)\n");
@@ -10261,7 +10400,7 @@ static Bool dis_dfp_roundq(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
+static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr, Bool guest_is_BE) {
    UInt opc2 = ifieldOPClo8( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frA_addr = ifieldRegA( theInstr );
@@ -10275,7 +10414,7 @@ static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
    IRTemp frS = newTemp( Ity_D64 );
    Bool clear_CR1 = True;
 
-   assign( frB, getDReg( frB_addr ) );
+   assign( frB, getDReg( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x43: // dquai
@@ -10317,7 +10456,7 @@ static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
    case 0x3: // dqua
       DIP( "dqua%s fr%u,fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frA_addr, frB_addr );
-      assign( frA, getDReg( frA_addr ) );
+      assign( frA, getDReg( frA_addr, guest_is_BE ) );
       assign( frS, triop( Iop_QuantizeD64,
                           mkU32( RMC ),
                           mkexpr( frA ),
@@ -10329,7 +10468,7 @@ static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
 
          DIP( "drrnd%s fr%u,fr%u,fr%u\n",
               flag_rC ? ".":"", frS_addr, frA_addr, frB_addr );
-         assign( frA, getDReg( frA_addr ) );
+         assign( frA, getDReg( frA_addr, guest_is_BE ) );
          /* Iop_64to8 not supported in 32 bit mode, do it in two steps. */
          assign( tmp, unop( Iop_32to8,
                             unop( Iop_64to32,
@@ -10345,7 +10484,7 @@ static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
       vex_printf("dis_dfp_quantize_sig_rrnd(ppc)(opc2)\n");
       return False;
    }
-   putDReg( frS_addr, mkexpr( frS ) );
+   putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -10355,7 +10494,7 @@ static Bool dis_dfp_quantize_sig_rrnd(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
+static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr, Bool guest_is_BE) {
    UInt opc2 = ifieldOPClo8( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frA_addr = ifieldRegA( theInstr );
@@ -10369,7 +10508,7 @@ static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
    IRTemp frS = newTemp( Ity_D128 );
    Bool clear_CR1 = True;
 
-   assign( frB, getDReg_pair( frB_addr ) );
+   assign( frB, getDReg_pair( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x43: // dquaiq
@@ -10412,7 +10551,7 @@ static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
    case 0x3: // dquaq
-      DIP( "dquaiq%s fr%u,fr%u,fr%u\n",
+      DIP( "dquaq%s fr%u,fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frA_addr, frB_addr );
-      assign( frA, getDReg_pair( frA_addr ) );
+      assign( frA, getDReg_pair( frA_addr, guest_is_BE ) );
       assign( frS, triop( Iop_QuantizeD128,
                           mkU32( RMC ),
                           mkexpr( frA ),
@@ -10424,7 +10563,7 @@ static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
 
          DIP( "drrndq%s fr%u,fr%u,fr%u\n",
               flag_rC ? ".":"", frS_addr, frA_addr, frB_addr );
-         assign( frA, getDReg_pair( frA_addr ) );
+         assign( frA, getDReg_pair( frA_addr, guest_is_BE ) );
          assign( tmp, unop( Iop_32to8,
                             unop( Iop_64to32,
                                   unop( Iop_ReinterpD64asI64,
@@ -10440,7 +10579,7 @@ static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
       vex_printf("dis_dfp_quantize_sig_rrndq(ppc)(opc2)\n");
       return False;
    }
-   putDReg_pair( frS_addr, mkexpr( frS ) );
+   putDReg_pair( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -10450,7 +10589,7 @@ static Bool dis_dfp_quantize_sig_rrndq(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_extract_insert(UInt theInstr) {
+static Bool dis_dfp_extract_insert(UInt theInstr, Bool guest_is_BE) {
    UInt opc2 = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frA_addr = ifieldRegA( theInstr );
@@ -10463,8 +10602,8 @@ static Bool dis_dfp_extract_insert(UInt theInstr) {
    IRTemp frS = newTemp( Ity_D64 );
    IRTemp tmp = newTemp( Ity_I64 );
 
-   assign( frA, getDReg( frA_addr ) );
-   assign( frB, getDReg( frB_addr ) );
+   assign( frA, getDReg( frA_addr, guest_is_BE ) );
+   assign( frB, getDReg( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x162: // dxex
@@ -10486,7 +10625,7 @@ static Bool dis_dfp_extract_insert(UInt theInstr) {
       return False;
    }
 
-   putDReg( frS_addr, mkexpr( frS ) );
+   putDReg( frS_addr, mkexpr( frS ), guest_is_BE );
 
    if (flag_rC && clear_CR1) {
       putCR321( 1, mkU8( 0 ) );
@@ -10496,7 +10635,7 @@ static Bool dis_dfp_extract_insert(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_extract_insertq(UInt theInstr) {
+static Bool dis_dfp_extract_insertq(UInt theInstr, Bool guest_is_BE) {
    UInt opc2 = ifieldOPClo10( theInstr );
    UChar frS_addr = ifieldRegDS( theInstr );
    UChar frA_addr = ifieldRegA( theInstr );
@@ -10510,7 +10649,7 @@ static Bool dis_dfp_extract_insertq(UInt theInstr) {
    IRTemp tmp   = newTemp( Ity_I64 );
    Bool clear_CR1 = True;
 
-   assign( frB, getDReg_pair( frB_addr ) );
+   assign( frB, getDReg_pair( frB_addr, guest_is_BE ) );
 
    switch (opc2) {
    case 0x162:  // dxexq
@@ -10522,16 +10661,16 @@ static Bool dis_dfp_extract_insertq(UInt theInstr) {
        */
       assign( tmp, unop( Iop_ExtractExpD128, mkexpr( frB ) ) );
       assign( frS64, unop( Iop_ReinterpI64asD64, mkexpr( tmp ) ) );
-      putDReg( frS_addr, mkexpr( frS64 ) );
+      putDReg( frS_addr, mkexpr( frS64), guest_is_BE );
       break;
    case 0x362:  // diexq
       DIP( "diexq%s fr%u,fr%u,fr%u\n",
            flag_rC ? ".":"", frS_addr, frA_addr, frB_addr );
-      assign( frA, getDReg( frA_addr ) );
+      assign( frA, getDReg( frA_addr, guest_is_BE ) );
       assign( frS, binop( Iop_InsertExpD128,
                           unop( Iop_ReinterpD64asI64, mkexpr( frA ) ),
                           mkexpr( frB ) ) );
-      putDReg_pair( frS_addr, mkexpr( frS ) );
+      putDReg_pair( frS_addr, mkexpr( frS ), guest_is_BE );
       break;
    default:
       vex_printf("dis_dfp_extract_insertq(ppc)(opc2)\n");
@@ -10547,7 +10686,7 @@ static Bool dis_dfp_extract_insertq(UInt theInstr) {
 }
 
 /* DFP 64-bit comparison instructions */
-static Bool dis_dfp_compare(UInt theInstr) {
+static Bool dis_dfp_compare(UInt theInstr, Bool guest_is_BE) {
    /* X-Form */
    UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) ); // AKA BF
    UChar frA_addr = ifieldRegA( theInstr );
@@ -10568,8 +10707,8 @@ static Bool dis_dfp_compare(UInt theInstr) {
       frA = newTemp( Ity_D64 );
       frB = newTemp( Ity_D64 );
 
-      assign( frA, getDReg( frA_addr ) );
-      assign( frB, getDReg( frB_addr ) );
+      assign( frA, getDReg( frA_addr, guest_is_BE ) );
+      assign( frB, getDReg( frB_addr, guest_is_BE ) );
 
       assign( ccIR, binop( Iop_CmpD64, mkexpr( frA ), mkexpr( frB ) ) );
       break;
@@ -10578,8 +10717,8 @@ static Bool dis_dfp_compare(UInt theInstr) {
       frA = newTemp( Ity_D128 );
       frB = newTemp( Ity_D128 );
 
-      assign( frA, getDReg_pair( frA_addr ) );
-      assign( frB, getDReg_pair( frB_addr ) );
+      assign( frA, getDReg_pair( frA_addr, guest_is_BE ) );
+      assign( frB, getDReg_pair( frB_addr, guest_is_BE ) );
       assign( ccIR, binop( Iop_CmpD128, mkexpr( frA ), mkexpr( frB ) ) );
       break;
    default:
@@ -10621,7 +10760,7 @@ static Bool dis_dfp_compare(UInt theInstr) {
 }
 
 /* Test class/group/exponent/significance instructions. */
-static Bool dis_dfp_exponent_test ( UInt theInstr )
+static Bool dis_dfp_exponent_test ( UInt theInstr, Bool guest_is_BE )
 {
    UChar frA_addr   = ifieldRegA( theInstr );
    UChar frB_addr   = ifieldRegB( theInstr );
@@ -10655,8 +10794,8 @@ static Bool dis_dfp_exponent_test ( UInt theInstr )
    switch (opc1) {
    case 0x3b: // dtstex       Extended instruction setup
       DIP("dtstex %u,r%u,r%d\n", crfD, frA_addr, frB_addr);
-      assign( frA, getDReg( frA_addr ) );
-      assign( frB, getDReg( frB_addr ) );
+      assign( frA, getDReg( frA_addr , guest_is_BE) );
+      assign( frB, getDReg( frB_addr , guest_is_BE) );
       assign( gfield_mask, mkU32( DFP_G_FIELD_LONG_MASK ) );
       assign(exponent_A, unop( Iop_64to32,
                                unop( Iop_ExtractExpD64,
@@ -10668,8 +10807,8 @@ static Bool dis_dfp_exponent_test ( UInt theInstr )
 
    case 0x3F: //  dtstexq      Quad instruction setup
       DIP("dtstexq %u,r%u,r%d\n", crfD, frA_addr, frB_addr);
-      assign( frA128, getDReg_pair( frA_addr ) );
-      assign( frB128, getDReg_pair( frB_addr ) );
+      assign( frA128, getDReg_pair( frA_addr, guest_is_BE ) );
+      assign( frB128, getDReg_pair( frB_addr, guest_is_BE ) );
       assign( frA, unop( Iop_D128HItoD64, mkexpr( frA128 ) ) );
       assign( frB, unop( Iop_D128HItoD64, mkexpr( frB128 ) ) );
       assign( gfield_mask, mkU32( DFP_G_FIELD_EXTND_MASK ) );
@@ -10813,7 +10952,7 @@ static Bool dis_dfp_exponent_test ( UInt theInstr )
 }
 
 /* Test class/group/exponent/significance instructions. */
-static Bool dis_dfp_class_test ( UInt theInstr )
+static Bool dis_dfp_class_test ( UInt theInstr, Bool guest_is_BE )
 {
    UChar frA_addr   = ifieldRegA( theInstr );
    IRTemp frA       = newTemp( Ity_D64 );
@@ -10875,7 +11014,7 @@ static Bool dis_dfp_class_test ( UInt theInstr )
     *	 LT             | 0x01
     */
 
-   assign( frA, getDReg( frA_addr ) );
+   assign( frA, getDReg( frA_addr, guest_is_BE ) );
    assign( frAI64_hi, unop( Iop_ReinterpD64asI64, mkexpr( frA ) ) );
 
    assign( abs_frA, unop( Iop_ReinterpI64asD64,
@@ -10928,14 +11067,14 @@ static Bool dis_dfp_class_test ( UInt theInstr )
        * two instructions
        */
       assign( frAI64_lo, unop( Iop_ReinterpD64asI64,
-                               getDReg( frA_addr+1 ) ) );
+                               getDReg( frA_addr+1, guest_is_BE ) ) );
 
       assign( gfield_mask, mkU32( DFP_G_FIELD_EXTND_MASK ) );
       max_exp = DFP_EXTND_EXP_MAX;
       min_exp = DFP_EXTND_EXP_MIN;
       assign( exponent, unop( Iop_64to32, 
                               unop( Iop_ExtractExpD128,
-                                    getDReg_pair( frA_addr) ) ) );
+                                    getDReg_pair( frA_addr, guest_is_BE ) ) ) );
 
       /* create quand exponent for minimum normal number */
       assign( exp_min_normal, mkU64( 6176 - 6143 ) );
@@ -10957,13 +11096,13 @@ static Bool dis_dfp_class_test ( UInt theInstr )
                                          unop( Iop_ReinterpD64asI64,
                                                mkexpr( frA ) ),
                                          mkU64( 0x7FFFFFFFFFFFFFFFULL ) ) ),
-                            getDReg( frA_addr+1 ) ),
+                            getDReg( frA_addr+1, guest_is_BE ) ),
                      mkexpr( min_subnormalD128 ) ) );
       assign( ccIR_zero,
               binop( Iop_CmpD128,
                      binop( Iop_D64HLtoD128,
                             mkexpr( abs_frA ),
-                            getDReg( frA_addr+1 ) ),
+                            getDReg( frA_addr+1, guest_is_BE ) ),
                      unop( Iop_D64toD128,
                            unop( Iop_ReinterpI64asD64,
                                  mkU64( 0x0ULL ) ) ) ) );
@@ -11256,7 +11395,7 @@ static Bool dis_dfp_class_test ( UInt theInstr )
    return True;
 }
 
-static Bool dis_dfp_bcd(UInt theInstr) {
+static Bool dis_dfp_bcd(UInt theInstr, Bool guest_is_BE) {
    UInt opc2        = ifieldOPClo10( theInstr );
    ULong sp         = IFIELD(theInstr, 19, 2);
    ULong s          = IFIELD(theInstr, 20, 1);
@@ -11273,7 +11412,7 @@ static Bool dis_dfp_bcd(UInt theInstr) {
    IRTemp dbcd_l    = newTemp( Ity_I32 );
    IRTemp lmd       = newTemp( Ity_I32 );
 
-   assign( frB, getDReg( frB_addr ) );
+   assign( frB, getDReg( frB_addr, guest_is_BE ) );
    assign( frBI64, unop( Iop_ReinterpD64asI64, mkexpr( frB ) ) );
 
    switch ( opc2 ) {
@@ -11352,7 +11491,8 @@ static Bool dis_dfp_bcd(UInt theInstr) {
                                       mkU8( 4 ) ) ) ) );
       }
 
-      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result ) ) );
+      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result ) ),
+               guest_is_BE );
       break;
 
    case 0x342: // denbcd   DFP Encode BCD to DPD
@@ -11506,7 +11646,7 @@ static Bool dis_dfp_bcd(UInt theInstr) {
                                   binop( Iop_And32,
                                          mkU32( 0x0 ),
                                          mkexpr( invalid_mask ) ) ) ) ) );
-      putDReg( frT_addr, mkexpr( resultD64 ) );
+      putDReg( frT_addr, mkexpr( resultD64 ), guest_is_BE );
    }
    break;
    default:
@@ -11516,7 +11656,7 @@ static Bool dis_dfp_bcd(UInt theInstr) {
    return True;
 }
 
-static Bool dis_dfp_bcdq( UInt theInstr )
+static Bool dis_dfp_bcdq( UInt theInstr, Bool guest_is_BE )
 {
    UInt opc2        = ifieldOPClo10( theInstr );
    ULong sp         = IFIELD(theInstr, 19, 2);
@@ -11532,8 +11672,8 @@ static Bool dis_dfp_bcdq( UInt theInstr )
    IRTemp result_hi = newTemp( Ity_I64 );
    IRTemp result_lo = newTemp( Ity_I64 );
 
-   assign( frB_hi, getDReg( frB_addr ) );
-   assign( frB_lo, getDReg( frB_addr + 1 ) );
+   assign( frB_hi, getDReg( frB_addr, guest_is_BE ) );
+   assign( frB_lo, getDReg( frB_addr + 1, guest_is_BE ) );
    assign( frBI64_hi, unop( Iop_ReinterpD64asI64, mkexpr( frB_hi ) ) );
    assign( frBI64_lo, unop( Iop_ReinterpD64asI64, mkexpr( frB_lo ) ) );
 
@@ -11647,9 +11787,11 @@ static Bool dis_dfp_bcdq( UInt theInstr )
                                mkexpr( sign ) ) ) );
       }
 
-      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result_hi ) ) );
+      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result_hi ) ),
+	       guest_is_BE);
       putDReg( frT_addr + 1,
-               unop( Iop_ReinterpI64asD64, mkexpr( result_lo ) ) );
+               unop( Iop_ReinterpI64asD64, mkexpr( result_lo ) ),
+	       guest_is_BE);
    }
    break;
    case 0x342: // denbcdq   DFP Encode BCD to DPD
@@ -11912,9 +12054,11 @@ static Bool dis_dfp_bcdq( UInt theInstr )
                                    mkU32( 0x0 ),
                                    mkexpr( invalid_mask ) ) ) ) );
 
-      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result_hi ) ) );
+      putDReg( frT_addr, unop( Iop_ReinterpI64asD64, mkexpr( result_hi ) ),
+	       guest_is_BE);
       putDReg( frT_addr + 1,
-               unop( Iop_ReinterpI64asD64, mkexpr( result_lo ) ) );
+               unop( Iop_ReinterpI64asD64, mkexpr( result_lo ) ),
+               guest_is_BE );
 
    }
    break;
@@ -11925,7 +12069,7 @@ static Bool dis_dfp_bcdq( UInt theInstr )
    return True;
 }
 
-static Bool dis_dfp_significant_digits( UInt theInstr )
+static Bool dis_dfp_significant_digits( UInt theInstr, Bool guest_is_BE )
 {
    UChar frA_addr = ifieldRegA( theInstr );
    UChar frB_addr = ifieldRegB( theInstr );
@@ -11944,7 +12088,7 @@ static Bool dis_dfp_significant_digits( UInt theInstr )
    IRTemp KisZero_false_mask = newTemp( Ity_I32 );
 
    /* Get the reference singificance stored in frA */
-   assign( frA, getDReg( frA_addr ) );
+   assign( frA, getDReg( frA_addr, guest_is_BE ) );
 
    /* Convert from 64 bit to 8 bits in two steps.  The Iop_64to8 is not 
     * supported in 32-bit mode.
@@ -11967,7 +12111,7 @@ static Bool dis_dfp_significant_digits( UInt theInstr )
 
       DIP( "dtstsf %u,r%u,r%u\n", crfD, frA_addr, frB_addr );
 
-      assign( frB, getDReg( frB_addr ) );
+      assign( frB, getDReg( frB_addr, guest_is_BE ) );
       assign( frBI64, unop( Iop_ReinterpD64asI64, mkexpr( frB ) ) );
 
       /* Get the BCD string for the value stored in a series of I32 values.
@@ -12007,8 +12151,8 @@ static Bool dis_dfp_significant_digits( UInt theInstr )
 
       DIP( "dtstsfq %u,r%u,r%u\n", crfD, frA_addr, frB_addr );
 
-      assign( frB_hi, getDReg( frB_addr ) );
-      assign( frB_lo, getDReg( frB_addr + 1 ) );
+      assign( frB_hi, getDReg( frB_addr, guest_is_BE ) );
+      assign( frB_lo, getDReg( frB_addr + 1, guest_is_BE ) );
 
       assign( frBI64_hi, unop( Iop_ReinterpD64asI64, mkexpr( frB_hi ) ) );
       assign( frBI64_lo, unop( Iop_ReinterpD64asI64, mkexpr( frB_lo ) ) );
@@ -14767,8 +14911,9 @@ dis_vvec_cmp( UInt theInstr, UInt opc2 )
  * Miscellaneous VSX Scalar Instructions
  */
 static Bool
-dis_vxs_misc( UInt theInstr, UInt opc2 )
+dis_vxs_misc( UInt theInstr, UInt opc2, Bool guest_is_BE )
 {
+#define VG_PPC_SIGN_MASK 0x7fffffffffffffffULL
    /* XX3-Form and XX2-Form */
    UChar opc1 = ifieldOPC( theInstr );
    UChar XT = ifieldRegXT ( theInstr );
@@ -14795,7 +14940,20 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
       {
          /* Move abs val of dw 0 of VSX[XB] to dw 0 of VSX[XT]. */
          IRTemp absVal = newTemp(Ity_V128);
-         assign(absVal, binop(Iop_ShrV128, binop(Iop_ShlV128, mkexpr(vB), mkU8(1)), mkU8(1)));
+         if (!guest_is_BE) {
+            IRTemp hi64 = newTemp(Ity_I64);
+            IRTemp lo64 = newTemp(Ity_I64);
+            assign( hi64, unop( Iop_V128HIto64, mkexpr(vB) ) );
+            assign( lo64, unop( Iop_V128to64, mkexpr(vB) ) );
+            assign( absVal, binop( Iop_64HLtoV128,
+                                   binop( Iop_And64, mkexpr(hi64),
+                                          mkU64(VG_PPC_SIGN_MASK) ),
+                                   mkexpr(lo64) ) );
+         } else {
+            assign(absVal, binop(Iop_ShrV128,
+                                 binop(Iop_ShlV128, mkexpr(vB),
+                                       mkU8(1)), mkU8(1)));
+         }
          DIP("xsabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
          putVSReg(XT, mkexpr(absVal));
          break;
@@ -14803,51 +14961,73 @@ dis_vxs_misc( UInt theInstr, UInt opc2 )
       case 0x2C0: // xscpsgndp
       {
          /* Scalar copy sign double-precision */
-         IRTemp vecA_signbit = newTemp(Ity_V128);
-         IRTemp vecB_no_signbit = newTemp(Ity_V128);
+         IRTemp vecA_signed = newTemp(Ity_I64);
+         IRTemp vecB_unsigned = newTemp(Ity_I64);
          IRTemp vec_result = newTemp(Ity_V128);
          DIP("xscpsgndp v%d,v%d v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
-         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
-                                                             mkexpr( vB ),
-                                                             mkU8( 1 ) ),
-                                         mkU8( 1 ) ) );
-         assign( vecA_signbit, binop( Iop_ShlV128, binop( Iop_ShrV128,
-                                                          mkexpr( vA ),
-                                                          mkU8( 127 ) ),
-                                      mkU8( 127 ) ) );
-         assign( vec_result, binop( Iop_OrV128, mkexpr(vecA_signbit), mkexpr( vecB_no_signbit ) ) );
+         assign( vecA_signed, binop( Iop_And64,
+                                     unop( Iop_V128HIto64,
+                                           mkexpr(vA)),
+                                           mkU64(~VG_PPC_SIGN_MASK) ) );
+         assign( vecB_unsigned, binop( Iop_And64,
+                                       unop( Iop_V128HIto64,
+                                             mkexpr(vB) ),
+                                             mkU64(VG_PPC_SIGN_MASK) ) );
+         assign( vec_result, binop( Iop_64HLtoV128,
+                                    binop( Iop_Or64,
+                                           mkexpr(vecA_signed),
+                                           mkexpr(vecB_unsigned) ),
+                                    mkU64(0x0ULL)));
          putVSReg(XT, mkexpr(vec_result));
          break;
       }
       case 0x2D2: // xsnabsdp
       {
          /* Scalar negative absolute value double-precision */
-         IRTemp vec_neg_signbit = newTemp(Ity_V128);
+         IRTemp BHi_signed = newTemp(Ity_I64);
          DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
-         assign( vec_neg_signbit, unop( Iop_NotV128, binop( Iop_ShrV128,
-                                                            mkV128( 0xffff ),
-                                                            mkU8( 1 ) ) ) );
-         putVSReg(XT, binop(Iop_OrV128, mkexpr(vec_neg_signbit), mkexpr(vB)));
+         assign( BHi_signed, binop( Iop_Or64,
+                                    unop( Iop_V128HIto64,
+                                          mkexpr(vB) ),
+                                          mkU64(~VG_PPC_SIGN_MASK) ) );
+         putVSReg(XT, binop( Iop_64HLtoV128,
+                             mkexpr(BHi_signed), mkU64(0x0ULL) ) );
          break;
       }
       case 0x2F2: // xsnegdp
       {
          /* Scalar negate double-precision */
-         IRTemp vecB_no_signbit = newTemp(Ity_V128);
-         IRTemp vecB_signbit_comp = newTemp(Ity_V128);
+         IRTemp BHi_signed = newTemp(Ity_I64);
+         IRTemp BHi_unsigned = newTemp(Ity_I64);
+         IRTemp BHi_negated = newTemp(Ity_I64);
+         IRTemp BHi_negated_signbit = newTemp(Ity_I1);
+         IRTemp vec_result = newTemp(Ity_V128);
          DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
-         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
-                                                             mkexpr( vB ),
-                                                             mkU8( 1 ) ),
-                                         mkU8( 1 ) ) );
-         assign( vecB_signbit_comp, binop( Iop_ShlV128,
-                                           unop( Iop_NotV128,
-                                                 binop( Iop_ShrV128,
-                                                        mkexpr( vB ),
-                                                        mkU8( 127 ) ) ),
-                                           mkU8( 127 ) ) );
-         putVSReg( XT, binop( Iop_OrV128, mkexpr( vecB_no_signbit ),
-                              mkexpr( vecB_signbit_comp ) ) );
+         assign( BHi_signed, unop( Iop_V128HIto64, mkexpr(vB) ) );
+         assign( BHi_unsigned, binop( Iop_And64, mkexpr(BHi_signed),
+                                      mkU64(VG_PPC_SIGN_MASK) ) );
+         assign( BHi_negated_signbit,
+                 unop( Iop_Not1,
+                       unop( Iop_32to1,
+                             binop( Iop_Shr32,
+                                    unop( Iop_64HIto32,
+                                          binop( Iop_And64,
+                                                 mkexpr(BHi_signed),
+                                                 mkU64(~VG_PPC_SIGN_MASK) )
+                                          ),
+                                    mkU8(31) ) ) ) );
+         assign( BHi_negated,
+                 binop( Iop_Or64,
+                        binop( Iop_32HLto64,
+                               binop( Iop_Shl32,
+                                      unop( Iop_1Uto32,
+                                            mkexpr(BHi_negated_signbit) ),
+                                      mkU8(31) ),
+                               mkU32(0) ),
+                        mkexpr(BHi_unsigned) ) );
+         assign( vec_result, binop( Iop_64HLtoV128, mkexpr(BHi_negated),
+                                    mkU64(0x0ULL)));
+         putVSReg( XT, mkexpr(vec_result));
          break;
       }
       case 0x280: // xsmaxdp (VSX Scalar Maximum Double-Precision)
@@ -15048,7 +15228,7 @@ dis_vx_logic ( UInt theInstr, UInt opc2 )
  * NOTE: VSX supports word-aligned storage access.
  */
 static Bool
-dis_vx_load ( UInt theInstr )
+dis_vx_load ( UInt theInstr, Bool guest_is_BE )
 {
    /* XX1-Form */
    UChar opc1 = ifieldOPC( theInstr );
@@ -15072,7 +15252,7 @@ dis_vx_load ( UInt theInstr )
    {
       IRExpr * exp;
       DIP("lxsiwzx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ), guest_is_BE ) );
       putVSReg( XT, binop( Iop_64HLtoV128,
                            unop( Iop_32Uto64, exp),
                            mkU64(0) ) );
@@ -15082,7 +15262,7 @@ dis_vx_load ( UInt theInstr )
    {
       IRExpr * exp;
       DIP("lxsiwax %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ), guest_is_BE ) );
       putVSReg( XT, binop( Iop_64HLtoV128,
                            unop( Iop_32Sto64, exp),
                            mkU64(0) ) );
@@ -15099,7 +15279,7 @@ dis_vx_load ( UInt theInstr )
       exp = unop( Iop_ReinterpF64asI64,
                   unop( Iop_F32toF64,
                         unop( Iop_ReinterpI32asF32,
-                              load( Ity_I32, mkexpr( EA ) ) ) ) );
+                              load( Ity_I32, mkexpr( EA ), guest_is_BE ) ) ) );
 
       putVSReg( XT, binop( Iop_64HLtoV128, exp, mkU64( 0 ) ) );
       break;
@@ -15108,7 +15288,7 @@ dis_vx_load ( UInt theInstr )
    {
       IRExpr * exp;
       DIP("lxsdx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = load( Ity_I64, mkexpr( EA ) );
+      exp = load( Ity_I64, mkexpr( EA ), guest_is_BE );
       // We need to pass an expression of type Ity_V128 with putVSReg, but the load
       // we just performed is only a DW.  But since the contents of VSR[XT] element 1
       // are undefined after this operation, we can just do a splat op.
@@ -15122,10 +15302,10 @@ dis_vx_load ( UInt theInstr )
       ULong ea_off = 8;
       IRExpr* high_addr;
       DIP("lxvd2x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      high = load( Ity_I64, mkexpr( EA ) );
+      high = load( Ity_I64, mkexpr( EA ), guest_is_BE );
       high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
             : mkU32( ea_off ) );
-      low = load( Ity_I64, high_addr );
+      low = load( Ity_I64, high_addr, guest_is_BE );
       putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
       break;
    }
@@ -15133,7 +15313,7 @@ dis_vx_load ( UInt theInstr )
    {
       IRTemp data = newTemp(Ity_I64);
       DIP("lxvdsx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      assign( data, load( Ity_I64, mkexpr( EA ) ) );
+      assign( data, load( Ity_I64, mkexpr( EA ), guest_is_BE ) );
       putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( data ), mkexpr( data ) ) );
       break;
    }
@@ -15144,19 +15324,19 @@ dis_vx_load ( UInt theInstr )
       IRExpr* irx_addr;
 
       DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      t3 = load( Ity_I32,  mkexpr( EA ) );
+      t3 = load( Ity_I32,  mkexpr( EA ), guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t2 = load( Ity_I32, irx_addr );
+      t2 = load( Ity_I32, irx_addr, guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t1 = load( Ity_I32, irx_addr );
+      t1 = load( Ity_I32, irx_addr, guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t0 = load( Ity_I32, irx_addr );
+      t0 = load( Ity_I32, irx_addr, guest_is_BE );
       putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ),
                            binop( Iop_32HLto64, t1, t0 ) ) );
       break;
@@ -15173,7 +15353,7 @@ dis_vx_load ( UInt theInstr )
  * NOTE: VSX supports word-aligned storage access.
  */
 static Bool
-dis_vx_store ( UInt theInstr )
+dis_vx_store ( UInt theInstr, Bool guest_is_BE )
 {
    /* XX1-Form */
    UChar opc1 = ifieldOPC( theInstr );
@@ -15204,7 +15384,7 @@ dis_vx_store ( UInt theInstr )
       DIP("stxsiwx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
       low32  = unop( Iop_64to32, high64 );
-      store( mkexpr( EA ), low32 );
+      store( mkexpr( EA ), low32, guest_is_BE );
       break;
    }
    case 0x28C:
@@ -15217,7 +15397,7 @@ dis_vx_store ( UInt theInstr )
       assign(val32, unop( Iop_ReinterpF32asI32,
                           unop( Iop_TruncF64asF32,
                                 mkexpr(high64) ) ) );
-      store( mkexpr( EA ), mkexpr( val32 ) );
+      store( mkexpr( EA ), mkexpr( val32 ), guest_is_BE );
       break;
    }
    case 0x2CC:
@@ -15225,7 +15405,7 @@ dis_vx_store ( UInt theInstr )
       IRExpr * high64;
       DIP("stxsdx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
-      store( mkexpr( EA ), high64 );
+      store( mkexpr( EA ), high64, guest_is_BE );
       break;
    }
    case 0x3CC:
@@ -15234,9 +15414,10 @@ dis_vx_store ( UInt theInstr )
       DIP("stxvd2x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
       low64 = unop( Iop_V128to64, mkexpr( vS ) );
-      store( mkexpr( EA ), high64 );
+      store( mkexpr( EA ), high64, guest_is_BE );
       store( binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                    ty == Ity_I64 ? mkU64( 8 ) : mkU32( 8 ) ), low64 );
+                    ty == Ity_I64 ? mkU64( 8 ) : mkU32( 8 ) ), low64,
+	     guest_is_BE );
       break;
    }
    case 0x38C:
@@ -15252,19 +15433,20 @@ dis_vx_store ( UInt theInstr )
       // quad-word aligned.  Therefore, do 4 individual word-size stores.
       assign( hi64, unop( Iop_V128HIto64, mkexpr( vS ) ) );
       assign( lo64, unop( Iop_V128to64, mkexpr( vS ) ) );
-      store( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ) );
+      store( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ),
+             guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      store( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ) );
+      store( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ), guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      store( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ) );
+      store( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ), guest_is_BE );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      store( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ) );
+      store( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ), guest_is_BE );
 
       break;
    }
@@ -15399,7 +15581,7 @@ dis_vx_permute_misc( UInt theInstr, UInt opc2 )
 /*
   AltiVec Load Instructions
 */
-static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
+static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -15426,24 +15608,39 @@ static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
    case 0x006: { // lvsl (Load Vector for Shift Left, AV p123)
       IRDirty* d;
       UInt vD_off = vectorGuestRegOffset(vD_addr);
-      IRExpr** args = mkIRExprVec_4(
+      IRExpr** args_be = mkIRExprVec_5(
+                         IRExpr_BBPTR(),
+                         mkU32(vD_off),
+                         binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                          mkU32(0xF)),
+                         mkU32(0)/*left*/,
+                         mkU32(1)/*Big Endian*/);
+      IRExpr** args_le = mkIRExprVec_5(
                          IRExpr_BBPTR(),
-                         mkU32(vD_off), 
+                         mkU32(vD_off),
                          binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
                                           mkU32(0xF)),
-                         mkU32(0)/*left*/ );
+                         mkU32(0)/*left*/,
+                         mkU32(0)/*Little Endian*/);
       if (!mode64) {
          d = unsafeIRDirty_0_N (
                         0/*regparms*/, 
                         "ppc32g_dirtyhelper_LVS",
                         fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
-                        args );
+                        args_be );
       } else {
-         d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
-                        "ppc64g_dirtyhelper_LVS",
-                        fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
-                        args );
+         if (guest_is_BE)
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+                           args_be );
+	else
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           &ppc64g_dirtyhelper_LVS,
+                           args_le );
       }
       DIP("lvsl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* declare guest state effects */
@@ -15460,24 +15657,40 @@ static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
    case 0x026: { // lvsr (Load Vector for Shift Right, AV p125)
       IRDirty* d;
       UInt vD_off = vectorGuestRegOffset(vD_addr);
-      IRExpr** args = mkIRExprVec_4(
-                         IRExpr_BBPTR(),
-                         mkU32(vD_off), 
-                         binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
-                                          mkU32(0xF)),
-                         mkU32(1)/*right*/ );
+      IRExpr** args_be = mkIRExprVec_5(
+                             IRExpr_BBPTR(),
+                             mkU32(vD_off),
+                             binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                              mkU32(0xF)),
+                             mkU32(1)/*right*/,
+                             mkU32(1)/*Big Endian*/);
+      IRExpr** args_le = mkIRExprVec_5(
+                             IRExpr_BBPTR(),
+                             mkU32(vD_off),
+                             binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                              mkU32(0xF)),
+                             mkU32(1)/*right*/,
+                             mkU32(0)/*Little Endian*/);
+
       if (!mode64) {
          d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
+                        0/*regparms*/,
                         "ppc32g_dirtyhelper_LVS",
                         fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
-                        args );
+                        args_be );
       } else {
-         d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
-                        "ppc64g_dirtyhelper_LVS",
-                        fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
-                        args );
+         if (guest_is_BE)
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+                           args_be );
+         else
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           &ppc64g_dirtyhelper_LVS,
+                           args_le );
       }
       DIP("lvsr v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* declare guest state effects */
@@ -15496,29 +15709,29 @@ static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
       /* loads addressed byte into vector[EA[0:3]
          since all other destination bytes are undefined,
          can simply load entire vector from 16-aligned EA */
-      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16), guest_is_BE) );
       break;
 
    case 0x027: // lvehx (Load Vector Element Half Word Indexed, AV p121)
       DIP("lvehx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* see note for lvebx */
-      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16), guest_is_BE) );
       break;
 
    case 0x047: // lvewx (Load Vector Element Word Indexed, AV p122)
       DIP("lvewx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* see note for lvebx */
-      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16), guest_is_BE) );
       break;
 
    case 0x067: // lvx (Load Vector Indexed, AV p127)
       DIP("lvx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
-      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16), guest_is_BE) );
       break;
 
    case 0x167: // lvxl (Load Vector Indexed LRU, AV p128)
       DIP("lvxl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
-      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16), guest_is_BE) );
       break;
 
    default:
@@ -15531,7 +15744,7 @@ static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
 /*
   AltiVec Store Instructions
 */
-static Bool dis_av_store ( UInt theInstr )
+static Bool dis_av_store ( UInt theInstr, Bool guest_is_BE )
 {
    /* X-Form */
    UChar opc1     = ifieldOPC(theInstr);
@@ -15562,12 +15775,17 @@ static Bool dis_av_store ( UInt theInstr )
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         unop(Iop_32to8,
                              mkNarrowTo32(ty, mkexpr(EA)) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
-                         mkU8(3)) );
+      if (!guest_is_BE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
+                            mkU8(3)) );
+      }
       store( mkexpr(EA),
              unop( Iop_32to8, unop(Iop_V128to32,
-                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))),
+             guest_is_BE );
       break;
    }
    case 0x0A7: { // stvehx (Store Vector Half Word Indexed, AV p132)
@@ -15575,12 +15793,17 @@ static Bool dis_av_store ( UInt theInstr )
       assign( addr_aligned, addr_align(mkexpr(EA), 2) );
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
-                         mkU8(3)) );
+      if (!guest_is_BE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
+                            mkU8(3)) );
+      }
       store( mkexpr(addr_aligned),
              unop( Iop_32to16, unop(Iop_V128to32,
-                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))),
+             guest_is_BE );
       break;
    }
    case 0x0C7: { // stvewx (Store Vector Word Indexed, AV p133)
@@ -15588,23 +15811,28 @@ static Bool dis_av_store ( UInt theInstr )
       assign( addr_aligned, addr_align(mkexpr(EA), 4) );
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
-                         mkU8(3)) );
+      if (!guest_is_BE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
+                            mkU8(3)) );
+      }
       store( mkexpr( addr_aligned),
              unop( Iop_V128to32,
-                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))) );
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))),
+             guest_is_BE );
       break;
    }
 
    case 0x0E7: // stvx (Store Vector Indexed, AV p134)
       DIP("stvx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
-      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS), guest_is_BE );
       break;
 
    case 0x1E7: // stvxl (Store Vector Indexed LRU, AV p135)
       DIP("stvxl v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
-      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS), guest_is_BE );
       break;
 
    default:
@@ -18484,7 +18712,8 @@ DisResult disInstr_PPC_WRK (
              Long         delta64,
              VexArchInfo* archinfo,
              VexAbiInfo*  abiinfo,
-             Bool         sigill_diag
+             Bool         sigill_diag,
+             Bool         guest_is_BE
           )
 {
    UChar     opc1;
@@ -18533,7 +18762,8 @@ DisResult disInstr_PPC_WRK (
    /* At least this is simple on PPC32: insns are all 4 bytes long, and
       4-aligned.  So just fish the whole thing out of memory right now
       and have done. */
-   theInstr = getUIntPPCendianly( (UChar*)(&guest_code[delta]) );
+   theInstr = getUIntPPCendianly( (UChar*)(&guest_code[delta]),
+                                  guest_is_BE);
 
    if (0) vex_printf("insn: 0x%x\n", theInstr);
 
@@ -18558,12 +18788,12 @@ DisResult disInstr_PPC_WRK (
       UInt word2 = mode64 ? 0x78006800 : 0x5400683E;
       UInt word3 = mode64 ? 0x7800E802 : 0x5400E83E;
       UInt word4 = mode64 ? 0x78009802 : 0x5400983E;
-      if (getUIntPPCendianly(code+ 0) == word1 &&
-          getUIntPPCendianly(code+ 4) == word2 &&
-          getUIntPPCendianly(code+ 8) == word3 &&
-          getUIntPPCendianly(code+12) == word4) {
+      if (getUIntPPCendianly(code+ 0, guest_is_BE) == word1 &&
+          getUIntPPCendianly(code+ 4, guest_is_BE) == word2 &&
+          getUIntPPCendianly(code+ 8, guest_is_BE) == word3 &&
+          getUIntPPCendianly(code+12, guest_is_BE) == word4) {
          /* Got a "Special" instruction preamble.  Which one is it? */
-         if (getUIntPPCendianly(code+16) == 0x7C210B78 /* or 1,1,1 */) {
+         if (getUIntPPCendianly(code+16, guest_is_BE) == 0x7C210B78 /* or 1,1,1 */) {
             /* %R3 = client_request ( %R4 ) */
             DIP("r3 = client_request ( %%r4 )\n");
             delta += 20;
@@ -18573,7 +18803,7 @@ DisResult disInstr_PPC_WRK (
             goto decode_success;
          }
          else
-         if (getUIntPPCendianly(code+16) == 0x7C421378 /* or 2,2,2 */) {
+         if (getUIntPPCendianly(code+16, guest_is_BE) == 0x7C421378 /* or 2,2,2 */) {
             /* %R3 = guest_NRADDR */
             DIP("r3 = guest_NRADDR\n");
             delta += 20;
@@ -18582,18 +18812,27 @@ DisResult disInstr_PPC_WRK (
             goto decode_success;
          }
          else
-         if (getUIntPPCendianly(code+16) == 0x7C631B78 /* or 3,3,3 */) {
-            /*  branch-and-link-to-noredir %R11 */
-            DIP("branch-and-link-to-noredir r11\n");
+         if (getUIntPPCendianly(code+16, guest_is_BE) == 0x7C631B78 /* or 3,3,3 */) {
             delta += 20;
-            putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
-            putGST( PPC_GST_CIA, getIReg(11));
+            if( !guest_is_BE) {
+                /*  branch-and-link-to-noredir %R12 */
+                DIP("branch-and-link-to-noredir r12\n");
+                putGST( PPC_GST_LR,
+                        mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
+                putGST( PPC_GST_CIA, getIReg(12));
+            } else {
+                /*  branch-and-link-to-noredir %R11 */
+                DIP("branch-and-link-to-noredir r11\n");
+                putGST( PPC_GST_LR,
+                        mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
+                putGST( PPC_GST_CIA, getIReg(11));
+            }
             dres.jk_StopHere = Ijk_NoRedir;
             dres.whatNext    = Dis_StopHere;
             goto decode_success;
          }
          else
-         if (getUIntPPCendianly(code+16) == 0x7C842378 /* or 4,4,4 */) {
+         if (getUIntPPCendianly(code+16, guest_is_BE) == 0x7C842378 /* or 4,4,4 */) {
             /* %R3 = guest_NRADDR_GPR2 */
             DIP("r3 = guest_NRADDR_GPR2\n");
             delta += 20;
@@ -18602,10 +18841,11 @@ DisResult disInstr_PPC_WRK (
             goto decode_success;
          }
          else
-         if (getUIntPPCendianly(code+16) == 0x7CA52B78 /* or 5,5,5 */) {
+         if (getUIntPPCendianly(code+16, guest_is_BE) == 0x7CA52B78 /* or 5,5,5 */) {
             DIP("IR injection\n");
+            IREndness endian = guest_is_BE ? Iend_BE: Iend_LE;
 
-            vex_inject_ir(irsb, IENDIANESS);
+            vex_inject_ir(irsb, endian);
 
             delta += 20;
             dres.len = 20;
@@ -18625,7 +18865,7 @@ DisResult disInstr_PPC_WRK (
          }
          /* We don't know what it is.  Set opc1/opc2 so decode_failure
             can print the insn following the Special-insn preamble. */
-         theInstr = getUIntPPCendianly(code+16);
+         theInstr = getUIntPPCendianly(code+16, guest_is_BE);
          opc1     = ifieldOPC(theInstr);
          opc2     = ifieldOPClo10(theInstr);
          goto decode_failure;
@@ -18653,7 +18893,7 @@ DisResult disInstr_PPC_WRK (
    /* Integer Logical Instructions */
    case 0x1C: case 0x1D: case 0x18: // andi., andis., ori
    case 0x19: case 0x1A: case 0x1B: // oris,  xori,   xoris
-      if (dis_int_logic( theInstr )) goto decode_success;
+      if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* Integer Rotate Instructions */
@@ -18671,18 +18911,18 @@ DisResult disInstr_PPC_WRK (
    case 0x22: case 0x23: case 0x2A: // lbz,  lbzu, lha
    case 0x2B: case 0x28: case 0x29: // lhau, lhz,  lhzu
    case 0x20: case 0x21:            // lwz,  lwzu
-      if (dis_int_load( theInstr )) goto decode_success;
+      if (dis_int_load( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* Integer Store Instructions */
    case 0x26: case 0x27: case 0x2C: // stb,  stbu, sth
    case 0x2D: case 0x24: case 0x25: // sthu, stw,  stwu
-      if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+      if (dis_int_store( theInstr, abiinfo, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* Integer Load and Store Multiple Instructions */
    case 0x2E: case 0x2F: // lmw, stmw
-      if (dis_int_ldst_mult( theInstr )) goto decode_success;
+      if (dis_int_ldst_mult( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* Branch Instructions */
@@ -18711,31 +18951,31 @@ DisResult disInstr_PPC_WRK (
    case 0x30: case 0x31: case 0x32: // lfs, lfsu, lfd
    case 0x33:                       // lfdu
       if (!allow_F) goto decode_noF;
-      if (dis_fp_load( theInstr )) goto decode_success;
+      if (dis_fp_load( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* Floating Point Store Instructions */
    case 0x34: case 0x35: case 0x36: // stfsx, stfsux, stfdx
    case 0x37:                       // stfdux
       if (!allow_F) goto decode_noF;
-      if (dis_fp_store( theInstr )) goto decode_success;
+      if (dis_fp_store( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
       /* Floating Point Load Double Pair Instructions */
    case 0x39: case 0x3D:
       if (!allow_F) goto decode_noF;
-      if (dis_fp_pair( theInstr )) goto decode_success;
+      if (dis_fp_pair( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* 128-bit Integer Load */
    case 0x38:  // lq
-      if (dis_int_load( theInstr )) goto decode_success;
+      if (dis_int_load( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    /* 64bit Integer Loads */
    case 0x3A:  // ld, ldu, lwa
       if (!mode64) goto decode_failure;
-      if (dis_int_load( theInstr )) goto decode_success;
+      if (dis_int_load( theInstr, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    case 0x3B:
@@ -18748,13 +18988,13 @@ DisResult disInstr_PPC_WRK (
          case 0x22:   // dmul - DFP Mult
          case 0x222:  // ddiv - DFP Divide
             if (!allow_DFP) goto decode_noDFP;
-            if (dis_dfp_arith( theInstr ))
+            if (dis_dfp_arith( theInstr, guest_is_BE ))
                goto decode_success;
          case 0x82:   // dcmpo, DFP comparison ordered instruction
          case 0x282:  // dcmpu, DFP comparison unordered instruction
             if (!allow_DFP)
                goto decode_failure;
-            if (dis_dfp_compare( theInstr ) )
+            if (dis_dfp_compare( theInstr, guest_is_BE ) )
                goto decode_success;
             goto decode_failure;
          case 0x102: // dctdp  - DFP convert to DFP long
@@ -18762,43 +19002,43 @@ DisResult disInstr_PPC_WRK (
          case 0x122: // dctfix - DFP convert to fixed
             if (!allow_DFP)
                goto decode_failure;
-            if (dis_dfp_fmt_conv( theInstr ))
+            if (dis_dfp_fmt_conv( theInstr, guest_is_BE ))
                goto decode_success;
             goto decode_failure;
          case 0x322: // POWER 7 inst, dcffix - DFP convert from fixed
             if (!allow_VX)
                goto decode_failure;
-            if (dis_dfp_fmt_conv( theInstr ))
+            if (dis_dfp_fmt_conv( theInstr, guest_is_BE ))
                goto decode_success;
             goto decode_failure;
          case 0x2A2: // dtstsf - DFP number of significant digits
             if (!allow_DFP)
                goto decode_failure;
-            if (dis_dfp_significant_digits(theInstr))
+            if (dis_dfp_significant_digits(theInstr, guest_is_BE))
                goto decode_success;
             goto decode_failure;
          case 0x142: // ddedpd   DFP Decode DPD to BCD
          case 0x342: // denbcd   DFP Encode BCD to DPD
             if (!allow_DFP)
                goto decode_failure;
-            if (dis_dfp_bcd(theInstr))
+            if (dis_dfp_bcd(theInstr, guest_is_BE))
                goto decode_success;
             goto decode_failure;
          case 0x162:  // dxex - Extract exponent 
          case 0x362:  // diex - Insert exponent
             if (!allow_DFP)
                goto decode_failure;
-            if (dis_dfp_extract_insert( theInstr ) )
+            if (dis_dfp_extract_insert( theInstr, guest_is_BE ) )
                goto decode_success;
             goto decode_failure;
          case 0x3CE: // fcfidus (implemented as native insn)
             if (!allow_VX)
                goto decode_noVX;
-            if (dis_fp_round( theInstr ))
+            if (dis_fp_round( theInstr, guest_is_BE ))
                goto decode_success;
             goto decode_failure;
          case 0x34E: // fcfids
-            if (dis_fp_round( theInstr ))
+            if (dis_fp_round( theInstr, guest_is_BE ))
                goto decode_success;
             goto decode_failure;
       }
@@ -18809,14 +19049,14 @@ DisResult disInstr_PPC_WRK (
       case 0x62: // dscri, DFP shift right
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_shift( theInstr ))
+         if (dis_dfp_shift( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       case 0xc2:  // dtstdc, DFP test data class
       case 0xe2:  // dtstdg, DFP test data group
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_class_test( theInstr ))
+         if (dis_dfp_class_test( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       }
@@ -18828,20 +19068,20 @@ DisResult disInstr_PPC_WRK (
       case 0x43:  // dquai - DFP Quantize immediate
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_quantize_sig_rrnd( theInstr ) )
+         if (dis_dfp_quantize_sig_rrnd( theInstr, guest_is_BE ) )
             goto decode_success;
          goto decode_failure;
       case 0xA2: // dtstex - DFP Test exponent
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_exponent_test( theInstr ) )
+         if (dis_dfp_exponent_test( theInstr, guest_is_BE ) )
             goto decode_success;
          goto decode_failure;
       case 0x63: // drintx - Round to an integer value
       case 0xE3: // drintn - Round to an integer value
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_round( theInstr ) ) {
+         if (dis_dfp_round( theInstr, guest_is_BE ) ) {
             goto decode_success;
          }
          goto decode_failure;
@@ -18854,26 +19094,26 @@ DisResult disInstr_PPC_WRK (
       /* Floating Point Arith Instructions */
       case 0x12: case 0x14: case 0x15: // fdivs,  fsubs, fadds
       case 0x19:                       // fmuls
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
       case 0x16:                       // fsqrts
          if (!allow_FX) goto decode_noFX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
       case 0x18:                       // fres
          if (!allow_GX) goto decode_noGX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       /* Floating Point Mult-Add Instructions */
       case 0x1C: case 0x1D: case 0x1E: // fmsubs, fmadds, fnmsubs
       case 0x1F:                       // fnmadds
-         if (dis_fp_multadd(theInstr)) goto decode_success;
+         if (dis_fp_multadd(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       case 0x1A:                       // frsqrtes
          if (!allow_GX) goto decode_noGX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       default:
@@ -18913,7 +19153,7 @@ DisResult disInstr_PPC_WRK (
          case 0x0B4: case 0x094: // xsredp, xsrsqrtedp
          case 0x0D6: case 0x0B2: // xsrdpic, xsrdpiz
          case 0x092: case 0x232: // xsrdpi, xsrsp
-            if (dis_vxs_misc(theInstr, vsxOpc2)) goto decode_success;
+            if (dis_vxs_misc(theInstr, vsxOpc2, guest_is_BE)) goto decode_success;
             goto decode_failure;
          case 0x08C: case 0x0AC: // xscmpudp, xscmpodp
             if (dis_vx_cmp(theInstr, vsxOpc2)) goto decode_success;
@@ -19022,7 +19262,7 @@ DisResult disInstr_PPC_WRK (
 
    /* 64bit Integer Stores */
    case 0x3E:  // std, stdu, stq
-      if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+      if (dis_int_store( theInstr, abiinfo, guest_is_BE )) goto decode_success;
       goto decode_failure;
 
    case 0x3F:
@@ -19035,26 +19275,26 @@ DisResult disInstr_PPC_WRK (
       /* Floating Point Arith Instructions */
       case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd
       case 0x19:                       // fmul
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
       case 0x16:                       // fsqrt
          if (!allow_FX) goto decode_noFX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
       case 0x17: case 0x1A:            // fsel, frsqrte
          if (!allow_GX) goto decode_noGX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
          
       /* Floating Point Mult-Add Instructions */         
       case 0x1C: case 0x1D: case 0x1E: // fmsub, fmadd, fnmsub
       case 0x1F:                       // fnmadd
-         if (dis_fp_multadd(theInstr)) goto decode_success;
+         if (dis_fp_multadd(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       case 0x18:                       // fre
          if (!allow_GX) goto decode_noGX;
-         if (dis_fp_arith(theInstr)) goto decode_success;
+         if (dis_fp_arith(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       default:
@@ -19069,14 +19309,14 @@ DisResult disInstr_PPC_WRK (
       case 0x22:   // dmulq - DFP Mult
       case 0x222:  // ddivq - DFP Divide
          if (!allow_DFP) goto decode_noDFP;
-         if (dis_dfp_arithq( theInstr ))
+         if (dis_dfp_arithq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       case 0x162:  // dxexq - DFP Extract exponent
       case 0x362:  // diexq - DFP Insert exponent
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_extract_insertq( theInstr ))
+         if (dis_dfp_extract_insertq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
 
@@ -19084,7 +19324,7 @@ DisResult disInstr_PPC_WRK (
       case 0x282:  // dcmpuq, DFP comparison unordered instruction
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_compare( theInstr ) )
+         if (dis_dfp_compare( theInstr, guest_is_BE ) )
             goto decode_success;
          goto decode_failure;
 
@@ -19094,14 +19334,14 @@ DisResult disInstr_PPC_WRK (
       case 0x322: // dcffixq - DFP convert from fixed quad
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_fmt_convq( theInstr ))
+         if (dis_dfp_fmt_convq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
 
       case 0x2A2: // dtstsfq - DFP number of significant digits
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_significant_digits(theInstr))
+         if (dis_dfp_significant_digits(theInstr, guest_is_BE))
             goto decode_success;
          goto decode_failure;
 
@@ -19109,19 +19349,19 @@ DisResult disInstr_PPC_WRK (
       case 0x342: // denbcdq   DFP Encode BCD to DPD
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_bcdq(theInstr))
+         if (dis_dfp_bcdq(theInstr, guest_is_BE))
             goto decode_success;
          goto decode_failure;
 
       /* Floating Point Compare Instructions */         
       case 0x000: // fcmpu
       case 0x020: // fcmpo
-         if (dis_fp_cmp(theInstr)) goto decode_success;
+         if (dis_fp_cmp(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
          
       case 0x080: // ftdiv
       case 0x0A0: // ftsqrt
-         if (dis_fp_tests(theInstr)) goto decode_success;
+         if (dis_fp_tests(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       /* Floating Point Rounding/Conversion Instructions */         
@@ -19131,12 +19371,12 @@ DisResult disInstr_PPC_WRK (
       case 0x32E: // fctid
       case 0x32F: // fctidz
       case 0x34E: // fcfid
-         if (dis_fp_round(theInstr)) goto decode_success;
+         if (dis_fp_round(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
       case 0x3CE: case 0x3AE: case 0x3AF: // fcfidu, fctidu[z] (implemented as native insns)
       case 0x08F: case 0x08E: // fctiwu[z] (implemented as native insns)
          if (!allow_VX) goto decode_noVX;
-         if (dis_fp_round(theInstr)) goto decode_success;
+         if (dis_fp_round(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       /* Power6 rounding stuff */
@@ -19146,7 +19386,7 @@ DisResult disInstr_PPC_WRK (
       case 0x1A8: // friz
          /* A hack to check for P6 capability . . . */
          if ((allow_F && allow_V && allow_FX && allow_GX) &&
-             (dis_fp_round(theInstr)))
+             (dis_fp_round(theInstr, guest_is_BE)))
             goto decode_success;
          goto decode_failure;
          
@@ -19156,11 +19396,11 @@ DisResult disInstr_PPC_WRK (
       case 0x048: // fmr
       case 0x088: // fnabs
       case 0x108: // fabs
-         if (dis_fp_move( theInstr )) goto decode_success;
+         if (dis_fp_move( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       case 0x3c6: case 0x346:          // fmrgew, fmrgow
-         if (dis_fp_merge( theInstr )) goto decode_success;
+         if (dis_fp_merge( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* Floating Point Status/Control Register Instructions */         
@@ -19172,7 +19412,7 @@ DisResult disInstr_PPC_WRK (
       case 0x2C7: // mtfsf
          // Some of the above instructions need to know more about the
          // ISA level supported by the host.
-         if (dis_fp_scr( theInstr, allow_GX )) goto decode_success;
+         if (dis_fp_scr( theInstr, allow_GX, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       default:
@@ -19185,14 +19425,14 @@ DisResult disInstr_PPC_WRK (
       case 0x62: // dscri, DFP shift right
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_shiftq( theInstr ))
+         if (dis_dfp_shiftq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       case 0xc2:  // dtstdc, DFP test data class
       case 0xe2:  // dtstdg, DFP test data group
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_class_test( theInstr ))
+         if (dis_dfp_class_test( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       default:
@@ -19206,18 +19446,18 @@ DisResult disInstr_PPC_WRK (
       case 0x43:  // dquaiq - DFP Quantize immediate Quad
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_quantize_sig_rrndq( theInstr ))
+         if (dis_dfp_quantize_sig_rrndq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
       case 0xA2: // dtstexq - DFP Test exponent Quad
-         if (dis_dfp_exponent_test( theInstr ) )
+         if (dis_dfp_exponent_test( theInstr, guest_is_BE ) )
             goto decode_success;
          goto decode_failure;
       case 0x63:  // drintxq - DFP Round to an integer value
       case 0xE3:  // drintnq - DFP Round to an integer value
          if (!allow_DFP)
             goto decode_failure;
-         if (dis_dfp_roundq( theInstr ))
+         if (dis_dfp_roundq( theInstr, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
 
@@ -19245,7 +19485,7 @@ DisResult disInstr_PPC_WRK (
          
       /* Memory Synchronization Instructions */
       case 0x096: // isync
-         if (dis_memsync( theInstr )) goto decode_success;
+         if (dis_memsync( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       default:
@@ -19291,7 +19531,7 @@ DisResult disInstr_PPC_WRK (
          goto decode_failure;
 
       case 0x1FC:                         // cmpb
-         if (dis_int_logic( theInstr )) goto decode_success;
+         if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       default:
@@ -19313,7 +19553,7 @@ DisResult disInstr_PPC_WRK (
       case 0x1DC: case 0x07C: case 0x1BC: // nand, nor,   or
       case 0x19C: case 0x13C:             // orc,  xor
       case 0x2DF: case 0x25F:            // mftgpr, mffgpr
-         if (dis_int_logic( theInstr )) goto decode_success;
+         if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       case 0x28E: case 0x2AE:             // tbegin., tend.
@@ -19321,7 +19561,8 @@ DisResult disInstr_PPC_WRK (
       case 0x32E: case 0x34E: case 0x36E: // tabortdc., tabortwci., tabortdci.
       case 0x38E: case 0x3AE: case 0x3EE: // tabort., treclaim., trechkpt.
       if (dis_transactional_memory( theInstr,
-                                    getUIntPPCendianly( (UChar*)(&guest_code[delta + 4])),
+                                    getUIntPPCendianly( (UChar*)(&guest_code[delta + 4]),
+                                                         guest_is_BE),
                                     abiinfo, &dres,
                                     resteerOkFn, callback_opaque))
             goto decode_success;
@@ -19330,7 +19571,7 @@ DisResult disInstr_PPC_WRK (
       /* 64bit Integer Logical Instructions */
       case 0x3DA: case 0x03A: // extsw, cntlzd
          if (!mode64) goto decode_failure;
-         if (dis_int_logic( theInstr )) goto decode_success;
+         if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
          /* 64bit Integer Parity Instructions */
@@ -19361,44 +19602,44 @@ DisResult disInstr_PPC_WRK (
       case 0x057: case 0x077: case 0x157: // lbzx,  lbzux, lhax
       case 0x177: case 0x117: case 0x137: // lhaux, lhzx,  lhzux
       case 0x017: case 0x037:             // lwzx,  lwzux
-         if (dis_int_load( theInstr )) goto decode_success;
+         if (dis_int_load( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* 64bit Integer Load Instructions */
       case 0x035: case 0x015:             // ldux,  ldx
       case 0x175: case 0x155:             // lwaux, lwax
          if (!mode64) goto decode_failure;
-         if (dis_int_load( theInstr )) goto decode_success;
+         if (dis_int_load( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* Integer Store Instructions */
       case 0x0F7: case 0x0D7: case 0x1B7: // stbux, stbx,  sthux
       case 0x197: case 0x0B7: case 0x097: // sthx,  stwux, stwx
-         if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+         if (dis_int_store( theInstr, abiinfo, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* 64bit Integer Store Instructions */
       case 0x0B5: case 0x095: // stdux, stdx
          if (!mode64) goto decode_failure;
-         if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+         if (dis_int_store( theInstr, abiinfo, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* Integer Load and Store with Byte Reverse Instructions */
       case 0x214: case 0x294: // ldbrx, stdbrx
          if (!mode64) goto decode_failure;
-         if (dis_int_ldst_rev( theInstr )) goto decode_success;
+         if (dis_int_ldst_rev( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       case 0x216: case 0x316: case 0x296:    // lwbrx, lhbrx, stwbrx
       case 0x396:                            // sthbrx
-         if (dis_int_ldst_rev( theInstr )) goto decode_success;
+         if (dis_int_ldst_rev( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
          
       /* Integer Load and Store String Instructions */
       case 0x255: case 0x215: case 0x2D5: // lswi, lswx, stswi
       case 0x295: {                       // stswx
          Bool stopHere = False;
-         Bool ok = dis_int_ldst_str( theInstr, &stopHere );
+         Bool ok = dis_int_ldst_str( theInstr, &stopHere, guest_is_BE );
          if (!ok) goto decode_failure;
          if (stopHere) {
             putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()) );
@@ -19411,17 +19652,17 @@ DisResult disInstr_PPC_WRK (
       /* Memory Synchronization Instructions */
       case 0x356: case 0x014: case 0x096: // eieio, lwarx, stwcx.
       case 0x256:                         // sync
-         if (dis_memsync( theInstr )) goto decode_success;
+         if (dis_memsync( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
          
       /* 64bit Memory Synchronization Instructions */
       case 0x054: case 0x0D6: // ldarx, stdcx.
          if (!mode64) goto decode_failure;
-         if (dis_memsync( theInstr )) goto decode_success;
+         if (dis_memsync( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       case 0x114: case 0x0B6: // lqarx, stqcx.
-         if (dis_memsync( theInstr )) goto decode_success;
+         if (dis_memsync( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* Processor Control Instructions */
@@ -19437,7 +19678,7 @@ DisResult disInstr_PPC_WRK (
       case 0x2F6: case 0x056: case 0x036: // dcba, dcbf,   dcbst
       case 0x116: case 0x0F6: case 0x3F6: // dcbt, dcbtst, dcbz
       case 0x3D6:                         // icbi
-         if (dis_cache_manage( theInstr, &dres, archinfo )) 
+         if (dis_cache_manage( theInstr, &dres, archinfo, guest_is_BE ))
             goto decode_success;
          goto decode_failure;
 
@@ -19460,36 +19701,36 @@ DisResult disInstr_PPC_WRK (
       case 0x217: case 0x237: case 0x257: // lfsx, lfsux, lfdx
       case 0x277:                         // lfdux
          if (!allow_F) goto decode_noF;
-         if (dis_fp_load( theInstr )) goto decode_success;
+         if (dis_fp_load( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* Floating Point Store Instructions */
       case 0x297: case 0x2B7: case 0x2D7: // stfs,  stfsu, stfd
       case 0x2F7:                         // stfdu, stfiwx
          if (!allow_F) goto decode_noF;
-         if (dis_fp_store( theInstr )) goto decode_success;
+         if (dis_fp_store( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
       case 0x3D7:                         // stfiwx
          if (!allow_F) goto decode_noF;
          if (!allow_GX) goto decode_noGX;
-         if (dis_fp_store( theInstr )) goto decode_success;
+         if (dis_fp_store( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
          /* Floating Point Double Pair Indexed Instructions */
       case 0x317: // lfdpx (Power6)
       case 0x397: // stfdpx (Power6)
          if (!allow_F) goto decode_noF;
-         if (dis_fp_pair(theInstr)) goto decode_success;
+         if (dis_fp_pair(theInstr, guest_is_BE)) goto decode_success;
          goto decode_failure;
 
       case 0x357:                         // lfiwax
          if (!allow_F) goto decode_noF;
-         if (dis_fp_load( theInstr )) goto decode_success;
+         if (dis_fp_load( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       case 0x377:                         // lfiwzx
          if (!allow_F) goto decode_noF;
-         if (dis_fp_load( theInstr )) goto decode_success;
+         if (dis_fp_load( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* AltiVec instructions */
@@ -19505,14 +19746,14 @@ DisResult disInstr_PPC_WRK (
       case 0x007: case 0x027: case 0x047: // lvebx, lvehx, lvewx
       case 0x067: case 0x167:             // lvx, lvxl
          if (!allow_V) goto decode_noV;
-         if (dis_av_load( abiinfo, theInstr )) goto decode_success;
+         if (dis_av_load( abiinfo, theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* AV Store */
       case 0x087: case 0x0A7: case 0x0C7: // stvebx, stvehx, stvewx
       case 0x0E7: case 0x1E7:             // stvx, stvxl
          if (!allow_V) goto decode_noV;
-         if (dis_av_store( theInstr )) goto decode_success;
+         if (dis_av_store( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       /* VSX Load */
@@ -19527,7 +19768,7 @@ DisResult disInstr_PPC_WRK (
         // if allow_V is not set, we'll skip trying to decode.
         if (!allow_V) goto decode_noV;
 
-    	  if (dis_vx_load( theInstr )) goto decode_success;
+        if (dis_vx_load( theInstr, guest_is_BE )) goto decode_success;
           goto decode_failure;
 
       /* VSX Store */
@@ -19540,19 +19781,19 @@ DisResult disInstr_PPC_WRK (
         // if allow_V is not set, we'll skip trying to decode.
         if (!allow_V) goto decode_noV;
 
-    	  if (dis_vx_store( theInstr )) goto decode_success;
+        if (dis_vx_store( theInstr, guest_is_BE )) goto decode_success;
     	  goto decode_failure;
 
       /* Miscellaneous ISA 2.06 instructions */
       case 0x1FA: // popcntd
       case 0x17A: // popcntw
       case 0x7A:  // popcntb
-    	  if (dis_int_logic( theInstr )) goto decode_success;
+          if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
     	  goto decode_failure;
 
       case 0x0FC: // bpermd
          if (!mode64) goto decode_failure;
-         if (dis_int_logic( theInstr )) goto decode_success;
+         if (dis_int_logic( theInstr, guest_is_BE )) goto decode_success;
          goto decode_failure;
 
       default:
@@ -19942,6 +20183,7 @@ DisResult disInstr_PPC ( IRSB*        irsb_IN,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian_IN,
+                         Bool         guest_bigendian_IN,
                          Bool         sigill_diag_IN )
 {
    IRType     ty;
@@ -19954,6 +20196,13 @@ DisResult disInstr_PPC ( IRSB*        irsb_IN,
    /* global -- ick */
    mode64 = guest_arch == VexArchPPC64;
    ty = mode64 ? Ity_I64 : Ity_I32;
+   if (!mode64 && !guest_bigendian_IN) {
+      vex_printf("disInstr(ppc): Little Endian 32-bit mode is not supported\n");
+      dres.whatNext    = Dis_StopHere;
+      dres.jk_StopHere = Ijk_NoDecode;
+      dres.len         = 0;
+      return dres;
+   }
 
    /* do some sanity checks */
    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
@@ -19979,7 +20228,8 @@ DisResult disInstr_PPC ( IRSB*        irsb_IN,
    guest_CIA_bbstart    = mkSzAddr(ty, guest_IP - delta);
 
    dres = disInstr_PPC_WRK ( resteerOkFn, resteerCisOk, callback_opaque,
-                             delta, archinfo, abiinfo, sigill_diag_IN );
+                             delta, archinfo, abiinfo, sigill_diag_IN,
+			     guest_bigendian_IN);
 
    return dres;
 }
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
index 63dd1af..b11dcc6 100644
--- a/VEX/priv/guest_s390_defs.h
+++ b/VEX/priv/guest_s390_defs.h
@@ -51,6 +51,7 @@ DisResult disInstr_S390 ( IRSB*        irbb,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian,
+                          Bool         guest_bigendian,
                           Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index b6fc165..06b6ca0 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -16566,6 +16566,7 @@ disInstr_S390(IRSB        *irsb_IN,
               VexArchInfo *archinfo,
               VexAbiInfo  *abiinfo,
               Bool         host_bigendian,
+              Bool         guest_bigendian,
               Bool         sigill_diag_IN)
 {
    vassert(guest_arch == VexArchS390X);
diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h
index 1c64912..c87e23c 100644
--- a/VEX/priv/guest_x86_defs.h
+++ b/VEX/priv/guest_x86_defs.h
@@ -61,6 +61,7 @@ DisResult disInstr_X86 ( IRSB*        irbb,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian,
+                         Bool         guest_bigendian_IN,
                          Bool         sigill_diag );
 
 /* Used by the optimiser to specialise calls to helpers. */
diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c
index 37afd97..8da303c 100644
--- a/VEX/priv/guest_x86_toIR.c
+++ b/VEX/priv/guest_x86_toIR.c
@@ -15422,6 +15422,7 @@ DisResult disInstr_X86 ( IRSB*        irsb_IN,
                          VexArchInfo* archinfo,
                          VexAbiInfo*  abiinfo,
                          Bool         host_bigendian_IN,
+                         Bool         guest_bigendian_IN,
                          Bool         sigill_diag_IN )
 {
    Int       i, x1, x2;
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index 7c98aeb..95e64f8 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -3115,24 +3115,42 @@ static UInt vregNo ( HReg v )
    return n;
 }
 
-/* Emit an instruction big-endianly */
+/* Emit an instruction ppc-endianly */
 static UChar* emit32 ( UChar* p, UInt w32 )
 {
+#if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
    *p++ = toUChar((w32 >> 24) & 0x000000FF);
    *p++ = toUChar((w32 >> 16) & 0x000000FF);
    *p++ = toUChar((w32 >>  8) & 0x000000FF);
    *p++ = toUChar((w32)       & 0x000000FF);
+#elif defined(VGP_ppc64le_linux)
+   *p++ = toUChar((w32)       & 0x000000FF);
+   *p++ = toUChar((w32 >>  8) & 0x000000FF);
+   *p++ = toUChar((w32 >> 16) & 0x000000FF);
+   *p++ = toUChar((w32 >> 24) & 0x000000FF);
+#else
+   //error Unknown variant of PowerPC
+#endif
    return p;
 }
 
-/* Fetch an instruction big-endianly */
+/* Fetch an instruction ppc-endianly */
 static UInt fetch32 ( UChar* p )
 {
    UInt w32 = 0;
+#if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
    w32 |= ((0xFF & (UInt)p[0]) << 24);
    w32 |= ((0xFF & (UInt)p[1]) << 16);
    w32 |= ((0xFF & (UInt)p[2]) <<  8);
    w32 |= ((0xFF & (UInt)p[3]) <<  0);
+#elif defined(VGP_ppc64le_linux)
+   w32 |= ((0xFF & (UInt)p[3]) << 24);
+   w32 |= ((0xFF & (UInt)p[2]) << 16);
+   w32 |= ((0xFF & (UInt)p[1]) <<  8);
+   w32 |= ((0xFF & (UInt)p[0]) <<  0);
+#else
+   //error Unknown variant of PowerPC
+#endif
    return w32;
 }
 
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index d35ea6d..eb58ba3 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -47,6 +47,11 @@
 /* GPR register class for ppc32/64 */
 #define HRcGPR(__mode64) (__mode64 ? HRcInt64 : HRcInt32)
 
+#if defined(VGP_ppc64le_linux)
+#define IENDIANESS   Iend_LE
+#else
+#define IENDIANESS   Iend_BE
+#endif
 
 /*---------------------------------------------------------*/
 /*--- Register Usage Conventions                        ---*/
@@ -1275,29 +1280,28 @@ static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
       r_src = ri->Pri.Reg;
    }
 
-   /* default case: store r_src in lowest lane of 16-aligned mem,
-      load vector, splat lowest lane to dst */
    {
-      /* CAB: Maybe faster to store r_src multiple times (sz dependent),
-              and simply load the vector? */
+      /* Store r_src multiple times (sz dependent); then load the dest vector. */
       HReg r_aligned16;
-      HReg v_src = newVRegV(env);
-      PPCAMode *am_off12;
+      PPCAMode *am_offset, *am_offset_zero;
 
       sub_from_sp( env, 32 );     // Move SP down
       /* Get a 16-aligned address within our stack space */
       r_aligned16 = get_sp_aligned16( env );
-      am_off12 = PPCAMode_IR( 12, r_aligned16 );
 
-      /* Store r_src in low word of 16-aligned mem */
-      addInstr(env, PPCInstr_Store( 4, am_off12, r_src, env->mode64 ));
+      Int i;
+      Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
+      UChar num_bytes_to_store = stride;
+      am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
+      am_offset = am_offset_zero;
+      for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
+         addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
+      }
 
-      /* Load src to vector[low lane] */
-      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, v_src, am_off12 ) );
+      /* Effectively splat the r_src value to dst */
+      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_offset_zero ) );
       add_to_sp( env, 32 );       // Reset SP
 
-      /* Finally, splat v_src[low_lane] to dst */
-      addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Reg(v_src)));
       return dst;
    }
 }
@@ -1382,7 +1386,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
    case Iex_Load: {
       HReg      r_dst;
       PPCAMode* am_addr;
-      if (e->Iex.Load.end != Iend_BE)
+      if (e->Iex.Load.end != IENDIANESS)
          goto irreducible;
       r_dst   = newVRegI(env);
       am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
@@ -1828,7 +1832,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
          DECLARE_PATTERN(p_LDbe16_then_16Uto32);
          DEFINE_PATTERN(p_LDbe16_then_16Uto32,
                         unop(Iop_16Uto32,
-                             IRExpr_Load(Iend_BE,Ity_I16,bind(0))) );
+                             IRExpr_Load(IENDIANESS,Ity_I16,bind(0))) );
          if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
             HReg r_dst = newVRegI(env);
             PPCAMode* amode
@@ -2058,19 +2062,30 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
          HReg        r_aligned16;
          HReg        dst  = newVRegI(env);
          HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg);
-         PPCAMode *am_off0, *am_off12;
+         PPCAMode *am_off0, *am_off_word0;
          sub_from_sp( env, 32 );     // Move SP down 32 bytes
 
          // get a quadword aligned address within our stack space
          r_aligned16 = get_sp_aligned16( env );
          am_off0  = PPCAMode_IR( 0, r_aligned16 );
-         am_off12 = PPCAMode_IR( 12,r_aligned16 );
+
+         /* Note that the store below (done via PPCInstr_AvLdSt) uses
+          * stvx, which stores the vector in proper LE format,
+          * with byte zero (far right byte of the register in LE format)
+          * stored at the lowest memory address.  Therefore, to obtain
+          * integer word zero, we need to use that lowest memory address
+          * as the base for the load.
+          */
+         if (IENDIANESS == Iend_LE)
+            am_off_word0 = am_off0;
+         else
+            am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
 
          // store vec, load low word to dst
          addInstr(env,
                   PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
          addInstr(env,
-                  PPCInstr_Load( 4, dst, am_off12, mode64 ));
+                  PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
 
          add_to_sp( env, 32 );       // Reset SP
          return dst;
@@ -2082,7 +2097,7 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             HReg     r_aligned16;
             HReg     dst = newVRegI(env);
             HReg     vec = iselVecExpr(env, e->Iex.Unop.arg);
-            PPCAMode *am_off0, *am_off8;
+            PPCAMode *am_off0, *am_off8, *am_off_arg;
             sub_from_sp( env, 32 );     // Move SP down 32 bytes
 
             // get a quadword aligned address within our stack space
@@ -2090,13 +2105,24 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             am_off0 = PPCAMode_IR( 0, r_aligned16 );
             am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
 
-            // store vec, load low word (+8) or high (+0) to dst
+            // store vec, then load its low or high 64-bit half to dst
             addInstr(env,
                      PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
+            if (IENDIANESS == Iend_LE) {
+               if (op_unop == Iop_V128HIto64)
+                  am_off_arg = am_off8;
+               else
+                  am_off_arg = am_off0;
+            } else {
+               if (op_unop == Iop_V128HIto64)
+                  am_off_arg = am_off0;
+               else
+                  am_off_arg = am_off8;
+            }
             addInstr(env,
                      PPCInstr_Load( 
                         8, dst, 
-                        op_unop == Iop_V128HIto64 ? am_off0 : am_off8, 
+                        am_off_arg,
                         mode64 ));
 
             add_to_sp( env, 32 );       // Reset SP
@@ -2185,7 +2211,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
          HReg        argregs[1];
          HReg        r_dst  = newVRegI(env);
          Int         argreg;
-         HWord*      fdescr;
 
          argiregs = 0;
          argreg = 0;
@@ -2196,11 +2221,18 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
                                      iselWordExpr_R(env, e->Iex.Unop.arg) ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-
-         fdescr = (HWord*)h_calc_BCDtoDPB;
-         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+#if defined(VGP_ppc64le_linux)
+         addInstr(env, PPCInstr_Call( cc, (Addr64) h_calc_BCDtoDPB,
                                       argiregs, mk_RetLoc_simple(RLPri_Int)) );
-
+#else
+	 {
+             HWord*      fdescr;
+             fdescr = (HWord*)h_calc_BCDtoDPB;
+             addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+                                          argiregs,
+                                          mk_RetLoc_simple(RLPri_Int)) );
+         }
+#endif
          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
          return r_dst;
       }
@@ -2214,7 +2246,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
          HReg        argregs[1];
          HReg        r_dst  = newVRegI(env);
          Int         argreg;
-         HWord*      fdescr;
 
          argiregs = 0;
          argreg = 0;
@@ -2225,11 +2256,18 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
                                      iselWordExpr_R(env, e->Iex.Unop.arg) ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-
-         fdescr = (HWord*)h_calc_DPBtoBCD;
-         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+#if defined(VGP_ppc64le_linux)
+         addInstr(env, PPCInstr_Call( cc, (Addr64)(h_calc_DPBtoBCD),
                                       argiregs, mk_RetLoc_simple(RLPri_Int) ) );
-
+#else
+         {
+             HWord*      fdescr;
+             fdescr = (HWord*)h_calc_DPBtoBCD;
+             addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+                                          argiregs,
+                                          mk_RetLoc_simple(RLPri_Int) ) );
+         }
+#endif
          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
          return r_dst;
       }
@@ -3075,7 +3113,7 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
 
    /* 64-bit load */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       HReg tLo    = newVRegI(env);
       HReg tHi    = newVRegI(env);
       HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr);
@@ -3552,7 +3590,6 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
          HReg        tHi = newVRegI(env);
          HReg        tmpHi;
          HReg        tmpLo;
-         ULong       target;
          Bool        mode64 = env->mode64;
 
          argregs[0] = hregPPC_GPR3(mode64);
@@ -3570,11 +3607,19 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
          addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-         target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
-
-         addInstr( env, PPCInstr_Call( cc, (Addr64)target,
+#if defined(VGP_ppc64le_linux)
+         addInstr( env, PPCInstr_Call( cc, (Addr64)h_calc_BCDtoDPB,
                                        argiregs,
                                        mk_RetLoc_simple(RLPri_2Int) ) );
+#else
+         {
+             ULong       target;
+             target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
+             addInstr( env, PPCInstr_Call( cc, (Addr64)target,
+                                           argiregs,
+                                           mk_RetLoc_simple(RLPri_2Int) ) );
+         }
+#endif
          addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
          addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
 
@@ -3592,7 +3637,6 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
          HReg        tHi = newVRegI(env);
          HReg        tmpHi;
          HReg        tmpLo;
-         ULong       target;
          Bool        mode64 = env->mode64;
 
          argregs[0] = hregPPC_GPR3(mode64);
@@ -3611,10 +3655,18 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
-
-         addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs,
+#if defined(VGP_ppc64le_linux)
+         addInstr(env, PPCInstr_Call( cc, (Addr64)h_calc_DPBtoBCD, argiregs,
                                       mk_RetLoc_simple(RLPri_2Int) ) );
+#else
+         {
+             ULong       target;
+             target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
+             addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs,
+                                          mk_RetLoc_simple(RLPri_2Int) ) );
+         }
+#endif
+
          addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
          addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
 
@@ -3664,7 +3716,7 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_F32);
@@ -3866,7 +3918,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
    }
 
    /* --------- LOAD --------- */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       HReg r_dst = newVRegF(env);
       PPCAMode* am_addr;
       vassert(e->Iex.Load.ty == Ity_F64);
@@ -4127,7 +4179,7 @@ static HReg iselDfp32Expr_wrk(ISelEnv* env, IRExpr* e)
    }
 
    /* --------- LOAD --------- */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_D32);
@@ -4182,7 +4234,7 @@ static HReg iselDfp64Expr_wrk(ISelEnv* env, IRExpr* e)
       return r_dst;
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_D64);
@@ -4732,7 +4784,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       return dst;
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IENDIANESS) {
       PPCAMode* am_addr;
       HReg v_dst = newVRegV(env);
       vassert(e->Iex.Load.ty == Ity_V128);
@@ -4826,15 +4878,24 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
          /* Store zeros */
          r_zeros = newVRegI(env);
          addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
-         addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
+         if (IENDIANESS == Iend_LE)
+            addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
+         else
+            addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
          addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
          addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
 
          /* Store r_src in low word of quadword-aligned mem */
-         addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
+         if (IENDIANESS == Iend_LE)
+            addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
+         else
+            addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
 
          /* Load word into low word of quadword vector reg */
-         addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
+         if (IENDIANESS == Iend_LE)
+            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
+         else
+            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
 
          add_to_sp( env, 32 );       // Reset SP
          return dst;
@@ -4918,9 +4979,13 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
             am_off8  = PPCAMode_IR( 8,  r_aligned16 );
             
             /* Store 2*I64 to stack */
-            addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
-            addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
-
+            if (IENDIANESS == Iend_LE) {
+               addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
+               addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
+            } else {
+               addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
+               addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
+            }
             /* Fetch result back from stack. */
             addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
             
@@ -5274,7 +5339,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
       IREndness end   = stmt->Ist.Store.end;
 
-      if (end != Iend_BE)
+      if (end != IENDIANESS)
          goto stmt_fail;
       if (!mode64 && (tya != Ity_I32))
          goto stmt_fail;
@@ -5532,7 +5597,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
       IRType tyRes  = typeOfIRTemp(env->type_env, res);
       IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
 
-      if (stmt->Ist.LLSC.end != Iend_BE)
+      if (stmt->Ist.LLSC.end != IENDIANESS)
          goto stmt_fail;
       if (!mode64 && (tyAddr != Ity_I32))
          goto stmt_fail;
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index 495942a..c02c425 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -377,7 +377,15 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
                                void*,void*,void*,void*))
                        emit_PPCInstr;
+#if defined(VGP_ppc64be_linux)
          host_is_bigendian = True;
+#elif defined(VGP_ppc64le_linux)
+         host_is_bigendian = False;
+#else
+	 /* unknown variant of PPC64 */
+	 vex_printf("ERROR: call to getUIntPPCendianly(); Unknown variant of PowerPC\n");
+#endif
+
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
          break;
diff --git a/coregrind/launcher-darwin.c b/coregrind/launcher-darwin.c
index 1f99026..8449285 100644
--- a/coregrind/launcher-darwin.c
+++ b/coregrind/launcher-darwin.c
@@ -64,6 +64,7 @@ static struct {
    { CPU_TYPE_ARM,         "arm",     "arm" },
    { CPU_TYPE_POWERPC,     "ppc",     "ppc32" },
    { CPU_TYPE_POWERPC64BE, "ppc64be", "ppc64be" },
+   { CPU_TYPE_POWERPC64LE, "ppc64le", "ppc64le" },
 };
 static int valid_archs_count = sizeof(valid_archs)/sizeof(valid_archs[0]);
 
diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c
index 38e4857..4f6b274 100644
--- a/coregrind/launcher-linux.c
+++ b/coregrind/launcher-linux.c
@@ -228,6 +228,10 @@ static const char *select_platform(const char *clientname)
                 (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
                  ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
                platform = "arm64-linux";
+            } else if (ehdr->e_machine == EM_PPC64 &&
+                (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
+                 ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
+               platform = "ppc64le-linux";
             }
          } else if (header[EI_DATA] == ELFDATA2MSB) {
 #           if !defined(VGPV_arm_linux_android) \
@@ -321,6 +325,7 @@ int main(int argc, char** argv, char** envp)
        (0==strcmp(VG_PLATFORM,"amd64-linux"))  ||
        (0==strcmp(VG_PLATFORM,"ppc32-linux"))  ||
        (0==strcmp(VG_PLATFORM,"ppc64be-linux"))  ||
+       (0==strcmp(VG_PLATFORM,"ppc64le-linux"))  ||
        (0==strcmp(VG_PLATFORM,"arm-linux"))    ||
        (0==strcmp(VG_PLATFORM,"arm64-linux"))  ||
        (0==strcmp(VG_PLATFORM,"s390x-linux"))  ||
diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c
index 08ddddd..cba4204 100644
--- a/coregrind/m_coredump/coredump-elf.c
+++ b/coregrind/m_coredump/coredump-elf.c
@@ -343,6 +343,27 @@ static void fill_prstatus(const ThreadState *tst,
    regs->dsisr = 0;
    regs->result = 0;
 
+#elif defined(VGP_ppc64le_linux)
+#  define DO(n)  regs->gpr[n] = arch->vex.guest_GPR##n
+   DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
+   DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
+   DO(16); DO(17); DO(18); DO(19); DO(20); DO(21); DO(22); DO(23);
+   DO(24); DO(25); DO(26); DO(27); DO(28); DO(29); DO(30); DO(31);
+#  undef DO
+
+   regs->nip = arch->vex.guest_CIA;
+   regs->msr = 0xf033;   /* pretty arbitrary */
+   regs->orig_gpr3 = arch->vex.guest_GPR3;
+   regs->ctr = arch->vex.guest_CTR;
+   regs->link = arch->vex.guest_LR;
+   regs->xer = LibVEX_GuestPPC64_get_XER( &((ThreadArchState*)arch)->vex );
+   regs->ccr = LibVEX_GuestPPC64_get_CR( &((ThreadArchState*)arch)->vex );
+   /* regs->mq = 0; */
+   regs->trap = 0;
+   regs->dar = 0; /* should be fault address? */
+   regs->dsisr = 0;
+   regs->result = 0;
+
 #elif defined(VGP_arm_linux)
    regs->ARM_r0   = arch->vex.guest_R0;
    regs->ARM_r1   = arch->vex.guest_R1;
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index 421ccf3..923636f 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -1,4 +1,5 @@
 
+
 /*--------------------------------------------------------------------*/
 /*--- Top level management of symbols and debugging information.   ---*/
 /*---                                                  debuginfo.c ---*/
@@ -4079,6 +4080,7 @@ void VG_(DebugInfo_syms_getidx) ( const DebugInfo *si,
                                         Int idx,
                                   /*OUT*/Addr*    avma,
                                   /*OUT*/Addr*    tocptr,
+                                  /*OUT*/Addr*    second_ep,
                                   /*OUT*/UInt*    size,
                                   /*OUT*/HChar**  pri_name,
                                   /*OUT*/HChar*** sec_names,
@@ -4088,6 +4090,7 @@ void VG_(DebugInfo_syms_getidx) ( const DebugInfo *si,
    vg_assert(idx >= 0 && idx < si->symtab_used);
    if (avma)      *avma      = si->symtab[idx].addr;
    if (tocptr)    *tocptr    = si->symtab[idx].tocptr;
+   if (second_ep) *second_ep = si->symtab[idx].second_ep;
    if (size)      *size      = si->symtab[idx].size;
    if (pri_name)  *pri_name  = si->symtab[idx].pri_name;
    if (sec_names) *sec_names = (HChar **)si->symtab[idx].sec_names; // FIXME
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index 63328ef..d485517 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -71,7 +71,8 @@
 typedef 
    struct { 
       Addr    addr;    /* lowest address of entity */
-      Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
+      Addr    tocptr;  /* ppc64be-linux only: value that R2 should have */
+      Addr    second_ep; /* address for secondary entry point, ppc64le */
       HChar*  pri_name;  /* primary name, never NULL */
       HChar** sec_names; /* NULL, or a NULL term'd array of other names */
       // XXX: this could be shrunk (on 32-bit platforms) by using 30
@@ -791,7 +792,7 @@ struct _DebugInfo {
    Bool   gotplt_present;
    Addr   gotplt_avma;
    SizeT  gotplt_size;
-   /* .opd -- needed on ppc64-linux for finding symbols */
+   /* .opd -- needed on ppc64be-linux for finding symbols */
    Bool   opd_present;
    Addr   opd_avma;
    SizeT  opd_size;
diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c
index 21a42cb..d8fb8d6 100644
--- a/coregrind/m_debuginfo/readelf.c
+++ b/coregrind/m_debuginfo/readelf.c
@@ -240,7 +240,11 @@ Bool get_elf_symbol_info (
         Bool*   from_opd_out,   /* ppc64be-linux only: did we deref an
                                   .opd entry? */
         Bool*   is_text_out,    /* is this a text symbol? */
-        Bool*   is_ifunc        /* is this a  STT_GNU_IFUNC function ?*/
+        Bool*   is_ifunc,       /* is this a  STT_GNU_IFUNC function ?*/
+        Addr*   sym_second_ep   /* addr for second entry point.  PPC64 LE
+                                   supports a local and global entry point.
+                                   Use this value to return the local
+                                   entry point. */
      )
 {
    Bool plausible;
@@ -258,6 +262,8 @@ Bool get_elf_symbol_info (
    *sym_tocptr_out    = 0; /* unknown/inapplicable */
    *from_opd_out      = False;
    *is_ifunc          = False;
+   *sym_second_ep     = 0; /* unknown/inapplicable */
+
    /* Get the symbol size, but restrict it to fit in a signed 32 bit
       int.  Also, deal with the stupid case of negative size by making
       the size be 1.  Note that sym->st_size has type UWord,
@@ -670,23 +676,65 @@ Bool get_elf_symbol_info (
    }
 
 #  if defined(VGP_ppc64be_linux)
-   /* It's crucial that we never add symbol addresses in the .opd
-      section.  This would completely mess up function redirection and
-      intercepting.  This assert ensures that any symbols that make it
-      into the symbol table on ppc64-linux don't point into .opd. */
    if (di->opd_present && di->opd_size > 0) {
       vg_assert(*sym_avma_out + *sym_size_out <= di->opd_avma
                 || *sym_avma_out >= di->opd_avma + di->opd_size);
    }
+#endif
+
+#  if defined(VGP_ppc64le_linux)
+   /* PPC64 LE ABI uses three bits in the st_other field to indicate the number
+    * of instructions between the function's global and local entry points. An
+    * offset of 0 indicates that there is one entry point.  The value must be:
+    *
+    * 0  - one entry point, local and global are the same
+    * 1  - reserved
+    * 2  - local entry point is one instruction after the global entry point
+    * 3  - local entry point is two instructions after the global entry point
+    * 4  - local entry point is four instructions after the global entry point
+    * 5  - local entry point is eight instructions after the global entry point
+    * 6  - local entry point is sixteen instructions after the global entry point
+    * 7  - reserved
+    *
+    *  To extract the three bit field from the st_other field:
+    *        (other_field & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT
+    *
+    *  where the #define values are given in include/elf/powerpc.h file for
+    *  the PPC binutils.
+    *
+    * Conversion of the three bit field to bytes is given by
+    *
+    *       ((1 << bit_field) >> 2) << 2
+    */
+
+   #define STO_PPC64_LOCAL_BIT             5
+   #define STO_PPC64_LOCAL_MASK            (7 << STO_PPC64_LOCAL_BIT)
+   {
+      unsigned int bit_field, dist_to_local_entry;
+      /* Extract the 3-bit local-entry offset code from st_other. */
+      bit_field = (sym->st_other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
+
+      if ((bit_field > 0) && (bit_field < 7)) {
+         /* Convert the code to a byte offset and store the local entry. */
+         dist_to_local_entry = ((1 << bit_field) >> 2) << 2;
+         *sym_second_ep = *sym_avma_out + dist_to_local_entry;
+         if (TRACE_SYMTAB_ENABLED) {
+            HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
+                                             "di.gesi.5", sym_name_ioff);
+            VG_(printf)("Second entry point: %s at %#010lx\n",
+                        sym_name, *sym_second_ep); /* full-width Addr */
+            ML_(dinfo_free)(sym_name); /* release the img_strdup'd copy */
+         }
+      }
+   }
 #  endif
 
    /* Acquire! */
    return True;
 }
 
-
 /* Read an ELF symbol table (normal or dynamic).  This one is for the
-   "normal" case ({x86,amd64,ppc32,arm,mips32,mips64}-linux). */
+   "normal" case ({x86,amd64,ppc32,arm,mips32,mips64, ppc64le}-linux). */
 static
 __attribute__((unused)) /* not referred to on all targets */
 void read_elf_symtab__normal( 
@@ -725,6 +773,7 @@ void read_elf_symtab__normal(
       Addr   sym_avma_really = 0;
       Int    sym_size = 0;
       Addr   sym_tocptr = 0;
+      Addr   sym_second_ep = 0;
       Bool   from_opd = False, is_text = False, is_ifunc = False;
       DiOffT sym_name_really = DiOffT_INVALID;
       if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab, 
@@ -734,7 +783,8 @@ void read_elf_symtab__normal(
                               &sym_avma_really,
                               &sym_size,
                               &sym_tocptr,
-                              &from_opd, &is_text, &is_ifunc)) {
+                              &from_opd, &is_text, &is_ifunc,
+                              &sym_second_ep)) {
 
          DiSym  disym;
          VG_(memset)(&disym, 0, sizeof(disym));
@@ -742,6 +792,9 @@ void read_elf_symtab__normal(
                                        "di.res__n.1", sym_name_really);
          disym.addr      = sym_avma_really;
          disym.tocptr    = sym_tocptr;
+#  if defined(VGP_ppc64le_linux)
+         disym.second_ep = sym_second_ep;
+#endif
          disym.pri_name  = ML_(addStr) ( di, cstr, -1 );
          disym.sec_names = NULL;
          disym.size      = sym_size;
@@ -749,7 +802,7 @@ void read_elf_symtab__normal(
          disym.isIFunc   = is_ifunc;
          if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
          vg_assert(disym.pri_name);
-         vg_assert(disym.tocptr == 0); /* has no role except on ppc64-linux */
+         vg_assert(disym.tocptr == 0); /* has no role except on ppc64be-linux */
          ML_(addSym) ( di, &disym );
 
          if (TRACE_SYMTAB_ENABLED) {
@@ -761,6 +814,10 @@ void read_elf_symtab__normal(
                          (Int)disym.size,
                          (HChar*)disym.pri_name
             );
+            if (sym_second_ep != 0) {
+               TRACE_SYMTAB("               second entry point %#010lx\n",
+                            sym_second_ep);
+            }
          }
 
       }
@@ -856,6 +913,7 @@ void read_elf_symtab__ppc64be_linux(
       Addr   sym_avma_really = 0;
       Int    sym_size = 0;
       Addr   sym_tocptr = 0;
+      Addr   sym_second_ep = 0;
       Bool   from_opd = False, is_text = False, is_ifunc = False;
       DiOffT sym_name_really = DiOffT_INVALID;
       DiSym  disym;
@@ -867,7 +925,8 @@ void read_elf_symtab__ppc64be_linux(
                               &sym_avma_really,
                               &sym_size,
                               &sym_tocptr,
-                              &from_opd, &is_text, &is_ifunc)) {
+                              &from_opd, &is_text, &is_ifunc,
+                              &sym_second_ep)) {
 
          /* Check if we've seen this (name,addr) key before. */
          key.addr = sym_avma_really;
@@ -2776,6 +2835,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di )
 #     if !defined(VGP_amd64_linux) \
          && !defined(VGP_s390x_linux) \
          && !defined(VGP_ppc64be_linux) \
+         && !defined(VGP_ppc64le_linux) \
          && !defined(VGPV_arm_linux_android) \
          && !defined(VGPV_x86_linux_android) \
          && !defined(VGP_mips64_linux)
diff --git a/coregrind/m_debuginfo/readmacho.c b/coregrind/m_debuginfo/readmacho.c
index 9926237..95b665c 100644
--- a/coregrind/m_debuginfo/readmacho.c
+++ b/coregrind/m_debuginfo/readmacho.c
@@ -199,6 +199,8 @@ static DiSlice map_image_aboard ( DebugInfo* di, /* only for err msgs */
          Int cputype = CPU_TYPE_POWERPC;
 #        elif defined(VGA_ppc64be)
          Int cputype = CPU_TYPE_POWERPC64BE;
+#        elif defined(VGA_ppc64le)
+         Int cputype = CPU_TYPE_POWERPC64LE;
 #        elif defined(VGA_x86)
          Int cputype = CPU_TYPE_X86;
 #        elif defined(VGA_amd64)
diff --git a/coregrind/m_dispatch/dispatch-ppc64-linux.S b/coregrind/m_dispatch/dispatch-ppc64-linux.S
index 35cefdf..0fadef6 100644
--- a/coregrind/m_dispatch/dispatch-ppc64-linux.S
+++ b/coregrind/m_dispatch/dispatch-ppc64-linux.S
@@ -35,6 +35,15 @@
 #include "pub_core_transtab_asm.h"
 #include "libvex_guest_offsets.h"	/* for OFFSET_ppc64_CIA */
 
+/* NOTE: PPC64 supports Big Endian and Little Endian.  It also supports the
+	ELF version 1 and ELF version 2 ABIs.
+
+	Currently LE uses ELF version 2 and BE uses ELF version 1.  However,
+	BE and LE may support the other ELF version in the future.  So, the
+	_CALL_ELF macro is used in the assembly functions to enable code for a
+	specific ELF version independently of the Endianness of the machine.
+	The test "#if  _CALL_ELF == 2" checks if ELF version 2 is being used.
+*/
 
 /* References to globals via the TOC */
 
@@ -74,14 +83,26 @@ void VG_(disp_run_translations)( UWord* two_words,
 .section ".text"
 .align   2
 .globl   VG_(disp_run_translations)
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+.type VG_(disp_run_translations),@function
+VG_(disp_run_translations):
+.type    .VG_(disp_run_translations),@function
+#else
 .section ".opd","aw"
 .align   3
 VG_(disp_run_translations):
 .quad    .VG_(disp_run_translations),.TOC.@tocbase,0
 .previous
 .type    .VG_(disp_run_translations),@function
+#endif
 .globl   .VG_(disp_run_translations)
 .VG_(disp_run_translations):
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_run_translations), .-VG_(disp_run_translations)
+#endif
+
 	/* r3 holds two_words */
 	/* r4 holds guest_state */
         /* r5 holds host_addr */
@@ -228,8 +249,13 @@ VG_(disp_run_translations):
         /* make a stack frame for the code we are calling */
         stdu    1,-48(1)
 
-        /* Set up the guest state ptr */
+	/* Set up the guest state ptr */
         mr      31,4      /* r31 (generated code gsp) = r4 */
+#if  _CALL_ELF == 2
+/*  for the LE ABI need to setup r2 and r12 */
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+#endif
 
         /* and jump into the code cache.  Chained translations in
            the code cache run, until for whatever reason, they can't
@@ -384,6 +410,9 @@ VG_(disp_run_translations):
         mtlr    0
         addi    1,1,624   /* stack_size */
         blr
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+	.size VG_(disp_run_translations),.-VG_(disp_run_translations)
+#endif
 
 
 /*----------------------------------------------------*/
@@ -394,15 +423,25 @@ VG_(disp_run_translations):
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_chain_me_to_slowEP)
-        .section ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type VG_(disp_cp_chain_me_to_slowEP),@function
+	VG_(disp_cp_chain_me_to_slowEP):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_chain_me_to_slowEP):
         .quad    .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_chain_me_to_slowEP),@function
         .globl   .VG_(disp_cp_chain_me_to_slowEP)
 .VG_(disp_cp_chain_me_to_slowEP):
-        /* We got called.  The return address indicates
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP)
+#endif
+	/* We got called.  The return address indicates
            where the patching needs to happen.  Collect
            the return address and, exit back to C land,
            handing the caller the pair (Chain_me_S, RA) */
@@ -414,20 +453,33 @@ VG_(disp_cp_chain_me_to_slowEP):
         */
         subi 7,7,20+4+4
         b    .postamble
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP)
+#endif
 
 /* ------ Chain me to fast entry point ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_chain_me_to_fastEP)
-        .section ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type VG_(disp_cp_chain_me_to_fastEP),@function
+VG_(disp_cp_chain_me_to_fastEP):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_chain_me_to_fastEP):
         .quad    .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_chain_me_to_fastEP),@function
         .globl   .VG_(disp_cp_chain_me_to_fastEP)
 .VG_(disp_cp_chain_me_to_fastEP):
-        /* We got called.  The return address indicates
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP)
+#endif
+	/* We got called.  The return address indicates
            where the patching needs to happen.  Collect
            the return address and, exit back to C land,
            handing the caller the pair (Chain_me_S, RA) */
@@ -439,20 +491,33 @@ VG_(disp_cp_chain_me_to_fastEP):
         */
         subi 7,7,20+4+4
         b    .postamble
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP)
+#endif
 
 /* ------ Indirect but boring jump ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_xindir)
-        .section ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type VG_(disp_cp_xindir),@function
+VG_(disp_cp_xindir):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_xindir):
         .quad    .VG_(disp_cp_xindir),.TOC.@tocbase,0
         .previous
+#endif
         .type    .VG_(disp_cp_xindir),@function
         .globl   .VG_(disp_cp_xindir)
 .VG_(disp_cp_xindir):
-        /* Where are we going? */
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir)
+#endif
+	/* Where are we going? */
         ld      3,OFFSET_ppc64_CIA(31)
 
         /* stats only */
@@ -478,6 +543,9 @@ VG_(disp_cp_xindir):
         /* Found a match.  Jump to .host. */
         mtctr   7
         bctr
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir)
+#endif
 
 .fast_lookup_failed:
         /* stats only */
@@ -495,36 +563,61 @@ VG_(disp_cp_xindir):
 .section ".text"
         .align   2
         .globl   VG_(disp_cp_xassisted)
-        .section ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type VG_(disp_cp_xassisted),@function
+VG_(disp_cp_xassisted):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_xassisted):
         .quad    .VG_(disp_cp_xassisted),.TOC.@tocbase,0
         .previous
-        .type    .VG_(disp_cp_xassisted),@function
+#endif
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted)
+#endif
+	.type    .VG_(disp_cp_xassisted),@function
         .globl   .VG_(disp_cp_xassisted)
 .VG_(disp_cp_xassisted):
         /* r31 contains the TRC */
         mr      6,31
         li      7,0
         b       .postamble
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted)
+#endif
 
 /* ------ Event check failed ------ */
         .section ".text"
         .align   2
         .globl   VG_(disp_cp_evcheck_fail)
-        .section ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type VG_(disp_cp_evcheck_fail),@function
+VG_(disp_cp_evcheck_fail):
+#else
+	.section ".opd","aw"
         .align   3
 VG_(disp_cp_evcheck_fail):
         .quad    .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
         .previous
+#endif
+#if  _CALL_ELF == 2
+0:      addis 2, 12,.TOC.-0b@ha
+        addi  2,2,.TOC.-0b@l
+        .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail)
+#endif
         .type    .VG_(disp_cp_evcheck_fail),@function
         .globl   .VG_(disp_cp_evcheck_fail)
 .VG_(disp_cp_evcheck_fail):
         li      6,VG_TRC_INNER_COUNTERZERO
         li      7,0
         b       .postamble
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+       .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail)
+#endif
 
-        
 .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
 
 /* Let the linker know we don't need an executable stack */
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index b4c6d92..400fa1b 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -742,7 +742,7 @@ Addr setup_client_stack( void*  init_sp,
             break;
 
 #        if !defined(VGP_ppc32_linux) && !defined(VGP_ppc64be_linux) \
-            && !defined(VGP_ppc64le)
+            && !defined(VGP_ppc64le_linux)
          case AT_SYSINFO_EHDR: {
             /* Trash this, because we don't reproduce it */
             const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr);
@@ -1043,6 +1043,9 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii )
    arch->vex.guest_GPR1 = iifii.initial_client_SP;
    arch->vex.guest_GPR2 = iifii.initial_client_TOC;
    arch->vex.guest_CIA  = iifii.initial_client_IP;
+#if defined(VGP_ppc64le_linux)
+   arch->vex.guest_GPR12 = iifii.initial_client_IP;
+#endif
 
 #  elif defined(VGP_arm_linux)
    /* Zero out the initial state, and set up the simulated FPU in a
diff --git a/coregrind/m_libcsetjmp.c b/coregrind/m_libcsetjmp.c
index 488e8e8..df56e3b 100644
--- a/coregrind/m_libcsetjmp.c
+++ b/coregrind/m_libcsetjmp.c
@@ -158,7 +158,6 @@ __asm__(
 ".align 2"                          "\n"
 ".p2align 4,,15"                    "\n"
 ".globl VG_MINIMAL_SETJMP"          "\n"
-
 ".section \".opd\",\"aw\""          "\n"
 ".align 3"                          "\n"
 "VG_MINIMAL_SETJMP:"                "\n"
@@ -267,9 +266,114 @@ __asm__(
 ""       "\n"
 
 ".previous"  "\n"
-".previous"  "\n"
 );
 
+#elif defined(VGP_ppc64le_linux)
+__asm__(
+".section \".toc\",\"aw\""          "\n"
+
+".section \".text\""                "\n"
+".align 2"                          "\n"
+".p2align 4,,15"                    "\n"
+".globl VG_MINIMAL_SETJMP"          "\n"
+".type VG_MINIMAL_SETJMP,@function" "\n"
+"VG_MINIMAL_SETJMP:"                "\n"
+"       .localentry VG_MINIMAL_SETJMP, .-VG_MINIMAL_SETJMP" "\n"
+"        std     0, 0(3)"  "\n"
+"        std     1, 8(3)"  "\n"
+"        std     2, 16(3)"  "\n"
+"        std     3, 24(3)"  "\n"
+"        std     4, 32(3)"  "\n"
+"        std     5, 40(3)"  "\n"
+"        std     6, 48(3)"  "\n"
+"        std     7, 56(3)"  "\n"
+"        std     8, 64(3)"  "\n"
+"        std     9, 72(3)"  "\n"
+"        std     10, 80(3)"  "\n"
+"        std     11, 88(3)"  "\n"
+"        std     12, 96(3)"  "\n"
+"        std     13, 104(3)"  "\n"
+"        std     14, 112(3)"  "\n"
+"        std     15, 120(3)"  "\n"
+"        std     16, 128(3)"  "\n"
+"        std     17, 136(3)"  "\n"
+"        std     18, 144(3)"  "\n"
+"        std     19, 152(3)"  "\n"
+"        std     20, 160(3)"  "\n"
+"        std     21, 168(3)"  "\n"
+"        std     22, 176(3)"  "\n"
+"        std     23, 184(3)"  "\n"
+"        std     24, 192(3)"  "\n"
+"        std     25, 200(3)"  "\n"
+"        std     26, 208(3)"  "\n"
+"        std     27, 216(3)"  "\n"
+"        std     28, 224(3)"  "\n"
+"        std     29, 232(3)"  "\n"
+"        std     30, 240(3)"  "\n"
+"        std     31, 248(3)"  "\n"
+// must use a caller-save register here as scratch, hence r4
+"        mflr    4"  "\n"
+"        std     4, 256(3)"  "\n"
+"        mfcr    4"  "\n"
+"        std     4, 264(3)"  "\n"
+"        li      3, 0"  "\n"
+"        blr"  "\n"
+""       "\n"
+
+
+".globl VG_MINIMAL_LONGJMP"                "\n"
+".type   VG_MINIMAL_LONGJMP, @function"    "\n"
+"VG_MINIMAL_LONGJMP:"                      "\n"
+"        .localentry VG_MINIMAL_LONGJMP, .-VG_MINIMAL_LONGJMP" "\n"
+         // do r4 = 1
+         // and park it in the restore slot for r3 (the ret reg)
+"        li      4, 1"  "\n"
+"        std     4, 24(3)"  "\n"
+         // restore everything except r3
+         // then r3 last of all
+         // then blr
+"        ld      0, 256(3)"  "\n"
+"        mtlr    0"  "\n"
+"        ld      0, 264(3)"  "\n"
+"        mtcr    0"  "\n"
+"        ld      0, 0(3)"  "\n"
+"        ld      1, 8(3)"  "\n"
+"        ld      2, 16(3)"  "\n"
+         // r3 is done at the end
+"        ld      4, 32(3)"  "\n"
+"        ld      5, 40(3)"  "\n"
+"        ld      6, 48(3)"  "\n"
+"        ld      7, 56(3)"  "\n"
+"        ld      8, 64(3)"  "\n"
+"        ld      9, 72(3)"  "\n"
+"        ld      10, 80(3)"  "\n"
+"        ld      11, 88(3)"  "\n"
+"        ld      12, 96(3)"  "\n"
+"        ld      13, 104(3)"  "\n"
+"        ld      14, 112(3)"  "\n"
+"        ld      15, 120(3)"  "\n"
+"        ld      16, 128(3)"  "\n"
+"        ld      17, 136(3)"  "\n"
+"        ld      18, 144(3)"  "\n"
+"        ld      19, 152(3)"  "\n"
+"        ld      20, 160(3)"  "\n"
+"        ld      21, 168(3)"  "\n"
+"        ld      22, 176(3)"  "\n"
+"        ld      23, 184(3)"  "\n"
+"        ld      24, 192(3)"  "\n"
+"        ld      25, 200(3)"  "\n"
+"        ld      26, 208(3)"  "\n"
+"        ld      27, 216(3)"  "\n"
+"        ld      28, 224(3)"  "\n"
+"        ld      29, 232(3)"  "\n"
+"        ld      30, 240(3)"  "\n"
+"        ld      31, 248(3)"  "\n"
+"        ld      3, 24(3)"  "\n"
+"        blr"               "\n"
+""       "\n"
+
+".previous"  "\n"
+);
 #endif /* VGP_ppc64be_linux */
 
 
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 9e2ee96..082dc4f 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2625,6 +2625,10 @@ static void final_tidyup(ThreadId tid)
    VG_(set_IP)(tid, __libc_freeres_wrapper);
 #  if defined(VGP_ppc64be_linux)
    VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
+#  elif  defined(VGP_ppc64le_linux)
+   /* setting GPR2 but not really needed, GPR12 is needed */
+   VG_(threads)[tid].arch.vex.guest_GPR2  = __libc_freeres_wrapper;
+   VG_(threads)[tid].arch.vex.guest_GPR12 = __libc_freeres_wrapper;
 #  endif
    /* mips-linux note: we need to set t9 */
 #  if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
@@ -2866,6 +2870,51 @@ asm("\n"
     "\tnop\n"
     "\ttrap\n"
 );
+#elif defined(VGP_ppc64le_linux)
+/* Little Endian uses ELF version 2 but in the future may also support
+ * other ELF versions.  NOTE(review): the "#if"/"#endif" text below sits
+ * inside string literals, so the C preprocessor never evaluates it. */
+asm("\n"
+    "\t.align 2\n"
+    "\t.global _start\n"
+    "\t.type _start,@function\n"
+    "_start:\n"
+    "#if _CALL_ELF == 2    \n"
+    "0:  addis        2,12,.TOC.-0b@ha\n"  /* set up r2 (TOC) from r12 */
+    "    addi         2,2,.TOC.-0b@l\n"
+    "    .localentry  _start, .-_start\n"
+    "#endif \n"
+    /* set up the new stack in r16 */
+    "\tlis  16,   vgPlain_interim_stack@highest\n"
+    "\tori  16,16,vgPlain_interim_stack@higher\n"
+    "\tsldi 16,16,32\n"
+    "\toris 16,16,vgPlain_interim_stack@h\n"
+    "\tori  16,16,vgPlain_interim_stack@l\n"
+    "\txor  17,17,17\n"  /* r17 = 0 */
+    "\tlis    17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" >> 16)\n"
+    "\tori 17,17,("VG_STRINGIFY(VG_STACK_GUARD_SZB)" & 0xFFFF)\n"  /* r17 = guard size */
+    "\txor 18,18,18\n"  /* r18 = 0 */
+    "\tlis    18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" >> 16)\n"
+    "\tori 18,18,("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)" & 0xFFFF)\n"  /* r18 = active size */
+    "\tadd 16,17,16\n"  /* r16 += guard size */
+    "\tadd 16,18,16\n"  /* r16 += active size */
+    "\trldicr 16,16,0,59\n"  /* clear the low 4 bits of r16 */
+    /* now r16 = &vgPlain_interim_stack + VG_STACK_GUARD_SZB +
+       VG_STACK_ACTIVE_SZB rounded down to the nearest 16-byte
+       boundary.  And r1 is the original SP.  Set the SP to r16 and
+       call _start_in_C_linux, passing it the initial SP. */
+    "\tmr 3,1\n"   /* first argument = original SP */
+    "\tmr 1,16\n"  /* switch to the interim stack */
+    "\tlis  14,   _start_in_C_linux@highest\n"
+    "\tori  14,14,_start_in_C_linux@higher\n"
+    "\tsldi 14,14,32\n"
+    "\toris 14,14,_start_in_C_linux@h\n"
+    "\tori  14,14,_start_in_C_linux@l\n"
+    "\tmtctr 14\n"  /* indirect call through CTR */
+    "\tbctrl\n"
+    "\tnop\n"
+    "\ttrap\n"  /* presumably unreachable: the call is not expected to return */
+);
 #elif defined(VGP_s390x_linux)
 /*
     This is the canonical entry point, usually the first thing in the text
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index 26bb94a..71ce230 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -397,7 +397,7 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newdi )
    TopSpec*     newts;
    HChar*       sym_name_pri;
    HChar**      sym_names_sec;
-   Addr         sym_addr, sym_toc;
+   Addr         sym_addr, sym_toc, second_ep;
    HChar        demangled_sopatt[N_DEMANGLED];
    HChar        demangled_fnpatt[N_DEMANGLED];
    Bool         check_ppcTOCs = False;
@@ -499,7 +499,7 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newdi )
 
    nsyms = VG_(DebugInfo_syms_howmany)( newdi );
    for (i = 0; i < nsyms; i++) {
-      VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc,
+      VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc, &second_ep,
                                   NULL, &sym_name_pri, &sym_names_sec,
                                   &isText, NULL );
       /* Set up to conveniently iterate over all names for this symbol. */
@@ -592,7 +592,7 @@ void VG_(redir_notify_new_DebugInfo)( DebugInfo* newdi )
 
    if (check_ppcTOCs) {
       for (i = 0; i < nsyms; i++) {
-         VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc,
+         VG_(DebugInfo_syms_getidx)( newdi, i, &sym_addr, &sym_toc, &second_ep,
                                      NULL, &sym_name_pri, &sym_names_sec,
                                      &isText, NULL );
          HChar*  twoslots[2];
@@ -755,7 +755,9 @@ void generate_and_add_actives (
       of trashing the caches less. */
    nsyms = VG_(DebugInfo_syms_howmany)( di );
    for (i = 0; i < nsyms; i++) {
-      VG_(DebugInfo_syms_getidx)( di, i, &sym_addr, NULL,
+      Addr second_ep = 0;
+
+      VG_(DebugInfo_syms_getidx)( di, i, &sym_addr, NULL, &second_ep,
                                   NULL, &sym_name_pri, &sym_names_sec,
                                   &isText, &isIFunc );
       HChar*  twoslots[2];
@@ -783,6 +785,18 @@ void generate_and_add_actives (
                act.isIFunc     = isIFunc;
                sp->done = True;
                maybe_add_active( act );
+
+#if defined(VGP_ppc64le_linux)
+               /* If the function being wrapped has a secondary entry point
+                * redirect it to the global entry point.  The redirection
+                * must save and setup r2 then setup r12 for the new function.
+                * On return, r2 must be restored.
+                */
+               if (second_ep != 0) {
+                  act.from_addr = second_ep;
+                  maybe_add_active( act );
+               }
+#endif
             }
          } /* for (sp = specs; sp; sp = sp->next) */
 
@@ -1298,6 +1312,27 @@ void VG_(redir_initialise) ( void )
       );
    }
 
+#  elif defined(VGP_ppc64le_linux)
+   /* If we're using memcheck, use these intercepts right from
+    * the start, otherwise ld.so makes a lot of noise.
+    */
+   if (0==VG_(strcmp)("Memcheck", VG_(details).name)) {
+
+      /* this is mandatory - can't sanely continue without it */
+      add_hardwired_spec(
+         "ld64.so.2", "strlen",
+         (Addr)&VG_(ppc64_linux_REDIR_FOR_strlen),
+         complain_about_stripped_glibc_ldso
+      );
+
+      add_hardwired_spec(
+         "ld64.so.2", "index",
+         (Addr)&VG_(ppc64_linux_REDIR_FOR_strchr),
+         NULL /* not mandatory - so why bother at all? */
+         /* glibc-2.5 (FC6, ppc64) seems fine without it */
+      );
+   }
+
 #  elif defined(VGP_arm_linux)
    /* If we're using memcheck, use these intercepts right from the
       start, otherwise ld.so makes a lot of noise.  In most ARM-linux
@@ -1565,7 +1600,7 @@ static void handle_require_text_symbols ( DebugInfo* di )
          Bool    isText        = False;
          HChar*  sym_name_pri  = NULL;
          HChar** sym_names_sec = NULL;
-         VG_(DebugInfo_syms_getidx)( di, j, NULL, NULL,
+         VG_(DebugInfo_syms_getidx)( di, j, NULL, NULL, NULL,
                                      NULL, &sym_name_pri, &sym_names_sec,
                                      &isText, NULL );
          HChar*  twoslots[2];
diff --git a/coregrind/m_sigframe/sigframe-ppc64-linux.c b/coregrind/m_sigframe/sigframe-ppc64-linux.c
index bb53806..17a3c50 100644
--- a/coregrind/m_sigframe/sigframe-ppc64-linux.c
+++ b/coregrind/m_sigframe/sigframe-ppc64-linux.c
@@ -252,7 +252,11 @@ void VG_(sigframe_create)( ThreadId tid,
 #  undef DO
 
    frame->uc.uc_mcontext.gp_regs[VKI_PT_NIP]     = tst->arch.vex.guest_CIA;
-   frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR]     = 0xf032;   /* pretty arbitrary */
+#ifdef VGP_ppc64le_linux
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR]     = 0xf033;  /* pretty arbitrary */
+#else
+   frame->uc.uc_mcontext.gp_regs[VKI_PT_MSR]     = 0xf032;  /* pretty arbitrary */
+#endif
    frame->uc.uc_mcontext.gp_regs[VKI_PT_ORIG_R3] = tst->arch.vex.guest_GPR3;
    frame->uc.uc_mcontext.gp_regs[VKI_PT_CTR]     = tst->arch.vex.guest_CTR;
    frame->uc.uc_mcontext.gp_regs[VKI_PT_LNK]     = tst->arch.vex.guest_LR;
@@ -302,9 +306,13 @@ void VG_(sigframe_create)( ThreadId tid,
 
    /* Handler is in fact a standard ppc64-linux function descriptor, 
       so extract the function entry point and also the toc ptr to use. */
+#if defined(VGP_ppc64be_linux)
    SET_SIGNAL_GPR(tid, 2, (Addr) ((ULong*)handler)[1]);
    tst->arch.vex.guest_CIA = (Addr) ((ULong*)handler)[0];
-
+#else
+   SET_SIGNAL_GPR(tid, 12, (Addr) handler);
+   tst->arch.vex.guest_CIA = (Addr) handler;
+#endif
    priv = &frame->priv;
    priv->magicPI       = 0x31415927;
    priv->sigNo_private = sigNo;
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index 0fd11c4..6ce03f8 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -866,6 +866,23 @@ extern void my_sigreturn(void);
    "	li	0, " #name "\n" \
    "	sc\n"
 
+#elif defined(VGP_ppc64le_linux)
+/* Little Endian uses ELF version 2 (no function descriptor), though it may
+ * support other versions in the future.  r0 carries the syscall number. */
+#  define _MY_SIGRETURN(name) \
+   ".align   2\n" \
+   ".globl   my_sigreturn\n" \
+   ".type    my_sigreturn,@function\n" \
+   "my_sigreturn:\n" \
+   "#if _CALL_ELF == 2 \n" \
+   "0: addis        2,12,.TOC.-0b@ha\n" \
+   "   addi         2,2,.TOC.-0b@l\n" \
+   "   .localentry my_sigreturn,.-my_sigreturn\n" \
+   "#endif \n" \
+   "   li           0, " #name "\n" \
+   "   sc\n" \
+   "   .size my_sigreturn,.-my_sigreturn\n"
+
 #elif defined(VGP_arm_linux)
 #  define _MY_SIGRETURN(name) \
    ".text\n" \
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index b111e5c..cd33958 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -422,6 +422,45 @@ asm(
 "        blr\n"
 );
 
+#elif defined(VGP_ppc64le_linux)
+/* Due to the need to return 65 bits of result, this is completely
+   different from the ppc32 case.  The single arg register points to a
+   7-word block containing the syscall # and the 6 args.  The syscall
+   result proper is put in [0] of the block, and %cr0.so is in the
+   bottom bit of [1]. */
+extern void do_syscall_WRK ( ULong* argblock );
+/* Little Endian supports ELF version 2.  In the future, it may support
+ * other versions as well.  NOTE(review): the "#if"/"#endif" lines below are
+ * inside string literals, so the C preprocessor never evaluates them. */
+asm(
+".align   2\n"
+".globl   do_syscall_WRK\n"
+".type    do_syscall_WRK,@function\n"
+"do_syscall_WRK:\n"
+"#if  _CALL_ELF == 2"               "\n"
+"0:      addis        2,12,.TOC.-0b@ha\n"  /* set up r2 (TOC) from r12 */
+"        addi         2,2,.TOC.-0b@l\n"
+"        .localentry do_syscall_WRK, .-do_syscall_WRK\n"
+"#endif"                            "\n"
+"        std  3,-16(1)\n"  /* stash arg */
+"        ld   8, 48(3)\n"  /* sc arg 6 */
+"        ld   7, 40(3)\n"  /* sc arg 5 */
+"        ld   6, 32(3)\n"  /* sc arg 4 */
+"        ld   5, 24(3)\n"  /* sc arg 3 */
+"        ld   4, 16(3)\n"  /* sc arg 2 */
+"        ld   0,  0(3)\n"  /* sc number */
+"        ld   3,  8(3)\n"  /* sc arg 1 (loaded last: overwrites the argblock ptr) */
+"        sc\n"             /* result in r3 and cr0.so */
+"        ld   5,-16(1)\n"  /* reacquire argblock ptr (r5 is caller-save) */
+"        std  3,0(5)\n"    /* argblock[0] = r3 */
+"        mfcr 3\n"         /* r3 = condition register */
+"        srwi 3,3,28\n"    /* bring the cr0 field down to the low 4 bits */
+"        andi. 3,3,1\n"    /* keep only the bottom bit (cr0.so) */
+"        std  3,8(5)\n"    /* argblock[1] = cr0.s0 & 1 */
+"        blr\n"
+"        .size do_syscall_WRK, .-do_syscall_WRK\n"
+);
+
 #elif defined(VGP_arm_linux)
 /* I think the conventions are:
    args  in r0 r1 r2 r3 r4 r5
diff --git a/coregrind/m_syswrap/syscall-ppc64-linux.S b/coregrind/m_syswrap/syscall-ppc64-linux.S
index df73940..47fda64 100644
--- a/coregrind/m_syswrap/syscall-ppc64-linux.S
+++ b/coregrind/m_syswrap/syscall-ppc64-linux.S
@@ -75,12 +75,25 @@
 
 .align 2
 .globl ML_(do_syscall_for_client_WRK)
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+.type .ML_(do_syscall_for_client_WRK),@function
+ML_(do_syscall_for_client_WRK):
+0:      addis         2,12,.TOC.-0b@ha
+        addi          2,2,.TOC.-0b@l
+        .localentry   ML_(do_syscall_for_client_WRK), .-ML_(do_syscall_for_client_WRK)
+#else
 .section ".opd","aw"
 .align 3
-ML_(do_syscall_for_client_WRK):	
+ML_(do_syscall_for_client_WRK):
 .quad .ML_(do_syscall_for_client_WRK),.TOC.@tocbase,0
 .previous
-.type .ML_(do_syscall_for_client_WRK),@function
+#endif
+#if _CALL_ELF == 2
+0:      addis        2,12,.TOC.-0b@ha
+        addi         2,2,.TOC.-0b@l
+        .localentry  ML_(do_syscall_for_client_WRK), .-ML_(do_syscall_for_client_WRK)
+#endif
+.type  .ML_(do_syscall_for_client_WRK),@function
 .globl .ML_(do_syscall_for_client_WRK)
 .ML_(do_syscall_for_client_WRK):
         /* make a stack frame */
@@ -144,7 +157,11 @@ ML_(do_syscall_for_client_WRK):
 	/* failure: return 0x8000 | error code */
 7:	ori	3,3,0x8000	/* FAILURE -- ensure return value is nonzero */
         b       5b
-
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size .ML_(do_syscall_for_client_WRK),.-.ML_(do_syscall_for_client_WRK)
+#else
+        .size .ML_(do_syscall_for_client_WRK),.-.ML_(do_syscall_for_client_WRK)
+#endif
 .section .rodata
 /* export the ranges so that
    VG_(fixup_guest_state_after_syscall_interrupted) can do the
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index c26ed5c..ab51ee1 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -2006,6 +2006,25 @@ void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
    }
 
+#elif defined(VGP_ppc64le_linux)
+   arch->vex.guest_CIA -= 4;             // sizeof(ppc64 instr)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      up over a syscall.  The instruction word for sc is 44 00 00 02;
+      the checks below expect it byte-reversed in memory (little
+      endian), so p[3]..p[0] are compared against 44 00 00 02.
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_CIA;
+
+      if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
+                      arch->vex.guest_CIA + 0ULL, p[3], p[2], p[1], p[0]);
+
+      vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
+   }
+
 #elif defined(VGP_arm_linux)
    if (arch->vex.guest_R15T & 1) {
       // Thumb mode.  SVC is a encoded as
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
index 5d266b8..0f7ac88 100644
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -78,6 +78,7 @@ void ML_(call_on_new_stack_0_1) ( Addr stack,
    address, the second word is the TOC ptr (r2), and the third word is
    the static chain value. */
 asm(
+#if defined(VGP_ppc64be_linux)
 "   .align   2\n"
 "   .globl   vgModuleLocal_call_on_new_stack_0_1\n"
 "   .section \".opd\",\"aw\"\n"
@@ -126,6 +127,55 @@ asm(
 "   mtcr 0\n\t"            // CAB: Need this?
 "   bctr\n\t"              // jump to dst
 "   trap\n"                // should never get here
+#else
+//  ppc64le_linux
+"   .align   2\n"
+"   .globl   vgModuleLocal_call_on_new_stack_0_1\n"
+"vgModuleLocal_call_on_new_stack_0_1:\n"
+"   .type    .vgModuleLocal_call_on_new_stack_0_1,@function\n"
+"#if _CALL_ELF == 2 \n"
+"0: addis        2,12,.TOC.-0b@ha\n"
+"   addi         2,2,.TOC.-0b@l\n"
+"#endif\n"
+".localentry vgModuleLocal_call_on_new_stack_0_1, .-vgModuleLocal_call_on_new_stack_0_1\n"
+"   mr    %r1,%r3\n\t"     // stack to %sp
+"   mtlr  %r4\n\t"         // retaddr to %lr
+"   mtctr %r5\n\t"         // f_ptr to count reg
+"   mr %r3,%r6\n\t"        // arg1 to %r3
+"   li 0,0\n\t"            // zero all GP regs
+"   li 4,0\n\t"
+"   li 5,0\n\t"
+"   li 6,0\n\t"
+"   li 7,0\n\t"
+"   li 8,0\n\t"
+"   li 9,0\n\t"
+"   li 10,0\n\t"
+"   li 11,0\n\t"
+"   li 12,0\n\t"
+"   li 13,0\n\t"
+"   li 14,0\n\t"
+"   li 15,0\n\t"
+"   li 16,0\n\t"
+"   li 17,0\n\t"
+"   li 18,0\n\t"
+"   li 19,0\n\t"
+"   li 20,0\n\t"
+"   li 21,0\n\t"
+"   li 22,0\n\t"
+"   li 23,0\n\t"
+"   li 24,0\n\t"
+"   li 25,0\n\t"
+"   li 26,0\n\t"
+"   li 27,0\n\t"
+"   li 28,0\n\t"
+"   li 29,0\n\t"
+"   li 30,0\n\t"
+"   li 31,0\n\t"
+"   mtxer 0\n\t"           // CAB: Need this?
+"   mtcr 0\n\t"            // CAB: Need this?
+"   bctr\n\t"              // jump to dst
+"   trap\n"                // should never get here
+#endif
 );
 
 
@@ -170,6 +220,7 @@ ULong do_syscall_clone_ppc64_linux ( Word (*fn)(void *),
                                      Int*  parent_tid, 
                                      void/*vki_modify_ldt_t*/ * );
 asm(
+#if defined(VGP_ppc64be_linux)
 "   .align   2\n"
 "   .globl   do_syscall_clone_ppc64_linux\n"
 "   .section \".opd\",\"aw\"\n"
@@ -240,6 +291,78 @@ asm(
 "       ld      31,56(1)\n"
 "       addi    1,1,64\n"
 "       blr\n"
+#else
+"   .align   2\n"
+"   .globl   do_syscall_clone_ppc64_linux\n"
+"   .type    do_syscall_clone_ppc64_linux,@function\n"
+"do_syscall_clone_ppc64_linux:\n"
+"   .globl   .do_syscall_clone_ppc64_linux\n"
+".do_syscall_clone_ppc64_linux:\n"
+"#if _CALL_ELF == 2 \n"
+"0:     addis        2,12,.TOC.-0b@ha \n"
+"       addi         2,2,.TOC.-0b@l \n"
+"#endif \n"
+"   .localentry  do_syscall_clone_ppc64_linux, .-do_syscall_clone_ppc64_linux \n"
+"       stdu    1,-64(1)\n"
+"       std     29,40(1)\n"
+"       std     30,48(1)\n"
+"       std     31,56(1)\n"
+"       mr      30,3\n"              // preserve fn
+"       mr      31,6\n"              // preserve arg
+
+        // setup child stack
+"       rldicr  4,4, 0,59\n"         // trim sp to multiple of 16 bytes
+                                     // (r4 &= ~0xF)
+"       li      0,0\n"
+"       stdu    0,-32(4)\n"          // make initial stack frame
+"       mr      29,4\n"              // preserve sp
+
+        // setup syscall
+"       li      0,"__NR_CLONE"\n"    // syscall number
+"       mr      3,5\n"               // syscall arg1: flags
+        // r4 already setup          // syscall arg2: child_stack
+"       mr      5,8\n"               // syscall arg3: parent_tid
+"       mr      6,13\n"              // syscall arg4: REAL THREAD tls
+"       mr      7,7\n"               // syscall arg5: child_tid
+"       mr      8,8\n"               // syscall arg6: ????
+"       mr      9,9\n"               // syscall arg7: ????
+
+"       sc\n"                        // clone()
+
+"       mfcr    4\n"                 // CR now in low half r4
+"       sldi    4,4,32\n"            // CR now in hi half r4
+
+"       sldi    3,3,32\n"
+"       srdi    3,3,32\n"            // zero out hi half r3
+
+"       or      3,3,4\n"             // r3 = CR : syscall-retval
+"       cmpwi   3,0\n"               // child if retval == 0 (note, cmpw)
+"       bne     1f\n"                // jump if !child
+
+        /* CHILD - call thread function */
+        /* Note: 2.4 kernel doesn't set the child stack pointer,
+           so we do it here.
+           That does leave a small window for a signal to be delivered
+           on the wrong stack, unfortunately. */
+"       mr      1,29\n"
+"       mtctr   30\n"                // ctr reg = fn
+"       mr      3,31\n"              // r3 = arg
+"       bctrl\n"                     // call fn()
+
+        // exit with result
+"       li      0,"__NR_EXIT"\n"
+"       sc\n"
+
+        // Exit returned?!
+"       .long   0\n"
+
+        // PARENT or ERROR - return
+"1:     ld      29,40(1)\n"
+"       ld      30,48(1)\n"
+"       ld      31,56(1)\n"
+"       addi    1,1,64\n"
+"       blr\n"
+#endif
 );
 
 #undef __NR_CLONE
@@ -552,7 +675,7 @@ PRE(sys_clone)
    }
    if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
       PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
-      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int), 
+      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
                                              VKI_PROT_WRITE)) {
          SET_STATUS_Failure( VKI_EFAULT );
          return;
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index 8fabec9..0718fd3 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -444,6 +444,12 @@ VG_(ppctoc_magic_redirect_return_stub):
 	/* this function is written using the "dotless" ABI convention */
 	.align 2
 	.globl VG_(ppc64_linux_REDIR_FOR_strlen)
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        /* Little Endian uses ELF version 2 */
+        .type VG_(ppc64_linux_REDIR_FOR_strlen),@function
+VG_(ppc64_linux_REDIR_FOR_strlen):
+#else
+        /* Big Endian uses ELF version 1 */
 	.section        ".opd","aw"
 	.align 3
 VG_(ppc64_linux_REDIR_FOR_strlen):
@@ -454,7 +460,13 @@ VG_(ppc64_linux_REDIR_FOR_strlen):
 	.type	VG_(ppc64_linux_REDIR_FOR_strlen), @function
 
 .L.VG_(ppc64_linux_REDIR_FOR_strlen):
-        mr 9,3
+#endif
+#if _CALL_ELF == 2
+0:      addis        2,12,.TOC.-0b@ha
+        addi         2,2,.TOC.-0b@l
+        .localentry  VG_(ppc64_linux_REDIR_FOR_strlen), .-VG_(ppc64_linux_REDIR_FOR_strlen)
+#endif
+	mr 9,3
         lbz 0,0(3)
         li 3,0
         cmpwi 7,0,0
@@ -467,14 +479,24 @@ VG_(ppc64_linux_REDIR_FOR_strlen):
         cmpwi 7,0,0
         bne 7,.L01
         blr
-        .long 0
+
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(ppc64_linux_REDIR_FOR_strlen),.-VG_(ppc64_linux_REDIR_FOR_strlen)
+#else
+        .size VG_(ppc64_linux_REDIR_FOR_strlen),.-.L.VG_(ppc64_linux_REDIR_FOR_strlen)
+#endif
+	.long 0
         .byte 0,0,0,0,0,0,0,0
 .L0end:
 
         /* this function is written using the "dotless" ABI convention */
         .align 2
         .globl VG_(ppc64_linux_REDIR_FOR_strchr)
-	.section        ".opd","aw"
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .type   VG_(ppc64_linux_REDIR_FOR_strchr),@function
+VG_(ppc64_linux_REDIR_FOR_strchr):
+#else
+ 	.section        ".opd","aw"
 	.align 3
 VG_(ppc64_linux_REDIR_FOR_strchr):
         .quad   .L.VG_(ppc64_linux_REDIR_FOR_strchr),.TOC.@tocbase,0
@@ -482,8 +504,14 @@ VG_(ppc64_linux_REDIR_FOR_strchr):
         .size   VG_(ppc64_linux_REDIR_FOR_strchr), \
                         .L1end-.L.VG_(ppc64_linux_REDIR_FOR_strchr)
         .type   VG_(ppc64_linux_REDIR_FOR_strchr),@function
-	
+
 .L.VG_(ppc64_linux_REDIR_FOR_strchr):
+#endif
+#if  _CALL_ELF == 2
+0:      addis         2,12,.TOC.-0b@ha
+        addi         2,2,.TOC.-0b@l
+        .localentry VG_(ppc64_linux_REDIR_FOR_strchr), .-VG_(ppc64_linux_REDIR_FOR_strchr)
+#endif
         lbz 0,0(3)
         rldicl 4,4,0,56
         cmpw 7,4,0
@@ -491,6 +519,11 @@ VG_(ppc64_linux_REDIR_FOR_strchr):
         cmpdi 7,0,0
         bne 7,.L18
         b .L14
+#if !defined VGP_ppc64be_linux || _CALL_ELF == 2
+        .size VG_(ppc64_linux_REDIR_FOR_strchr),.-VG_(ppc64_linux_REDIR_FOR_strchr)
+#else
+        .size VG_(ppc64_linux_REDIR_FOR_strchr),.-.L.VG_(ppc64_linux_REDIR_FOR_strchr)
+#endif
 .L19:	
         beq 6,.L14
 .L18:	
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 0919394..fc4ca51 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -1118,6 +1118,10 @@ static IRTemp gen_POP ( IRSB* bb )
    return res;
 }
 
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC)
+
 /* Generate code to push LR and R2 onto this thread's redir stack,
    then set R2 to the new value (which is the TOC pointer to be used
    for the duration of the replacement function, as determined by
@@ -1140,6 +1144,9 @@ static void gen_push_and_set_LR_R2 ( IRSB* bb, Addr64 new_R2_value )
 #    error Platform is not TOC-afflicted, fortunately
 #  endif
 }
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
 
 static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
 {
@@ -1166,6 +1173,9 @@ static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
 #    error Platform is not TOC-afflicted, fortunately
 #  endif
 }
+#endif
+
+#if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
 
 static
 Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
@@ -1187,6 +1197,30 @@ Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
 }
 #endif
 
+#if defined(VGP_ppc64le_linux)
+/* Generate code to push LR and then R2 onto this thread's redir stack.
+   Need to save R2 in case we redirect to a global entry point.  The
+   value of R2 is not preserved when entering the global entry point.
+   Need to make sure R2 gets restored on return.  Set LR to the magic
+   return stub, so we get to intercept the return and restore R2 and
+   LR to the values saved here.
+
+   The existing infrastructure for the TOC enabled architectures is
+   being exploited here.  So, we need to enable a number of the
+   code sections used by VG_PLAT_USES_PPCTOC.
+*/
+
+static void gen_push_R2_and_set_LR ( IRSB* bb )
+{
+   Addr64 bogus_RA  = (Addr64)&VG_(ppctoc_magic_redirect_return_stub);
+   Int    offB_GPR2 = offsetof(VexGuestPPC64State,guest_GPR2);
+   Int    offB_LR   = offsetof(VexGuestPPC64State,guest_LR);
+   gen_PUSH( bb, IRExpr_Get(offB_LR,   Ity_I64) );
+   gen_PUSH( bb, IRExpr_Get(offB_GPR2, Ity_I64) );
+   addStmtToIRSB( bb, IRStmt_Put( offB_LR,   mkU64( bogus_RA )) );
+}
+#  endif
+
 /* --------------- END helpers for with-TOC platforms --------------- */
 
 
@@ -1244,6 +1278,19 @@ Bool mk_preamble__set_NRADDR_to_zero ( void* closureV, IRSB* bb )
      gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
    }
 #  endif
+
+#if defined(VGP_ppc64le_linux)
+   VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
+   Int offB_GPR12 = offsetof(VexGuestArchState, guest_GPR12);
+   addStmtToIRSB(bb, IRStmt_Put(offB_GPR12, mkU64(closure->readdr)));
+   addStmtToIRSB(bb,
+      IRStmt_Put(
+         offsetof(VexGuestArchState,guest_NRADDR_GPR2),
+         VG_WORDSIZE==8 ? mkU64(0) : mkU32(0)
+      )
+   );
+   gen_push_R2_and_set_LR ( bb );
+#endif
    return False;
 }
 
@@ -1277,7 +1324,7 @@ Bool mk_preamble__set_NRADDR_to_nraddr ( void* closureV, IRSB* bb )
    Int offB_GPR25 = offsetof(VexGuestMIPS64State, guest_r25);
    addStmtToIRSB(bb, IRStmt_Put(offB_GPR25, mkU64(closure->readdr)));
 #  endif
-#  if defined(VGP_ppc64be_linux)
+#  if defined(VG_PLAT_USES_PPCTOC) && !defined(VGP_ppc64le_linux)
    addStmtToIRSB( 
       bb,
       IRStmt_Put( 
@@ -1288,6 +1335,22 @@ Bool mk_preamble__set_NRADDR_to_nraddr ( void* closureV, IRSB* bb )
    );
    gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
 #  endif
+#if defined(VGP_ppc64le_linux)
+   /* This saves the r2 before leaving the function.  We need to move
+    * guest_NRADDR_GPR2 back to R2 on return.
+    */
+   Int offB_GPR12 = offsetof(VexGuestArchState, guest_GPR12);
+   addStmtToIRSB(
+      bb,
+      IRStmt_Put(
+         offsetof(VexGuestArchState,guest_NRADDR_GPR2),
+         IRExpr_Get(offsetof(VexGuestArchState,guest_GPR2),
+                    VG_WORDSIZE==8 ? Ity_I64 : Ity_I32)
+      )
+   );
+   addStmtToIRSB(bb, IRStmt_Put(offB_GPR12, mkU64(closure->readdr)));
+   gen_push_R2_and_set_LR ( bb );
+#endif
    return False;
 }
 
@@ -1485,7 +1548,8 @@ Bool VG_(translate) ( ThreadId tid,
    if (kind == T_Redir_Wrap)
       preamble_fn = mk_preamble__set_NRADDR_to_nraddr;
 
-#  if defined(VG_PLAT_USES_PPCTOC)
+   /* On LE the preamble also points LR at the magic return stub */
+#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
    if (ULong_to_Ptr(nraddr)
        == (void*)&VG_(ppctoc_magic_redirect_return_stub)) {
       /* If entering the special return stub, this means a wrapped or
@@ -1528,6 +1592,11 @@ Bool VG_(translate) ( ThreadId tid,
    vex_abiinfo.guest_ppc_zap_RZ_at_bl         = const_True;
    vex_abiinfo.host_ppc_calls_use_fndescrs    = True;
 #  endif
+#  if defined(VGP_ppc64le_linux)
+   vex_abiinfo.guest_ppc_zap_RZ_at_blr        = True;
+   vex_abiinfo.guest_ppc_zap_RZ_at_bl         = const_True;
+   vex_abiinfo.host_ppc_calls_use_fndescrs    = False;
+#  endif
 
    /* Set up closure args. */
    closure.tid    = tid;
diff --git a/coregrind/m_ume/elf.c b/coregrind/m_ume/elf.c
index 910bb7a..fdbd431 100644
--- a/coregrind/m_ume/elf.c
+++ b/coregrind/m_ume/elf.c
@@ -310,6 +310,14 @@ Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    void *entry;
    ESZ(Addr) ebase = 0;
 
+#if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
+   /* The difference between where the interpreter got mapped and
+      where it asked to be mapped.  Needed for computing the ppc64 ELF
+      entry point and initial tocptr (R2) value.
+    */
+   ESZ(Word) interp_offset = 0;
+#endif
+
 #  if defined(HAVE_PIE)
    ebase = info->exe_base;
 #  endif
@@ -495,7 +503,10 @@ Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
       VG_(close)(interp->fd);
 
       entry = (void *)(advised - interp_addr + interp->e.e_entry);
+
+#if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
       info->interp_offset = advised - interp_addr;
+#endif
 
       VG_(free)(interp->p);
       VG_(free)(interp);
@@ -514,6 +525,11 @@ Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
    info->init_toc = ((ULong*)entry)[1];
    info->init_ip  += info->interp_offset;
    info->init_toc += info->interp_offset;
+#elif defined(VGP_ppc64le_linux)
+   /* On PPC64LE (ELF ver 2) the ABI doesn't use a function descriptor */
+   info->init_ip  = (Addr)entry;
+   info->init_toc = 0; /* meaningless on this platform */
+   (void) interp_offset; /* stop gcc complaining it is unused */
 #else
    info->init_ip  = (Addr)entry;
    info->init_toc = 0; /* meaningless on this platform */
diff --git a/coregrind/m_ume/macho.c b/coregrind/m_ume/macho.c
index 7608811..3dfddaf 100644
--- a/coregrind/m_ume/macho.c
+++ b/coregrind/m_ume/macho.c
@@ -701,6 +701,8 @@ load_fat_file(int fd, vki_off_t offset, vki_off_t size, unsigned long filetype,
    good_arch = CPU_TYPE_POWERPC;
 #elif defined(VGA_ppc64be)
    good_arch = CPU_TYPE_POWERPC64BE;
+#elif defined(VGA_ppc64le)
+   good_arch = CPU_TYPE_POWERPC64LE;
 #elif defined(VGA_x86)
    good_arch = CPU_TYPE_I386;
 #elif defined(VGA_amd64)
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index f1d839a..14ea354 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -61,6 +61,11 @@
 #  define VG_ELF_MACHINE      EM_PPC64
 #  define VG_ELF_CLASS        ELFCLASS64
 #  define VG_PLAT_USES_PPCTOC 1
+#elif defined(VGP_ppc64le_linux)
+#  define VG_ELF_DATA2XXX     ELFDATA2LSB
+#  define VG_ELF_MACHINE      EM_PPC64
+#  define VG_ELF_CLASS        ELFCLASS64
+#  undef VG_PLAT_USES_PPCTOC
 #elif defined(VGP_arm_linux)
 #  define VG_ELF_DATA2XXX     ELFDATA2LSB
 #  define VG_ELF_MACHINE      EM_ARM
diff --git a/coregrind/vgdb-invoker-ptrace.c b/coregrind/vgdb-invoker-ptrace.c
index bad4df5..c7aa342 100644
--- a/coregrind/vgdb-invoker-ptrace.c
+++ b/coregrind/vgdb-invoker-ptrace.c
@@ -1015,6 +1015,16 @@ Bool invoker_invoke_gdbserver (pid_t pid)
       user_mod.regs.gpr[3] = check;
       /* put bad_return return address in Link Register */
       user_mod.regs.link = bad_return;
+#elif defined(VGA_ppc64le)
+      /* LE does not use the function descriptor used in BE; nip and r12 both get the entry address */
+      user_mod.regs.nip = shared64->invoke_gdbserver;
+      user_mod.regs.gpr[1] = sp - 512;
+      user_mod.regs.gpr[12] = user_mod.regs.nip;
+      user_mod.regs.trap = -1L;
+      /* put check arg in register 3 */
+      user_mod.regs.gpr[3] = check;
+      /* put bad_return return address in Link Register */
+      user_mod.regs.link = bad_return;
 #elif defined(VGA_s390x)
       /* put check arg in register r2 */
       user_mod.regs.gprs[2] = check;
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index f5d02b2..b956549 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -4473,6 +4473,7 @@ static Bool is_in_dynamic_linker_shared_object( Addr64 ga )
    if (VG_STREQ(soname, VG_U_LD_LINUX_SO_2))        return True;
    if (VG_STREQ(soname, VG_U_LD_LINUX_X86_64_SO_2)) return True;
    if (VG_STREQ(soname, VG_U_LD64_SO_1))            return True;
+   if (VG_STREQ(soname, VG_U_LD64_SO_2))            return True;
    if (VG_STREQ(soname, VG_U_LD_SO_1))              return True;
 #  elif defined(VGO_darwin)
    if (VG_STREQ(soname, VG_U_DYLD)) return True;
diff --git a/include/pub_tool_debuginfo.h b/include/pub_tool_debuginfo.h
index 4f7a09d..4de8508 100644
--- a/include/pub_tool_debuginfo.h
+++ b/include/pub_tool_debuginfo.h
@@ -249,6 +249,7 @@ void VG_(DebugInfo_syms_getidx)  ( const DebugInfo *di,
                                    Int idx,
                                    /*OUT*/Addr*    avma,
                                    /*OUT*/Addr*    tocptr,
+                                   /*OUT*/Addr*    second_ep,
                                    /*OUT*/UInt*    size,
                                    /*OUT*/HChar**  pri_name,
                                    /*OUT*/HChar*** sec_names,
diff --git a/include/pub_tool_redir.h b/include/pub_tool_redir.h
index 0a679e1..bf00e2f 100644
--- a/include/pub_tool_redir.h
+++ b/include/pub_tool_redir.h
@@ -287,6 +287,7 @@
 
 #define  VG_Z_LD64_SO_1             ld64ZdsoZd1                // ld64.so.1
 #define  VG_U_LD64_SO_1             "ld64.so.1"
+#define  VG_U_LD64_SO_2             "ld64.so.2"                // PPC LE loader
 
 #define  VG_Z_LD_SO_1               ldZdsoZd1                  // ld.so.1
 #define  VG_U_LD_SO_1               "ld.so.1"
diff --git a/include/valgrind.h b/include/valgrind.h
index 81c946a..a65f03a 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -116,6 +116,7 @@
 #undef PLAT_amd64_linux
 #undef PLAT_ppc32_linux
 #undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
 #undef PLAT_arm_linux
 #undef PLAT_arm64_linux
 #undef PLAT_s390x_linux
@@ -143,6 +144,9 @@
 #elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2
 /* Big Endian uses ELF version 1 */
 #  define PLAT_ppc64be_linux 1
+#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2
+/* Little Endian uses ELF version 2 */
+#  define PLAT_ppc64le_linux 1
 #elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
 #  define PLAT_arm_linux 1
 #elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
@@ -599,6 +603,82 @@ typedef
 
 #endif /* PLAT_ppc64be_linux */
 
+#if defined(PLAT_ppc64le_linux)
+
+typedef
+   struct {
+      unsigned long long int nraddr; /* where's the code? */
+      unsigned long long int r2;     /* what tocptr do we need? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
+        _zzq_default, _zzq_request,                               \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  __extension__                                                   \
+  ({         unsigned long long int  _zzq_args[6];                \
+             unsigned long long int  _zzq_result;                 \
+             unsigned long long int* _zzq_ptr;                    \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
+                     "mr 4,%2\n\t" /*ptr*/                        \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1\n\t"                               \
+                     "mr %0,3"     /*result*/                     \
+                     : "=b" (_zzq_result)                         \
+                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
+                     : "cc", "memory", "r3", "r4");               \
+    _zzq_result;                                                  \
+  })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    unsigned long long int __addr;                                \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory", "r3"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR_GPR2 */                \
+                     "or 4,4,4\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory", "r3"                       \
+                    );                                            \
+    _zzq_orig->r2 = __addr;                                       \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R12 */       \
+                     "or 3,3,3\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "or 5,5,5\n\t"                              \
+                    );                                           \
+ } while (0)
+
+#endif /* PLAT_ppc64le_linux */
 
 /* ------------------------- arm-linux ------------------------- */
 
@@ -3093,6 +3173,562 @@ typedef
 
 #endif /* PLAT_ppc64be_linux */
 
+/* ------------------------- ppc64le-linux ----------------------- */
+#if defined(PLAT_ppc64le_linux)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /* asm clobber-list fragment */       \
+   "lr", "ctr", "xer",                                            \
+   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+   "r11", "r12", "r13"
+
+/* Macros to save and align the stack before making a function
+   call and restore it afterwards as gcc may not keep the stack
+   pointer aligned if it doesn't realise calls are being made
+   to other functions. */
+
+#define VALGRIND_ALIGN_STACK               \
+      "mr 28,1\n\t"         /* r28 = r1 */ \
+      "rldicr 1,1,0,59\n\t" /* r1 &= ~15 */
+#define VALGRIND_RESTORE_STACK             \
+      "mr 1,28\n\t" /* r1 = r28, the value saved by VALGRIND_ALIGN_STACK */
+
+/* These CALL_FN_ macros assume that on ppc64le-linux, sizeof(unsigned
+   long) == 8. */
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do { /* noredir call of orig, no args; r3 result -> lval */    \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+0];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1] = (unsigned long)_orig.r2; /* callee tocptr */   \
+      _argvec[2] = (unsigned long)_orig.nraddr; /* target */      \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do { /* noredir call of orig, 1 arg; r3 result -> lval */      \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+1];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do { /* noredir call of orig, 2 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+2];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do { /* noredir call of orig, 3 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+3];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do { /* noredir call of orig, 4 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+4];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do { /* noredir call of orig, 5 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+5];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do { /* noredir call of orig, 6 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+6];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do { /* noredir call of orig, 7 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+7];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do { /* noredir call of orig, 8 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+8];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do { /* noredir call of orig, 9 args; r3 result -> lval */     \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+9];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-128\n\t"  /* expand stack frame */            \
+         /* arg9 -> 96(r1), using r3 as scratch */                \
+         "ld  3,72(12)\n\t"                                       \
+         "std 3,96(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do { /* noredir call of orig, 10 args; r3 result -> lval */    \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+10];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-128\n\t"  /* expand stack frame */            \
+         /* arg10 -> 104(r1), using r3 as scratch */              \
+         "ld  3,80(12)\n\t"                                       \
+         "std 3,104(1)\n\t"                                       \
+         /* arg9 -> 96(r1), using r3 as scratch */                \
+         "ld  3,72(12)\n\t"                                       \
+         "std 3,96(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do { /* noredir call of orig, 11 args; r3 result -> lval */    \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+11];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* callee tocptr */ \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK /* save r1 in r28, align r1 */      \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] */             \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-144\n\t"  /* expand stack frame */            \
+         /* arg11 -> 112(r1), using r3 as scratch */              \
+         "ld  3,88(12)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* arg10 -> 104(r1), using r3 as scratch */              \
+         "ld  3,80(12)\n\t"                                       \
+         "std 3,104(1)\n\t"                                       \
+         /* arg9 -> 96(r1), using r3 as scratch */                \
+         "ld  3,72(12)\n\t"                                       \
+         "std 3,96(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 /* call *r12 */  \
+         "mr 12,%1\n\t"       /* r12 = &_argvec[2] again */       \
+         "mr %0,3\n\t"        /* _res = r3 */                     \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK /* r1 = r28 */                    \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do { /* 12 word args in, word result out via lval */           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+12]; /* 3 slots+12 args */ \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2; /* target's TOC */  \
+      _argvec[2]   = (unsigned long)_orig.nraddr; /* target */    \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      _argvec[2+12] = (unsigned long)arg12;                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "mr 12,%1\n\t" /* r12 = &_argvec[2] */                   \
+         "std 2,-16(12)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(12)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-144\n\t"  /* expand stack frame */            \
+         /* arg12 */                                              \
+         "ld  3,96(12)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg11 */                                              \
+         "ld  3,88(12)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* arg10 */                                              \
+         "ld  3,80(12)\n\t"                                       \
+         "std 3,104(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(12)\n\t"                                       \
+         "std 3,96(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "ld   3, 8(12)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(12)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(12)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(12)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(12)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(12)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(12)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(12)\n\t" /* arg8->r10 */                     \
+         "ld  12, 0(12)\n\t"  /* target->r12 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12                  \
+         "mr 12,%1\n\t" /* reload &_argvec[2] */                  \
+         "mr %0,3\n\t" /* r3 -> _res */                           \
+         "ld 2,-16(12)\n\t" /* restore tocptr */                  \
+         VALGRIND_RESTORE_STACK                                   \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res; /* cast to lval's type */   \
+   } while (0)
+
+#endif /* PLAT_ppc64le_linux */
+
 /* ------------------------- arm-linux ------------------------- */
 
 #if defined(PLAT_arm_linux)
@@ -5938,6 +6574,7 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 #undef PLAT_amd64_linux
 #undef PLAT_ppc32_linux
 #undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
 #undef PLAT_arm_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
diff --git a/include/vki/vki-ppc64-linux.h b/include/vki/vki-ppc64-linux.h
index dca1c24..33ad840 100644
--- a/include/vki/vki-ppc64-linux.h
+++ b/include/vki/vki-ppc64-linux.h
@@ -31,9 +31,11 @@
 #ifndef __VKI_PPC64_LINUX_H
 #define __VKI_PPC64_LINUX_H
 
-// ppc64 is big-endian.
+#if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)  // big-endian platforms
 #define VKI_BIG_ENDIAN  1
-
+#elif defined(VGP_ppc64le_linux)  // little-endian platform
+#define VKI_LITTLE_ENDIAN  1
+#endif  // neither endianness macro is defined if no platform above matches
 //----------------------------------------------------------------------
 // From linux-2.6.13/include/asm-ppc64/types.h
 //----------------------------------------------------------------------
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 36c80aa..cc8be14 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -167,7 +167,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
             return GOF(GPRn);
          by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
       */
-      Int ox = sz == 8 ? o : (o - 4);
+#if defined(VGA_ppc64le)
+      Int ox = o;  /* little-endian: no 4-byte back-off when sz == 4 */
+#else
+      Int ox = sz == 8 ? o : (o - 4);  /* big-endian: step back for sz == 4 */
+#endif
       if (ox == GOF(GPR0)) return ox;
       if (ox == GOF(GPR1)) return ox;
       if (ox == GOF(GPR2)) return ox;
@@ -367,7 +371,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    Int  o  = offset;
    Int  sz = szB;
    tl_assert(sz > 0);
+#if defined(VGA_ppc64le)
+   tl_assert(host_is_little_endian());
+#else /* every non-LE PPC build keeps the original big-endian check */
    tl_assert(host_is_big_endian());
+#endif
 
    if (o == GOF(GPR0) && sz == 4) return o;
    if (o == GOF(GPR1) && sz == 4) return o;
diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c
index 0029d8c..1d74d39 100644
--- a/memcheck/tests/atomic_incs.c
+++ b/memcheck/tests/atomic_incs.c
@@ -79,6 +79,23 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
          : /*trash*/ "memory", "cc", "r15"
       );
    } while (success != 1);
+#elif defined(VGA_ppc64le)
+   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
+      is 8-aligned -- guaranteed by caller. */
+   unsigned long success;
+   do {
+      __asm__ __volatile__(
+         "ldarx  15,0,%1"    "\n\t"  /* r15 = doubleword at p, reserved */
+         "add    15,15,%2"   "\n\t"  /* add n, unshifted */
+         "stdcx. 15,0,%1"    "\n\t"  /* store only if reservation held */
+         "mfcr   %0"         "\n\t"
+         "srwi   %0,%0,29"   "\n\t"  /* move CR0.EQ down to bit 0 */
+         "andi.  %0,%0,1"    "\n"
+         : /*out*/"=b"(success)
+         : /*in*/ "b"(p), "b"(((unsigned long)n))
+         : /*trash*/ "memory", "cc", "r15"
+      );
+   } while (success != 1);  /* retry until the conditional store succeeds */
 #elif defined(VGA_arm)
    unsigned int block[3]
       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
@@ -278,6 +295,23 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
          : /*trash*/ "memory", "cc", "r15"
       );
    } while (success != 1);
+#elif defined(VGA_ppc64le)
+   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
+      is 8-aligned -- guaranteed by caller. */
+   unsigned long success;
+   do {
+      __asm__ __volatile__(
+         "ldarx  15,0,%1"    "\n\t"  /* r15 = doubleword at p, reserved */
+         "add    15,15,%2"   "\n\t"  /* add n, unshifted */
+         "stdcx. 15,0,%1"    "\n\t"  /* store only if reservation held */
+         "mfcr   %0"         "\n\t"
+         "srwi   %0,%0,29"   "\n\t"  /* move CR0.EQ down to bit 0 */
+         "andi.  %0,%0,1"    "\n"
+         : /*out*/"=b"(success)
+         : /*in*/ "b"(p), "b"(((unsigned long)n))
+         : /*trash*/ "memory", "cc", "r15"
+      );
+   } while (success != 1);  /* retry until the conditional store succeeds */
 #elif defined(VGA_arm)
    unsigned int block[3]
       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
@@ -474,6 +508,23 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
          : /*trash*/ "memory", "cc", "r15"
       );
    } while (success != 1);
+#elif defined(VGA_ppc64le)
+   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
+      is 8-aligned -- guaranteed by caller. */
+   unsigned long success;
+   do {
+      __asm__ __volatile__(
+         "ldarx  15,0,%1"    "\n\t"  /* r15 = doubleword at p, reserved */
+         "add    15,15,%2"   "\n\t"  /* add n, unshifted */
+         "stdcx. 15,0,%1"    "\n\t"  /* store only if reservation held */
+         "mfcr   %0"         "\n\t"
+         "srwi   %0,%0,29"   "\n\t"  /* move CR0.EQ down to bit 0 */
+         "andi.  %0,%0,1"    "\n"
+         : /*out*/"=b"(success)
+         : /*in*/ "b"(p), "b"(((unsigned long)n))
+         : /*trash*/ "memory", "cc", "r15"
+      );
+   } while (success != 1);  /* retry until the conditional store succeeds */
 #elif defined(VGA_arm)
    unsigned int block[3]
       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
diff --git a/tests/arch_test.c b/tests/arch_test.c
index 2fa3b48..ef00785 100644
--- a/tests/arch_test.c
+++ b/tests/arch_test.c
@@ -27,6 +27,7 @@ char* all_archs[] = {
    "amd64",
    "ppc32",
    "ppc64",
+   "ppc64le",
    "arm",
    "s390x",
    "mips32",
@@ -54,6 +55,9 @@ static Bool go(char* arch)
    if ( 0 == strcmp( arch, "ppc32" ) ) return True;
 #endif
 
+#elif defined(VGP_ppc64le_linux)
+   if ( 0 == strcmp( arch, "ppc64" ) ) return True; /* NOTE(review): matches "ppc64", not "ppc64le" -- confirm intended */
+
 #elif defined(VGP_s390x_linux)
    if ( 0 == strcmp( arch, "s390x" ) ) return True;
 
-- 
1.8.4.2