dcavalca / rpms / qemu

Forked from rpms/qemu 11 months ago
Clone

Blame 0085-tcg-sparc-Fix-qemu_ld-st-to-handle-32-bit-host.patch

5544c1
From 138dfa905538bf918af390ff365a27de49364578 Mon Sep 17 00:00:00 2001
5544c1
From: Richard Henderson <rth@twiddle.net>
5544c1
Date: Fri, 23 Mar 2012 23:27:39 +0100
5544c1
Subject: [PATCH] tcg-sparc: Fix qemu_ld/st to handle 32-bit host.
5544c1
5544c1
At the same time, split out the tlb load logic to a new function.
5544c1
Fixes the cases of two data registers and two address registers.
5544c1
Fixes the signature of, and adds missing, qemu_ld/st opcodes.
5544c1
5544c1
Signed-off-by: Richard Henderson <rth@twiddle.net>
5544c1
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
5544c1
---
5544c1
 tcg/sparc/tcg-target.c | 777 ++++++++++++++++++++++---------------------------
5544c1
 1 file changed, 348 insertions(+), 429 deletions(-)
5544c1
5544c1
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
5544c1
index 23c2fda..d89c19b 100644
5544c1
--- a/tcg/sparc/tcg-target.c
5544c1
+++ b/tcg/sparc/tcg-target.c
5544c1
@@ -59,8 +59,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
5544c1
 };
5544c1
 #endif
5544c1
 
5544c1
-#define ARG_OFFSET 1
5544c1
-
5544c1
 static const int tcg_target_reg_alloc_order[] = {
5544c1
     TCG_REG_L0,
5544c1
     TCG_REG_L1,
5544c1
@@ -288,6 +286,16 @@ static inline int tcg_target_const_match(tcg_target_long val,
5544c1
 #define ASI_PRIMARY_LITTLE 0x88
5544c1
 #endif
5544c1
 
5544c1
+#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+
5544c1
+#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
5544c1
+
5544c1
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
5544c1
                                  int op)
5544c1
 {
5544c1
@@ -360,64 +368,43 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
5544c1
     }
5544c1
 }
5544c1
 
5544c1
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
5544c1
-                                  tcg_target_long arg)
5544c1
-{
5544c1
-    tcg_out_sethi(s, ret, arg);
5544c1
-    tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
5544c1
-              INSN_IMM13(arg & 0x3ff));
5544c1
-}
5544c1
-
5544c1
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
5544c1
-                                  tcg_target_long arg)
5544c1
+static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
5544c1
+                                   int a2, int op)
5544c1
 {
5544c1
-    if (!check_fit_tl(arg, 10))
5544c1
-        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
5544c1
-    if (TCG_TARGET_REG_BITS == 64) {
5544c1
-        tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
5544c1
-                  INSN_IMM13(arg & 0x3ff));
5544c1
-    } else {
5544c1
-        tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
5544c1
-                  INSN_IMM13(arg & 0x3ff));
5544c1
-    }
5544c1
+    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
5544c1
 }
5544c1
 
5544c1
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
5544c1
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
5544c1
+                                int offset, int op)
5544c1
 {
5544c1
-    if (check_fit_tl(offset, 13))
5544c1
+    if (check_fit_tl(offset, 13)) {
5544c1
         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
5544c1
                   INSN_IMM13(offset));
5544c1
-    else {
5544c1
+    } else {
5544c1
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
5544c1
-        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
5544c1
-                  INSN_RS2(addr));
5544c1
+        tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op);
5544c1
     }
5544c1
 }
5544c1
 
5544c1
-static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
5544c1
-                                    int offset, int op, int asi)
5544c1
-{
5544c1
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
5544c1
-    tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
5544c1
-              INSN_ASI(asi) | INSN_RS2(addr));
5544c1
-}
5544c1
-
5544c1
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
5544c1
                               TCGReg arg1, tcg_target_long arg2)
5544c1
 {
5544c1
-    if (type == TCG_TYPE_I32)
5544c1
-        tcg_out_ldst(s, ret, arg1, arg2, LDUW);
5544c1
-    else
5544c1
-        tcg_out_ldst(s, ret, arg1, arg2, LDX);
5544c1
+    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
5544c1
                               TCGReg arg1, tcg_target_long arg2)
5544c1
 {
5544c1
-    if (type == TCG_TYPE_I32)
5544c1
-        tcg_out_ldst(s, arg, arg1, arg2, STW);
5544c1
-    else
5544c1
-        tcg_out_ldst(s, arg, arg1, arg2, STX);
5544c1
+    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
5544c1
+}
5544c1
+
5544c1
+static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
5544c1
+                                  tcg_target_long arg)
5544c1
+{
5544c1
+    if (!check_fit_tl(arg, 10)) {
5544c1
+        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
5544c1
+    }
5544c1
+    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_sety(TCGContext *s, int rs)
5544c1
@@ -442,14 +429,15 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
5544c1
     }
5544c1
 }
5544c1
 
5544c1
-static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
5544c1
+static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
5544c1
+                                tcg_target_long val)
5544c1
 {
5544c1
     if (val != 0) {
5544c1
         if (check_fit_tl(val, 13))
5544c1
-            tcg_out_arithi(s, reg, reg, val, ARITH_AND);
5544c1
+            tcg_out_arithi(s, rd, rs, val, ARITH_AND);
5544c1
         else {
5544c1
             tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
5544c1
-            tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
5544c1
+            tcg_out_arith(s, rd, rs, TCG_REG_I5, ARITH_AND);
5544c1
         }
5544c1
     }
5544c1
 }
5544c1
@@ -718,418 +706,328 @@ static const void * const qemu_st_helpers[4] = {
5544c1
     helper_stl_mmu,
5544c1
     helper_stq_mmu,
5544c1
 };
5544c1
-#endif
5544c1
 
5544c1
-#if TARGET_LONG_BITS == 32
5544c1
-#define TARGET_LD_OP LDUW
5544c1
-#else
5544c1
-#define TARGET_LD_OP LDX
5544c1
-#endif
5544c1
+/* Perform the TLB load and compare.
5544c1
 
5544c1
-#if defined(CONFIG_SOFTMMU)
5544c1
-#if HOST_LONG_BITS == 32
5544c1
-#define TARGET_ADDEND_LD_OP LDUW
5544c1
+   Inputs:
5544c1
+   ADDRLO_IDX contains the index into ARGS of the low part of the
5544c1
+   address; the high part of the address is at ADDRLO_IDX+1.
5544c1
+
5544c1
+   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
5544c1
+
5544c1
+   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
5544c1
+   This should be offsetof addr_read or addr_write.
5544c1
+
5544c1
+   The result of the TLB comparison is in %[ix]cc.  The sanitized address
5544c1
+   is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
5544c1
+
5544c1
+static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
5544c1
+                            int s_bits, const TCGArg *args, int which)
5544c1
+{
5544c1
+    const int addrlo = args[addrlo_idx];
5544c1
+    const int r0 = TCG_REG_O0;
5544c1
+    const int r1 = TCG_REG_O1;
5544c1
+    const int r2 = TCG_REG_O2;
5544c1
+    int addr = addrlo;
5544c1
+    int tlb_ofs;
5544c1
+
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
5544c1
+        /* Assemble the 64-bit address in R0.  */
5544c1
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
5544c1
+        tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
5544c1
+        tcg_out_arith(s, r0, r0, r1, ARITH_OR);
5544c1
+    }
5544c1
+
5544c1
+    /* Shift the page number down to tlb-entry.  */
5544c1
+    tcg_out_arithi(s, r1, addrlo,
5544c1
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
5544c1
+
5544c1
+    /* Mask out the page offset, except for the required alignment.  */
5544c1
+    tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
5544c1
+
5544c1
+    /* Compute tlb index, modulo tlb size.  */
5544c1
+    tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
5544c1
+
5544c1
+    /* Relative to the current ENV.  */
5544c1
+    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
5544c1
+
5544c1
+    /* Find a base address that can load both tlb comparator and addend.  */
5544c1
+    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
5544c1
+    if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
5544c1
+        tcg_out_addi(s, r1, tlb_ofs);
5544c1
+        tlb_ofs = 0;
5544c1
+    }
5544c1
+
5544c1
+    /* Load the tlb comparator and the addend.  */
5544c1
+    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
5544c1
+    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
5544c1
+
5544c1
+    /* subcc arg0, arg2, %g0 */
5544c1
+    tcg_out_cmp(s, r0, r2, 0);
5544c1
+
5544c1
+    /* If the guest address must be zero-extended, do so now.  */
5544c1
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
5544c1
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
5544c1
+        return r0;
5544c1
+    }
5544c1
+    return addrlo;
5544c1
+}
5544c1
+#endif /* CONFIG_SOFTMMU */
5544c1
+
5544c1
+static const int qemu_ld_opc[8] = {
5544c1
+#ifdef TARGET_WORDS_BIGENDIAN
5544c1
+    LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
5544c1
 #else
5544c1
-#define TARGET_ADDEND_LD_OP LDX
5544c1
-#endif
5544c1
+    LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
5544c1
 #endif
5544c1
+};
5544c1
 
5544c1
-#if TCG_TARGET_REG_BITS == 64
5544c1
-#define HOST_LD_OP LDX
5544c1
-#define HOST_ST_OP STX
5544c1
-#define HOST_SLL_OP SHIFT_SLLX
5544c1
-#define HOST_SRA_OP SHIFT_SRAX
5544c1
+static const int qemu_st_opc[4] = {
5544c1
+#ifdef TARGET_WORDS_BIGENDIAN
5544c1
+    STB, STH, STW, STX
5544c1
 #else
5544c1
-#define HOST_LD_OP LDUW
5544c1
-#define HOST_ST_OP STW
5544c1
-#define HOST_SLL_OP SHIFT_SLL
5544c1
-#define HOST_SRA_OP SHIFT_SRA
5544c1
+    STB, STH_LE, STW_LE, STX_LE
5544c1
 #endif
5544c1
+};
5544c1
 
5544c1
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
5544c1
-                            int opc)
5544c1
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
5544c1
 {
5544c1
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
5544c1
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
5544c1
 #if defined(CONFIG_SOFTMMU)
5544c1
-    uint32_t *label1_ptr, *label2_ptr;
5544c1
+    int memi_idx, memi, s_bits, n;
5544c1
+    uint32_t *label_ptr[2];
5544c1
 #endif
5544c1
 
5544c1
-    data_reg = *args++;
5544c1
-    addr_reg = *args++;
5544c1
-    mem_index = *args;
5544c1
-    s_bits = opc & 3;
5544c1
-
5544c1
-    arg0 = TCG_REG_O0;
5544c1
-    arg1 = TCG_REG_O1;
5544c1
-    arg2 = TCG_REG_O2;
5544c1
+    datahi = datalo = args[0];
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        datahi = args[1];
5544c1
+        addrlo_idx = 2;
5544c1
+    }
5544c1
 
5544c1
 #if defined(CONFIG_SOFTMMU)
5544c1
-    /* srl addr_reg, x, arg1 */
5544c1
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
5544c1
-                   SHIFT_SRL);
5544c1
-    /* and addr_reg, x, arg0 */
5544c1
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
5544c1
-                   ARITH_AND);
5544c1
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
5544c1
+    memi = args[memi_idx];
5544c1
+    s_bits = sizeop & 3;
5544c1
+
5544c1
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
5544c1
+                                offsetof(CPUTLBEntry, addr_read));
5544c1
+
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        int reg64;
5544c1
+
5544c1
+        /* bne,pn %[xi]cc, label0 */
5544c1
+        label_ptr[0] = (uint32_t *)s->code_ptr;
5544c1
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
5544c1
+                      | ((TARGET_LONG_BITS == 64) << 21)));
5544c1
+
5544c1
+        /* TLB Hit.  */
5544c1
+        /* Load all 64 bits into an O/G register.  */
5544c1
+        reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
5544c1
+        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
5544c1
+
5544c1
+        /* Move the two 32-bit pieces into the destination registers.  */
5544c1
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
5544c1
+        if (reg64 != datalo) {
5544c1
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
5544c1
+        }
5544c1
 
5544c1
-    /* and arg1, x, arg1 */
5544c1
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
5544c1
+        /* b,a,pt label1 */
5544c1
+        label_ptr[1] = (uint32_t *)s->code_ptr;
5544c1
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
5544c1
+                      | (1 << 29) | (1 << 19)));
5544c1
+    } else {
5544c1
+        /* The fast path is exactly one insn.  Thus we can perform the
5544c1
+           entire TLB Hit in the (annulled) delay slot of the branch
5544c1
+           over the TLB Miss case.  */
5544c1
+
5544c1
+        /* beq,a,pt %[xi]cc, label0 */
5544c1
+        label_ptr[0] = NULL;
5544c1
+        label_ptr[1] = (uint32_t *)s->code_ptr;
5544c1
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
5544c1
+                      | ((TARGET_LONG_BITS == 64) << 21)
5544c1
+                      | (1 << 29) | (1 << 19)));
5544c1
+        /* delay slot */
5544c1
+        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
5544c1
+    }
5544c1
 
5544c1
-    /* add arg1, x, arg1 */
5544c1
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
5544c1
-                                   tlb_table[mem_index][0].addr_read));
5544c1
+    /* TLB Miss.  */
5544c1
 
5544c1
-    /* add env, arg1, arg1 */
5544c1
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
5544c1
+    if (label_ptr[0]) {
5544c1
+        *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
5544c1
+                                    (unsigned long)label_ptr[0]);
5544c1
+    }
5544c1
+    n = 0;
5544c1
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
5544c1
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5544c1
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
5544c1
+                    args[addrlo_idx + 1]);
5544c1
+    }
5544c1
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
5544c1
+                args[addrlo_idx]);
5544c1
 
5544c1
-    /* ld [arg1], arg2 */
5544c1
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
5544c1
-              INSN_RS2(TCG_REG_G0));
5544c1
+    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
5544c1
+       global registers */
5544c1
+    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+               sizeof(long));
5544c1
 
5544c1
-    /* subcc arg0, arg2, %g0 */
5544c1
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
5544c1
-
5544c1
-    /* will become:
5544c1
-       be label1
5544c1
-        or
5544c1
-       be,pt %xcc label1 */
5544c1
-    label1_ptr = (uint32_t *)s->code_ptr;
5544c1
-    tcg_out32(s, 0);
5544c1
-
5544c1
-    /* mov (delay slot) */
5544c1
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
5544c1
-
5544c1
-    /* mov */
5544c1
-    tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
5544c1
-    /* XXX/FIXME: suboptimal */
5544c1
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
5544c1
-                tcg_target_call_iarg_regs[2]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
5544c1
-                tcg_target_call_iarg_regs[1]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
5544c1
-                tcg_target_call_iarg_regs[0]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
5544c1
-                TCG_AREG0);
5544c1
-
5544c1
-    /* XXX: move that code at the end of the TB */
5544c1
     /* qemu_ld_helper[s_bits](arg0, arg1) */
5544c1
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
5544c1
                            - (tcg_target_ulong)s->code_ptr) >> 2)
5544c1
                          & 0x3fffffff));
5544c1
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
5544c1
-       global registers */
5544c1
-    // delay slot
5544c1
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                 sizeof(long), HOST_ST_OP);
5544c1
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                 sizeof(long), HOST_LD_OP);
5544c1
-
5544c1
-    /* data_reg = sign_extend(arg0) */
5544c1
-    switch(opc) {
5544c1
+    /* delay slot */
5544c1
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
5544c1
+
5544c1
+    /* Reload AREG0.  */
5544c1
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+               sizeof(long));
5544c1
+
5544c1
+    n = tcg_target_call_oarg_regs[0];
5544c1
+    /* datalo = sign_extend(arg0) */
5544c1
+    switch (sizeop) {
5544c1
     case 0 | 4:
5544c1
-        /* sll arg0, 24/56, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
5544c1
-                       HOST_SLL_OP);
5544c1
-        /* sra data_reg, 24/56, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, data_reg,
5544c1
-                       (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
5544c1
+        /* Recall that SRA sign extends from bit 31 through bit 63.  */
5544c1
+        tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
5544c1
+        tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
5544c1
         break;
5544c1
     case 1 | 4:
5544c1
-        /* sll arg0, 16/48, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, arg0,
5544c1
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
5544c1
-        /* sra data_reg, 16/48, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, data_reg,
5544c1
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
5544c1
+        tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
5544c1
+        tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
5544c1
         break;
5544c1
     case 2 | 4:
5544c1
-        /* sll arg0, 32, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
5544c1
-        /* sra data_reg, 32, data_reg */
5544c1
-        tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
5544c1
+        tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
5544c1
         break;
5544c1
+    case 3:
5544c1
+        if (TCG_TARGET_REG_BITS == 32) {
5544c1
+            tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
5544c1
+            tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
5544c1
+            break;
5544c1
+        }
5544c1
+        /* FALLTHRU */
5544c1
     case 0:
5544c1
     case 1:
5544c1
     case 2:
5544c1
-    case 3:
5544c1
     default:
5544c1
         /* mov */
5544c1
-        tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
5544c1
+        tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
5544c1
         break;
5544c1
     }
5544c1
 
5544c1
-    /* will become:
5544c1
-       ba label2 */
5544c1
-    label2_ptr = (uint32_t *)s->code_ptr;
5544c1
-    tcg_out32(s, 0);
5544c1
-
5544c1
-    /* nop (delay slot */
5544c1
-    tcg_out_nop(s);
5544c1
-
5544c1
-    /* label1: */
5544c1
-#if TARGET_LONG_BITS == 32
5544c1
-    /* be label1 */
5544c1
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
5544c1
-                   INSN_OFF22((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label1_ptr));
5544c1
-#else
5544c1
-    /* be,pt %xcc label1 */
5544c1
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
5544c1
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label1_ptr));
5544c1
-#endif
5544c1
-
5544c1
-    /* ld [arg1 + x], arg1 */
5544c1
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
5544c1
-                 offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
5544c1
-
5544c1
-#if TARGET_LONG_BITS == 32
5544c1
-    /* and addr_reg, x, arg0 */
5544c1
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
5544c1
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
5544c1
-    /* add arg0, arg1, arg0 */
5544c1
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
5544c1
+    *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
5544c1
+                                (unsigned long)label_ptr[1]);
5544c1
 #else
5544c1
-    /* add addr_reg, arg1, arg0 */
5544c1
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
5544c1
-#endif
5544c1
+    addr_reg = args[addrlo_idx];
5544c1
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
5544c1
+        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
5544c1
+        addr_reg = TCG_REG_I5;
5544c1
+    }
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
5544c1
 
5544c1
-#else
5544c1
-    arg0 = addr_reg;
5544c1
-#endif
5544c1
+        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
5544c1
 
5544c1
-    switch(opc) {
5544c1
-    case 0:
5544c1
-        /* ldub [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
5544c1
-        break;
5544c1
-    case 0 | 4:
5544c1
-        /* ldsb [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
5544c1
-        break;
5544c1
-    case 1:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* lduh [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
5544c1
-#else
5544c1
-        /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 1 | 4:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* ldsh [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
5544c1
-#else
5544c1
-        /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 2:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* lduw [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
5544c1
-#else
5544c1
-        /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 2 | 4:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* ldsw [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
5544c1
-#else
5544c1
-        /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 3:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* ldx [arg0], data_reg */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, LDX);
5544c1
-#else
5544c1
-        /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    default:
5544c1
-        tcg_abort();
5544c1
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
5544c1
+        if (reg64 != datalo) {
5544c1
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
5544c1
+        }
5544c1
+    } else {
5544c1
+        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
5544c1
     }
5544c1
-
5544c1
-#if defined(CONFIG_SOFTMMU)
5544c1
-    /* label2: */
5544c1
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
5544c1
-                   INSN_OFF22((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label2_ptr));
5544c1
-#endif
5544c1
+#endif /* CONFIG_SOFTMMU */
5544c1
 }
5544c1
 
5544c1
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
5544c1
-                            int opc)
5544c1
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
5544c1
 {
5544c1
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
5544c1
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
5544c1
 #if defined(CONFIG_SOFTMMU)
5544c1
-    uint32_t *label1_ptr, *label2_ptr;
5544c1
+    int memi_idx, memi, n;
5544c1
+    uint32_t *label_ptr;
5544c1
 #endif
5544c1
 
5544c1
-    data_reg = *args++;
5544c1
-    addr_reg = *args++;
5544c1
-    mem_index = *args;
5544c1
-
5544c1
-    s_bits = opc;
5544c1
-
5544c1
-    arg0 = TCG_REG_O0;
5544c1
-    arg1 = TCG_REG_O1;
5544c1
-    arg2 = TCG_REG_O2;
5544c1
+    datahi = datalo = args[0];
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        datahi = args[1];
5544c1
+        addrlo_idx = 2;
5544c1
+    }
5544c1
 
5544c1
 #if defined(CONFIG_SOFTMMU)
5544c1
-    /* srl addr_reg, x, arg1 */
5544c1
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
5544c1
-                   SHIFT_SRL);
5544c1
-
5544c1
-    /* and addr_reg, x, arg0 */
5544c1
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
5544c1
-                   ARITH_AND);
5544c1
-
5544c1
-    /* and arg1, x, arg1 */
5544c1
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
5544c1
-
5544c1
-    /* add arg1, x, arg1 */
5544c1
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
5544c1
-                                   tlb_table[mem_index][0].addr_write));
5544c1
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
5544c1
+    memi = args[memi_idx];
5544c1
+
5544c1
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
5544c1
+                                offsetof(CPUTLBEntry, addr_write));
5544c1
+
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
5544c1
+        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
5544c1
+        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
5544c1
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
5544c1
+        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
5544c1
+        datalo = TCG_REG_G1;
5544c1
+    }
5544c1
 
5544c1
-    /* add env, arg1, arg1 */
5544c1
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
5544c1
+    /* The fast path is exactly one insn.  Thus we can perform the entire
5544c1
+       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
5544c1
+    /* beq,a,pt %[xi]cc, label0 */
5544c1
+    label_ptr = (uint32_t *)s->code_ptr;
5544c1
+    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
5544c1
+                  | ((TARGET_LONG_BITS == 64) << 21)
5544c1
+                  | (1 << 29) | (1 << 19)));
5544c1
+    /* delay slot */
5544c1
+    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
5544c1
+
5544c1
+    /* TLB Miss.  */
5544c1
+
5544c1
+    n = 0;
5544c1
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
5544c1
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
5544c1
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
5544c1
+                    args[addrlo_idx + 1]);
5544c1
+    }
5544c1
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
5544c1
+                args[addrlo_idx]);
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
5544c1
+    }
5544c1
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
5544c1
 
5544c1
-    /* ld [arg1], arg2 */
5544c1
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
5544c1
-              INSN_RS2(TCG_REG_G0));
5544c1
+    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
5544c1
+       global registers */
5544c1
+    tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+               sizeof(long));
5544c1
 
5544c1
-    /* subcc arg0, arg2, %g0 */
5544c1
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
5544c1
-
5544c1
-    /* will become:
5544c1
-       be label1
5544c1
-        or
5544c1
-       be,pt %xcc label1 */
5544c1
-    label1_ptr = (uint32_t *)s->code_ptr;
5544c1
-    tcg_out32(s, 0);
5544c1
-
5544c1
-    /* mov (delay slot) */
5544c1
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
5544c1
-
5544c1
-    /* mov */
5544c1
-    tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
5544c1
-
5544c1
-    /* mov */
5544c1
-    tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
5544c1
-
5544c1
-    /* XXX/FIXME: suboptimal */
5544c1
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
5544c1
-                tcg_target_call_iarg_regs[2]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
5544c1
-                tcg_target_call_iarg_regs[1]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
5544c1
-                tcg_target_call_iarg_regs[0]);
5544c1
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
5544c1
-                TCG_AREG0);
5544c1
-    /* XXX: move that code at the end of the TB */
5544c1
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
5544c1
-    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
5544c1
+    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
5544c1
                            - (tcg_target_ulong)s->code_ptr) >> 2)
5544c1
                          & 0x3fffffff));
5544c1
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
5544c1
-       global registers */
5544c1
-    // delay slot
5544c1
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                 sizeof(long), HOST_ST_OP);
5544c1
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                 sizeof(long), HOST_LD_OP);
5544c1
-
5544c1
-    /* will become:
5544c1
-       ba label2 */
5544c1
-    label2_ptr = (uint32_t *)s->code_ptr;
5544c1
-    tcg_out32(s, 0);
5544c1
-
5544c1
-    /* nop (delay slot) */
5544c1
-    tcg_out_nop(s);
5544c1
+    /* delay slot */
5544c1
+    tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
5544c1
 
5544c1
-#if TARGET_LONG_BITS == 32
5544c1
-    /* be label1 */
5544c1
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
5544c1
-                   INSN_OFF22((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label1_ptr));
5544c1
-#else
5544c1
-    /* be,pt %xcc label1 */
5544c1
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
5544c1
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label1_ptr));
5544c1
-#endif
5544c1
-
5544c1
-    /* ld [arg1 + x], arg1 */
5544c1
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
5544c1
-                 offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
5544c1
+    /* Reload AREG0.  */
5544c1
+    tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+               TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+               sizeof(long));
5544c1
 
5544c1
-#if TARGET_LONG_BITS == 32
5544c1
-    /* and addr_reg, x, arg0 */
5544c1
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
5544c1
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
5544c1
-    /* add arg0, arg1, arg0 */
5544c1
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
5544c1
+    *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
5544c1
+                             (unsigned long)label_ptr);
5544c1
 #else
5544c1
-    /* add addr_reg, arg1, arg0 */
5544c1
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
5544c1
-#endif
5544c1
-
5544c1
-#else
5544c1
-    arg0 = addr_reg;
5544c1
-#endif
5544c1
-
5544c1
-    switch(opc) {
5544c1
-    case 0:
5544c1
-        /* stb data_reg, [arg0] */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, STB);
5544c1
-        break;
5544c1
-    case 1:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* sth data_reg, [arg0] */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, STH);
5544c1
-#else
5544c1
-        /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 2:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* stw data_reg, [arg0] */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, STW);
5544c1
-#else
5544c1
-        /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    case 3:
5544c1
-#ifdef TARGET_WORDS_BIGENDIAN
5544c1
-        /* stx data_reg, [arg0] */
5544c1
-        tcg_out_ldst(s, data_reg, arg0, 0, STX);
5544c1
-#else
5544c1
-        /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
5544c1
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
5544c1
-#endif
5544c1
-        break;
5544c1
-    default:
5544c1
-        tcg_abort();
5544c1
+    addr_reg = args[addrlo_idx];
5544c1
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
5544c1
+        tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
5544c1
+        addr_reg = TCG_REG_I5;
5544c1
     }
5544c1
-
5544c1
-#if defined(CONFIG_SOFTMMU)
5544c1
-    /* label2: */
5544c1
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
5544c1
-                   INSN_OFF22((unsigned long)s->code_ptr -
5544c1
-                              (unsigned long)label2_ptr));
5544c1
-#endif
5544c1
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
5544c1
+        /* Reconstruct the full 64-bit value in %g1, using %o2 as temp.  */
5544c1
+        /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
5544c1
+        tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
5544c1
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
5544c1
+        tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
5544c1
+        datalo = TCG_REG_G1;
5544c1
+    }
5544c1
+    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]);
5544c1
+#endif /* CONFIG_SOFTMMU */
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
5544c1
@@ -1175,12 +1073,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
5544c1
         /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
5544c1
            global registers */
5544c1
         // delay slot
5544c1
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                     sizeof(long), HOST_ST_OP);
5544c1
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
-                     sizeof(long), HOST_LD_OP);
5544c1
+        tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+                   sizeof(long));
5544c1
+        tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
5544c1
+                   TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
5544c1
+                   sizeof(long));
5544c1
         break;
5544c1
     case INDEX_op_jmp:
5544c1
     case INDEX_op_br:
5544c1
@@ -1348,6 +1246,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
5544c1
         tcg_out_qemu_ld(s, args, 2 | 4);
5544c1
         break;
5544c1
 #endif
5544c1
+    case INDEX_op_qemu_ld64:
5544c1
+        tcg_out_qemu_ld(s, args, 3);
5544c1
+        break;
5544c1
     case INDEX_op_qemu_st8:
5544c1
         tcg_out_qemu_st(s, args, 0);
5544c1
         break;
5544c1
@@ -1357,6 +1258,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
5544c1
     case INDEX_op_qemu_st32:
5544c1
         tcg_out_qemu_st(s, args, 2);
5544c1
         break;
5544c1
+    case INDEX_op_qemu_st64:
5544c1
+        tcg_out_qemu_st(s, args, 3);
5544c1
+        break;
5544c1
 
5544c1
 #if TCG_TARGET_REG_BITS == 64
5544c1
     case INDEX_op_movi_i64:
5544c1
@@ -1421,13 +1325,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
5544c1
                             args[2], const_args[2]);
5544c1
         break;
5544c1
 
5544c1
-    case INDEX_op_qemu_ld64:
5544c1
-        tcg_out_qemu_ld(s, args, 3);
5544c1
-        break;
5544c1
-    case INDEX_op_qemu_st64:
5544c1
-        tcg_out_qemu_st(s, args, 3);
5544c1
-        break;
5544c1
-
5544c1
 #endif
5544c1
     gen_arith:
5544c1
         tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
5544c1
@@ -1492,20 +1389,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
5544c1
     { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
5544c1
 #endif
5544c1
 
5544c1
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
5544c1
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
5544c1
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
5544c1
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
5544c1
-    { INDEX_op_qemu_ld32, { "r", "L" } },
5544c1
-#if TCG_TARGET_REG_BITS == 64
5544c1
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
5544c1
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
5544c1
-#endif
5544c1
-
5544c1
-    { INDEX_op_qemu_st8, { "L", "L" } },
5544c1
-    { INDEX_op_qemu_st16, { "L", "L" } },
5544c1
-    { INDEX_op_qemu_st32, { "L", "L" } },
5544c1
-
5544c1
 #if TCG_TARGET_REG_BITS == 64
5544c1
     { INDEX_op_mov_i64, { "r", "r" } },
5544c1
     { INDEX_op_movi_i64, { "r" } },
5544c1
@@ -1520,8 +1403,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
5544c1
     { INDEX_op_st16_i64, { "r", "r" } },
5544c1
     { INDEX_op_st32_i64, { "r", "r" } },
5544c1
     { INDEX_op_st_i64, { "r", "r" } },
5544c1
-    { INDEX_op_qemu_ld64, { "L", "L" } },
5544c1
-    { INDEX_op_qemu_st64, { "L", "L" } },
5544c1
 
5544c1
     { INDEX_op_add_i64, { "r", "r", "rJ" } },
5544c1
     { INDEX_op_mul_i64, { "r", "r", "rJ" } },
5544c1
@@ -1548,10 +1429,48 @@ static const TCGTargetOpDef sparc_op_defs[] = {
5544c1
 
5544c1
     { INDEX_op_brcond_i64, { "r", "rJ" } },
5544c1
     { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
5544c1
-#else
5544c1
-    { INDEX_op_qemu_ld64, { "L", "L", "L" } },
5544c1
+#endif
5544c1
+
5544c1
+#if TCG_TARGET_REG_BITS == 64
5544c1
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld32, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld64, { "r", "L" } },
5544c1
+
5544c1
+    { INDEX_op_qemu_st8, { "L", "L" } },
5544c1
+    { INDEX_op_qemu_st16, { "L", "L" } },
5544c1
+    { INDEX_op_qemu_st32, { "L", "L" } },
5544c1
+    { INDEX_op_qemu_st64, { "L", "L" } },
5544c1
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
5544c1
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld32, { "r", "L" } },
5544c1
+    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
5544c1
+
5544c1
+    { INDEX_op_qemu_st8, { "L", "L" } },
5544c1
+    { INDEX_op_qemu_st16, { "L", "L" } },
5544c1
+    { INDEX_op_qemu_st32, { "L", "L" } },
5544c1
     { INDEX_op_qemu_st64, { "L", "L", "L" } },
5544c1
+#else
5544c1
+    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
5544c1
+    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
5544c1
+    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
5544c1
+    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
5544c1
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
5544c1
+    { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
5544c1
+
5544c1
+    { INDEX_op_qemu_st8, { "L", "L", "L" } },
5544c1
+    { INDEX_op_qemu_st16, { "L", "L", "L" } },
5544c1
+    { INDEX_op_qemu_st32, { "L", "L", "L" } },
5544c1
+    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
5544c1
 #endif
5544c1
+
5544c1
     { -1 },
5544c1
 };
5544c1
 
5544c1
-- 
5544c1
1.7.12.1
5544c1