0057-tcg-hppa-Fix-broken-load-store-helpers.patch

From c13ecfea174994d3f7f7d392f0faaed6d40efd9e Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 18 Sep 2012 19:59:48 -0700
Subject: [PATCH] tcg-hppa: Fix broken load/store helpers

The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers
was not respecting the ABI requirement for 64-bit values
being aligned in registers.

Mirror the ARM port in use of helper functions to marshal
arguments into the correct registers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 tcg/hppa/tcg-target.c | 136 +++++++++++++++++++++++++++-----------------------
 1 file changed, 74 insertions(+), 62 deletions(-)

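Review aid, not part of the committed change: a minimal standalone C sketch of
the argument-slot rule that the new tcg_out_arg_reg32()/tcg_out_arg_reg64()
helpers in the hunks below encode. Slots 0-3 map to the call argument
registers (tcg_target_call_iarg_regs[]), slots 4 and up spill to the stack at
TCG_TARGET_CALL_STACK_OFFSET, and a 64-bit value is first padded to an even
slot so it occupies an aligned pair. The names used here (slot_name, emit32,
emit64) are illustrative only and do not exist in QEMU.

/* Sketch only: models the slot assignment done by tcg_out_arg_reg32() and
   tcg_out_arg_reg64().  Slots 0..3 are argument registers; slots >= 4 are
   stack words.  */
#include <stdio.h>

static void slot_name(int slot, char *buf, size_t len)
{
    if (slot < 4) {
        snprintf(buf, len, "iarg_regs[%d]", slot);
    } else {
        /* Mirrors TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4).  */
        snprintf(buf, len, "stack[OFFSET - %d]", (slot - 3) * 4);
    }
}

static int emit32(int slot, const char *what)
{
    char where[32];

    slot_name(slot, where, sizeof(where));
    printf("  %-12s -> %s\n", what, where);
    return slot + 1;
}

static int emit64(int slot, const char *lo, const char *hi)
{
    /* 64-bit arguments must start on an even slot (aligned pair).  */
    if (slot & 1) {
        slot++;
    }
    slot = emit32(slot, lo);
    slot = emit32(slot, hi);
    return slot;
}

int main(void)
{
    int slot = 0;

    printf("qemu_ld helper call, 64-bit guest address:\n");
    slot = emit32(slot, "env");                    /* slot 0 */
    slot = emit64(slot, "addr low", "addr high");  /* pads slot 1, uses 2-3 */
    slot = emit32(slot, "mem_index");              /* slot 4: on the stack */
    return 0;
}

Worked through as in the hunks below: a 64-bit-guest load passes env in slot 0,
the guest address pair in slots 2-3 (slot 1 is padding), and mem_index in
slot 4 on the stack; a 32-bit-guest 64-bit store passes env in slot 0, the
address in slot 1, the data pair in slots 2-3, and mem_index again in slot 4.
The previous open-coded argreg-- sequence never inserted the padding slot,
which is the ABI alignment breakage described in the commit message.
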
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index a76569d..5385d45 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
     }
 
-    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
-       of the address.  We include the low N bits of the address to catch unaligned
-       accesses and force them onto the slow path.  Do this computation after having
-       issued the load from the TLB slot to give the load time to complete.  */
+    /* Compute the value that ought to appear in the TLB for a hit, namely,
+       the page of the address.  We include the low N bits of the address
+       to catch unaligned accesses and force them onto the slow path.  Do
+       this computation after having issued the load from the TLB slot to
+       give the load time to complete.  */
     tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
     /* If not equal, jump to lab_miss. */
@@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
 
     return ret;
 }
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+    if (argno < 4) {
+        if (vconst) {
+            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        }
+    } else {
+        if (vconst && v != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+            v = TCG_REG_R20;
+        }
+        tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+    }
+    return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+    /* 64-bit arguments must go in even reg pairs and stack slots.  */
+    if (argno & 1) {
+        argno++;
+    }
+    argno = tcg_out_arg_reg32(s, argno, vl, false);
+    argno = tcg_out_arg_reg32(s, argno, vh, false);
+    return argno;
+}
 #endif
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc & 3, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc & 3, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+                           TCG_REG_R20, opc);
     tcg_out_branch(s, lab2, 1);
 
     /* TLB Miss.  */
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+                                   int datahi_reg, int addr_reg, int opc)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 0;
@@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, next, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
 
     /* There are no indexed stores, so we must do this addition explitly.
@@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
 
+    next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
     switch(opc) {
     case 0:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 1:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xffff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
         break;
     case 3:
-        /* Because of the alignment required by the 64-bit data argument,
-           we will always use R23/R24.  Also, we will always run out of
-           argument registers for storing mem_index, so that will have
-           to go on the stack.  */
-        if (mem_index == 0) {
-            argreg = TCG_REG_R0;
-        } else {
-            argreg = TCG_REG_R20;
-            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        }
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
-        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - 4);
+        argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
         break;
     default:
        tcg_abort();
     }
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
     tcg_out_label(s, lab2, s->code_ptr);
 #else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
-       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
+    /* There are no indexed stores, so if GUEST_BASE is set we must do
+       the add explicitly.  Careful to avoid R20, which is used for the
+       bswaps to follow.  */
     if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+        tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+                      TCG_GUEST_BASE_REG, INSN_ADDL);
         addrlo_reg = TCG_REG_R31;
     }
     tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
-- 
1.7.12.1