Blob Blame History Raw
From c13ecfea174994d3f7f7d392f0faaed6d40efd9e Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 18 Sep 2012 19:59:48 -0700
Subject: [PATCH] tcg-hppa: Fix broken load/store helpers

The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers
was not respecting the ABI requirement for 64-bit values
being aligned in registers.

Mirror the ARM port in use of helper functions to marshal
arguments into the correct registers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 tcg/hppa/tcg-target.c | 136 +++++++++++++++++++++++++++-----------------------
 1 file changed, 74 insertions(+), 62 deletions(-)

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index a76569d..5385d45 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
     }
 
-    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
-       of the address.  We include the low N bits of the address to catch unaligned
-       accesses and force them onto the slow path.  Do this computation after having
-       issued the load from the TLB slot to give the load time to complete.  */
+    /* Compute the value that ought to appear in the TLB for a hit, namely,
+       the page of the address.  We include the low N bits of the address
+       to catch unaligned accesses and force them onto the slow path.  Do
+       this computation after having issued the load from the TLB slot to
+       give the load time to complete.  */
     tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
     /* If not equal, jump to lab_miss. */
@@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
 
     return ret;
 }
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+    if (argno < 4) {
+        if (vconst) {
+            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        }
+    } else {
+        if (vconst && v != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+            v = TCG_REG_R20;
+        }
+        tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+    }
+    return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+    /* 64-bit arguments must go in even reg pairs and stack slots.  */
+    if (argno & 1) {
+        argno++;
+    }
+    argno = tcg_out_arg_reg32(s, argno, vl, false);
+    argno = tcg_out_arg_reg32(s, argno, vh, false);
+    return argno;
+}
 #endif
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc & 3, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc & 3, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+                           TCG_REG_R20, opc);
     tcg_out_branch(s, lab2, 1);
 
     /* TLB Miss.  */
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+                                   int datahi_reg, int addr_reg, int opc)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 0;
@@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, next, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
 
     /* There are no indexed stores, so we must do this addition explitly.
@@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
 
+    next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
     switch(opc) {
     case 0:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 1:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xffff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
         break;
     case 3:
-        /* Because of the alignment required by the 64-bit data argument,
-           we will always use R23/R24.  Also, we will always run out of
-           argument registers for storing mem_index, so that will have
-           to go on the stack.  */
-        if (mem_index == 0) {
-            argreg = TCG_REG_R0;
-        } else {
-            argreg = TCG_REG_R20;
-            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        }
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
-        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - 4);
+        argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
         break;
     default:
         tcg_abort();
     }
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
 
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
     tcg_out_label(s, lab2, s->code_ptr);
 #else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
-       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
+    /* There are no indexed stores, so if GUEST_BASE is set we must do
+       the add explicitly.  Careful to avoid R20, which is used for the
+       bswaps to follow.  */
     if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+        tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+                      TCG_GUEST_BASE_REG, INSN_ADDL);
         addrlo_reg = TCG_REG_R31;
     }
     tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
-- 
1.7.12.1