Blame 0064-tcg-mips-optimize-bswap-16-16s-32-on-MIPS32R2.patch

5544c1
From 879794c3d3974b1206bbc52011c8f2525709f396 Mon Sep 17 00:00:00 2001
5544c1
From: Aurelien Jarno <aurelien@aurel32.net>
5544c1
Date: Fri, 21 Sep 2012 18:20:26 +0200
5544c1
Subject: [PATCH] tcg/mips: optimize bswap{16,16s,32} on MIPS32R2
5544c1
5544c1
bswap operations can be optimized on MIPS32 Release 2 using the ROTR,
5544c1
WSBH and SEH instructions. We can't use the non-R2 code to implement the
5544c1
ops due to register constraints, so don't define the corresponding
5544c1
TCG_TARGET_HAS_bswap* values.
5544c1
5544c1
Also bswap16* operations are supposed to be called with the 16 high bits
5544c1
zeroed. This is the case everywhere (including for TCG by definition)
5544c1
except when called from the store helper. Remove the AND instructions from
5544c1
bswap16* and move them there.
5544c1
5544c1
Reviewed-by: Richard Henderson <rth@twiddle.net>
5544c1
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
5544c1
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
5544c1
---
5544c1
 tcg/mips/tcg-target.c | 34 +++++++++++++++++++++++++++++-----
5544c1
 tcg/mips/tcg-target.h | 11 +++++++++--
5544c1
 2 files changed, 38 insertions(+), 7 deletions(-)
5544c1
5544c1
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
5544c1
index 6aa4527..8b2f9fc 100644
5544c1
--- a/tcg/mips/tcg-target.c
5544c1
+++ b/tcg/mips/tcg-target.c
5544c1
@@ -326,6 +326,7 @@ enum {
5544c1
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
5544c1
 
5544c1
     OPC_SPECIAL3 = 0x1f << 26,
5544c1
+    OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
5544c1
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
5544c1
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
5544c1
 };
5544c1
@@ -419,36 +420,45 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
5544c1
 
5544c1
 static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
5544c1
 {
5544c1
+#ifdef _MIPS_ARCH_MIPS32R2
5544c1
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
5544c1
+#else
5544c1
     /* ret and arg can't be register at */
5544c1
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
5544c1
         tcg_abort();
5544c1
     }
5544c1
 
5544c1
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
5544c1
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
5544c1
-
5544c1
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
5544c1
     tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
5544c1
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
5544c1
+#endif
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
5544c1
 {
5544c1
+#ifdef _MIPS_ARCH_MIPS32R2
5544c1
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
5544c1
+    tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
5544c1
+#else
5544c1
     /* ret and arg can't be register at */
5544c1
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
5544c1
         tcg_abort();
5544c1
     }
5544c1
 
5544c1
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
5544c1
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
5544c1
-
5544c1
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
5544c1
     tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
5544c1
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
5544c1
+#endif
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
5544c1
 {
5544c1
+#ifdef _MIPS_ARCH_MIPS32R2
5544c1
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
5544c1
+    tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
5544c1
+#else
5544c1
     /* ret and arg must be different and can't be register at */
5544c1
     if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
5544c1
         tcg_abort();
5544c1
@@ -466,6 +476,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
5544c1
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
5544c1
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
5544c1
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
5544c1
+#endif
5544c1
 }
5544c1
 
5544c1
 static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
5544c1
@@ -1188,7 +1199,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
5544c1
         break;
5544c1
     case 1:
5544c1
         if (TCG_NEED_BSWAP) {
5544c1
-            tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
5544c1
+            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
5544c1
+            tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
5544c1
             tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
5544c1
         } else {
5544c1
             tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
5544c1
@@ -1409,6 +1421,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
5544c1
         }
5544c1
         break;
5544c1
 
5544c1
+    /* The bswap routines do not work on non-R2 CPU. In that case
5544c1
+       we let TCG generate the corresponding code. */
5544c1
+    case INDEX_op_bswap16_i32:
5544c1
+        tcg_out_bswap16(s, args[0], args[1]);
5544c1
+        break;
5544c1
+    case INDEX_op_bswap32_i32:
5544c1
+        tcg_out_bswap32(s, args[0], args[1]);
5544c1
+        break;
5544c1
+
5544c1
     case INDEX_op_ext8s_i32:
5544c1
         tcg_out_ext8s(s, args[0], args[1]);
5544c1
         break;
5544c1
@@ -1503,6 +1524,9 @@ static const TCGTargetOpDef mips_op_defs[] = {
5544c1
     { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
5544c1
     { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
5544c1
 
5544c1
+    { INDEX_op_bswap16_i32, { "r", "r" } },
5544c1
+    { INDEX_op_bswap32_i32, { "r", "r" } },
5544c1
+
5544c1
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
5544c1
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
5544c1
 
5544c1
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
5544c1
index 9c68a32..c5c13f7 100644
5544c1
--- a/tcg/mips/tcg-target.h
5544c1
+++ b/tcg/mips/tcg-target.h
5544c1
@@ -83,8 +83,6 @@ typedef enum {
5544c1
 #define TCG_TARGET_HAS_rot_i32          0
5544c1
 #define TCG_TARGET_HAS_ext8s_i32        1
5544c1
 #define TCG_TARGET_HAS_ext16s_i32       1
5544c1
-#define TCG_TARGET_HAS_bswap32_i32      0
5544c1
-#define TCG_TARGET_HAS_bswap16_i32      0
5544c1
 #define TCG_TARGET_HAS_andc_i32         0
5544c1
 #define TCG_TARGET_HAS_orc_i32          0
5544c1
 #define TCG_TARGET_HAS_eqv_i32          0
5544c1
@@ -92,6 +90,15 @@ typedef enum {
5544c1
 #define TCG_TARGET_HAS_deposit_i32      0
5544c1
 #define TCG_TARGET_HAS_movcond_i32      0
5544c1
 
5544c1
+/* optional instructions only implemented on MIPS32R2 */
5544c1
+#ifdef _MIPS_ARCH_MIPS32R2
5544c1
+#define TCG_TARGET_HAS_bswap16_i32      1
5544c1
+#define TCG_TARGET_HAS_bswap32_i32      1
5544c1
+#else
5544c1
+#define TCG_TARGET_HAS_bswap16_i32      0
5544c1
+#define TCG_TARGET_HAS_bswap32_i32      0
5544c1
+#endif
5544c1
+
5544c1
 /* optional instructions automatically implemented */
5544c1
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
5544c1
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
5544c1
-- 
5544c1
1.7.12.1
5544c1