|
|
5544c1 |
From 879794c3d3974b1206bbc52011c8f2525709f396 Mon Sep 17 00:00:00 2001
|
|
|
5544c1 |
From: Aurelien Jarno <aurelien@aurel32.net>
|
|
|
5544c1 |
Date: Fri, 21 Sep 2012 18:20:26 +0200
|
|
|
5544c1 |
Subject: [PATCH] tcg/mips: optimize bswap{16,16s,32} on MIPS32R2
|
|
|
5544c1 |
|
|
|
5544c1 |
bswap operations can be optimized on MIPS32 Release 2 using the ROTR,
|
|
|
5544c1 |
WSBH and SEH instructions. We can't use the non-R2 code to implement the
|
|
|
5544c1 |
ops due to register constraints, so don't define the corresponding
|
|
|
5544c1 |
TCG_TARGET_HAS_bswap* values.
|
|
|
5544c1 |
|
|
|
5544c1 |
Also bswap16* operations are supposed to be called with the 16 high bits
|
|
|
5544c1 |
zeroed. This is the case everywhere (including for TCG by definition)
|
|
|
5544c1 |
except when called from the store helper. Remove the AND instructions from
|
|
|
5544c1 |
bswap16* and move them there.
|
|
|
5544c1 |
|
|
|
5544c1 |
Reviewed-by: Richard Henderson <rth@twiddle.net>
|
|
|
5544c1 |
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
|
|
|
5544c1 |
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
|
|
5544c1 |
|
|
|
5544c1 |
tcg/mips/tcg-target.c | 34 +++++++++++++++++++++++++++++
|
|
|
5544c1 |
tcg/mips/tcg-target.h | 11 +++++++++
|
|
|
5544c1 |
2 files changed, 38 insertions(+), 7 deletions(-)
|
|
|
5544c1 |
|
|
|
5544c1 |
diff
|
|
|
5544c1 |
index 6aa4527..8b2f9fc 100644
|
|
|
5544c1 |
|
|
|
5544c1 |
|
|
|
5544c1 |
@@ -326,6 +326,7 @@ enum {
|
|
|
5544c1 |
OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
|
|
|
5544c1 |
|
|
|
5544c1 |
OPC_SPECIAL3 = 0x1f << 26,
|
|
|
5544c1 |
+ OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
|
|
|
5544c1 |
OPC_SEB = OPC_SPECIAL3 | 0x420,
|
|
|
5544c1 |
OPC_SEH = OPC_SPECIAL3 | 0x620,
|
|
|
5544c1 |
};
|
|
|
5544c1 |
@@ -419,36 +420,45 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
|
|
|
5544c1 |
|
|
|
5544c1 |
static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
|
|
|
5544c1 |
{
|
|
|
5544c1 |
+#ifdef _MIPS_ARCH_MIPS32R2
|
|
|
5544c1 |
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
|
|
|
5544c1 |
+#else
|
|
|
5544c1 |
/* ret and arg can't be register at */
|
|
|
5544c1 |
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
|
|
|
5544c1 |
tcg_abort();
|
|
|
5544c1 |
}
|
|
|
5544c1 |
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
|
|
|
5544c1 |
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
|
|
|
5544c1 |
-
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
|
|
|
5544c1 |
tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
|
|
|
5544c1 |
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
|
|
|
5544c1 |
+#endif
|
|
|
5544c1 |
}
|
|
|
5544c1 |
|
|
|
5544c1 |
static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
|
|
|
5544c1 |
{
|
|
|
5544c1 |
+#ifdef _MIPS_ARCH_MIPS32R2
|
|
|
5544c1 |
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
|
|
|
5544c1 |
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
|
|
|
5544c1 |
+#else
|
|
|
5544c1 |
/* ret and arg can't be register at */
|
|
|
5544c1 |
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
|
|
|
5544c1 |
tcg_abort();
|
|
|
5544c1 |
}
|
|
|
5544c1 |
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
|
|
|
5544c1 |
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
|
|
|
5544c1 |
-
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
|
|
|
5544c1 |
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
|
|
|
5544c1 |
+#endif
|
|
|
5544c1 |
}
|
|
|
5544c1 |
|
|
|
5544c1 |
static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
|
|
|
5544c1 |
{
|
|
|
5544c1 |
+#ifdef _MIPS_ARCH_MIPS32R2
|
|
|
5544c1 |
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
|
|
|
5544c1 |
+ tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
|
|
|
5544c1 |
+#else
|
|
|
5544c1 |
/* ret and arg must be different and can't be register at */
|
|
|
5544c1 |
if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
|
|
|
5544c1 |
tcg_abort();
|
|
|
5544c1 |
@@ -466,6 +476,7 @@ static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
|
|
|
5544c1 |
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
|
|
|
5544c1 |
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
|
|
|
5544c1 |
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
|
|
|
5544c1 |
+#endif
|
|
|
5544c1 |
}
|
|
|
5544c1 |
|
|
|
5544c1 |
static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
|
|
|
5544c1 |
@@ -1188,7 +1199,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
|
|
|
5544c1 |
break;
|
|
|
5544c1 |
case 1:
|
|
|
5544c1 |
if (TCG_NEED_BSWAP) {
|
|
|
5544c1 |
- tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
|
|
|
5544c1 |
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
|
|
|
5544c1 |
+ tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
|
|
|
5544c1 |
tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
|
|
|
5544c1 |
} else {
|
|
|
5544c1 |
tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
|
|
|
5544c1 |
@@ -1409,6 +1421,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|
|
5544c1 |
}
|
|
|
5544c1 |
break;
|
|
|
5544c1 |
|
|
|
5544c1 |
+ /* The bswap routines do not work on non-R2 CPUs. In that case
|
|
|
5544c1 |
+ we let TCG generate the corresponding code. */
|
|
|
5544c1 |
+ case INDEX_op_bswap16_i32:
|
|
|
5544c1 |
+ tcg_out_bswap16(s, args[0], args[1]);
|
|
|
5544c1 |
+ break;
|
|
|
5544c1 |
+ case INDEX_op_bswap32_i32:
|
|
|
5544c1 |
+ tcg_out_bswap32(s, args[0], args[1]);
|
|
|
5544c1 |
+ break;
|
|
|
5544c1 |
+
|
|
|
5544c1 |
case INDEX_op_ext8s_i32:
|
|
|
5544c1 |
tcg_out_ext8s(s, args[0], args[1]);
|
|
|
5544c1 |
break;
|
|
|
5544c1 |
@@ -1503,6 +1524,9 @@ static const TCGTargetOpDef mips_op_defs[] = {
|
|
|
5544c1 |
{ INDEX_op_shr_i32, { "r", "rZ", "ri" } },
|
|
|
5544c1 |
{ INDEX_op_sar_i32, { "r", "rZ", "ri" } },
|
|
|
5544c1 |
|
|
|
5544c1 |
+ { INDEX_op_bswap16_i32, { "r", "r" } },
|
|
|
5544c1 |
+ { INDEX_op_bswap32_i32, { "r", "r" } },
|
|
|
5544c1 |
+
|
|
|
5544c1 |
{ INDEX_op_ext8s_i32, { "r", "rZ" } },
|
|
|
5544c1 |
{ INDEX_op_ext16s_i32, { "r", "rZ" } },
|
|
|
5544c1 |
|
|
|
5544c1 |
diff
|
|
|
5544c1 |
index 9c68a32..c5c13f7 100644
|
|
|
5544c1 |
|
|
|
5544c1 |
|
|
|
5544c1 |
@@ -83,8 +83,6 @@ typedef enum {
|
|
|
5544c1 |
#define TCG_TARGET_HAS_rot_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_ext8s_i32 1
|
|
|
5544c1 |
#define TCG_TARGET_HAS_ext16s_i32 1
|
|
|
5544c1 |
-#define TCG_TARGET_HAS_bswap32_i32 0
|
|
|
5544c1 |
-#define TCG_TARGET_HAS_bswap16_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_andc_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_orc_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_eqv_i32 0
|
|
|
5544c1 |
@@ -92,6 +90,15 @@ typedef enum {
|
|
|
5544c1 |
#define TCG_TARGET_HAS_deposit_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_movcond_i32 0
|
|
|
5544c1 |
|
|
|
5544c1 |
+
|
|
|
5544c1 |
+#ifdef _MIPS_ARCH_MIPS32R2
|
|
|
5544c1 |
+#define TCG_TARGET_HAS_bswap16_i32 1
|
|
|
5544c1 |
+#define TCG_TARGET_HAS_bswap32_i32 1
|
|
|
5544c1 |
+#else
|
|
|
5544c1 |
+#define TCG_TARGET_HAS_bswap16_i32 0
|
|
|
5544c1 |
+#define TCG_TARGET_HAS_bswap32_i32 0
|
|
|
5544c1 |
+#endif
|
|
|
5544c1 |
+
|
|
|
5544c1 |
|
|
|
5544c1 |
#define TCG_TARGET_HAS_neg_i32 0
|
|
|
5544c1 |
#define TCG_TARGET_HAS_ext8u_i32 0
|
|
|
5544c1 |
--
|
|
|
5544c1 |
1.7.12.1
|
|
|
5544c1 |
|