From c13ecfea174994d3f7f7d392f0faaed6d40efd9e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Sep 2012 19:59:48 -0700 Subject: [PATCH] tcg-hppa: Fix broken load/store helpers The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers was not respecting the ABI requirement for 64-bit values being aligned in registers. Mirror the ARM port in use of helper functions to marshal arguments into the correct registers. Signed-off-by: Richard Henderson Signed-off-by: Aurelien Jarno Signed-off-by: Michael Roth --- tcg/hppa/tcg-target.c | 136 +++++++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 62 deletions(-) diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c index a76569d..5385d45 100644 --- a/tcg/hppa/tcg-target.c +++ b/tcg/hppa/tcg-target.c @@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset); } - /* Compute the value that ought to appear in the TLB for a hit, namely, the page - of the address. We include the low N bits of the address to catch unaligned - accesses and force them onto the slow path. Do this computation after having - issued the load from the TLB slot to give the load time to complete. */ + /* Compute the value that ought to appear in the TLB for a hit, namely, + the page of the address. We include the low N bits of the address + to catch unaligned accesses and force them onto the slow path. Do + this computation after having issued the load from the TLB slot to + give the load time to complete. */ tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); /* If not equal, jump to lab_miss. */ @@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo, return ret; } + +static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst) +{ + if (argno < 4) { + if (vconst) { + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); + } else { + tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v); + } + } else { + if (vconst && v != 0) { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v); + v = TCG_REG_R20; + } + tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4)); + } + return argno + 1; +} + +static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh) +{ + /* 64-bit arguments must go in even reg pairs and stack slots. */ + if (argno & 1) { + argno++; + } + argno = tcg_out_arg_reg32(s, argno, vl, false); + argno = tcg_out_arg_reg32(s, argno, vh, false); + return argno; +} #endif static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg, @@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) /* Note that addrhi_reg is only used for 64-bit guests. */ int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); int mem_index = *args; - int lab1, lab2, argreg, offset; + int lab1, lab2, argno, offset; lab1 = gen_new_label(); lab2 = gen_new_label(); offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, - opc & 3, lab1, offset); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, + addrhi_reg, opc & 3, lab1, offset); /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, + (offset ? TCG_REG_R1 : TCG_REG_R25), offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset); - tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc); + tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, + TCG_REG_R20, opc); tcg_out_branch(s, lab2, 1); /* TLB Miss. */ /* label1: */ tcg_out_label(s, lab1, s->code_ptr); - argreg = TCG_REG_R26; - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + argno = 0; + argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false); if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg); + } else { + argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false); } - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); - - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); + argno = tcg_out_arg_reg32(s, argno, mem_index, true); + tcg_out_call(s, qemu_ld_helpers[opc & 3]); switch (opc) { @@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) #endif } -static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg, - int addr_reg, int opc) +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, + int datahi_reg, int addr_reg, int opc) { #ifdef TARGET_WORDS_BIGENDIAN const int bswap = 0; @@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) /* Note that addrhi_reg is only used for 64-bit guests. */ int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0); int mem_index = *args; - int lab1, lab2, argreg, offset; + int lab1, lab2, argno, next, offset; lab1 = gen_new_label(); lab2 = gen_new_label(); offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg, - opc, lab1, offset); + offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, + addrhi_reg, opc, lab1, offset); /* TLB Hit. */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25), + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, + (offset ? TCG_REG_R1 : TCG_REG_R25), offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset); /* There are no indexed stores, so we must do this addition explitly. @@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) /* label1: */ tcg_out_label(s, lab1, s->code_ptr); - argreg = TCG_REG_R26; - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg); + argno = 0; + argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false); if (TARGET_LONG_BITS == 64) { - tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg); + argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg); + } else { + argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false); } + next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20); switch(opc) { case 0: - tcg_out_andi(s, argreg--, datalo_reg, 0xff); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + tcg_out_andi(s, next, datalo_reg, 0xff); + argno = tcg_out_arg_reg32(s, argno, next, false); break; case 1: - tcg_out_andi(s, argreg--, datalo_reg, 0xffff); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + tcg_out_andi(s, next, datalo_reg, 0xffff); + argno = tcg_out_arg_reg32(s, argno, next, false); break; case 2: - tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg); - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); + argno = tcg_out_arg_reg32(s, argno, datalo_reg, false); break; case 3: - /* Because of the alignment required by the 64-bit data argument, - we will always use R23/R24. Also, we will always run out of - argument registers for storing mem_index, so that will have - to go on the stack. */ - if (mem_index == 0) { - argreg = TCG_REG_R0; - } else { - argreg = TCG_REG_R20; - tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index); - } - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg); - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg); - tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET - 4); + argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg); break; default: tcg_abort(); } + argno = tcg_out_arg_reg32(s, argno, mem_index, true); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], - tcg_target_call_iarg_regs[2]); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], - tcg_target_call_iarg_regs[1]); - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1], - tcg_target_call_iarg_regs[0]); - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], - TCG_AREG0); tcg_out_call(s, qemu_st_helpers[opc]); /* label2: */ tcg_out_label(s, lab2, s->code_ptr); #else - /* There are no indexed stores, so if GUEST_BASE is set we must do the add - explicitly. Careful to avoid R20, which is used for the bswaps to follow. */ + /* There are no indexed stores, so if GUEST_BASE is set we must do + the add explicitly. Careful to avoid R20, which is used for the + bswaps to follow. */ if (GUEST_BASE != 0) { - tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL); + tcg_out_arith(s, TCG_REG_R31, addrlo_reg, + TCG_GUEST_BASE_REG, INSN_ADDL); addrlo_reg = TCG_REG_R31; } tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc); -- 1.7.12.1