1 From 138dfa905538bf918af390ff365a27de49364578 Mon Sep 17 00:00:00 2001
2 From: Richard Henderson <rth@twiddle.net>
3 Date: Fri, 23 Mar 2012 23:27:39 +0100
4 Subject: [PATCH] tcg-sparc: Fix qemu_ld/st to handle 32-bit host.
6 At the same time, split out the tlb load logic to a new function.
7 Fixes the cases of two data registers and two address registers.
8 Fixes the signature of, and adds missing, qemu_ld/st opcodes.
10 Signed-off-by: Richard Henderson <rth@twiddle.net>
11 Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
13 tcg/sparc/tcg-target.c | 777 ++++++++++++++++++++++---------------------------
14 1 file changed, 348 insertions(+), 429 deletions(-)
16 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
17 index 23c2fda..d89c19b 100644
18 --- a/tcg/sparc/tcg-target.c
19 +++ b/tcg/sparc/tcg-target.c
20 @@ -59,8 +59,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
26 static const int tcg_target_reg_alloc_order[] = {
29 @@ -288,6 +286,16 @@ static inline int tcg_target_const_match(tcg_target_long val,
30 #define ASI_PRIMARY_LITTLE 0x88
33 +#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
34 +#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
35 +#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
36 +#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
37 +#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
39 +#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
40 +#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
41 +#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
43 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
46 @@ -360,64 +368,43 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
50 -static inline void tcg_out_ld_raw(TCGContext *s, int ret,
51 - tcg_target_long arg)
53 - tcg_out_sethi(s, ret, arg);
54 - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
55 - INSN_IMM13(arg & 0x3ff));
58 -static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
59 - tcg_target_long arg)
60 +static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
63 - if (!check_fit_tl(arg, 10))
64 - tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
65 - if (TCG_TARGET_REG_BITS == 64) {
66 - tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
67 - INSN_IMM13(arg & 0x3ff));
69 - tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
70 - INSN_IMM13(arg & 0x3ff));
72 + tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
75 -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
76 +static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
79 - if (check_fit_tl(offset, 13))
80 + if (check_fit_tl(offset, 13)) {
81 tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
85 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
86 - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
88 + tcg_out_ldst_rr(s, ret, addr, TCG_REG_I5, op);
92 -static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
93 - int offset, int op, int asi)
95 - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
96 - tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
97 - INSN_ASI(asi) | INSN_RS2(addr));
100 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
101 TCGReg arg1, tcg_target_long arg2)
103 - if (type == TCG_TYPE_I32)
104 - tcg_out_ldst(s, ret, arg1, arg2, LDUW);
106 - tcg_out_ldst(s, ret, arg1, arg2, LDX);
107 + tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
110 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
111 TCGReg arg1, tcg_target_long arg2)
113 - if (type == TCG_TYPE_I32)
114 - tcg_out_ldst(s, arg, arg1, arg2, STW);
116 - tcg_out_ldst(s, arg, arg1, arg2, STX);
117 + tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
120 +static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
121 + tcg_target_long arg)
123 + if (!check_fit_tl(arg, 10)) {
124 + tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
126 + tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
129 static inline void tcg_out_sety(TCGContext *s, int rs)
130 @@ -442,14 +429,15 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
134 -static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
135 +static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
136 + tcg_target_long val)
139 if (check_fit_tl(val, 13))
140 - tcg_out_arithi(s, reg, reg, val, ARITH_AND);
141 + tcg_out_arithi(s, rd, rs, val, ARITH_AND);
143 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
144 - tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
145 + tcg_out_arith(s, rd, rs, TCG_REG_I5, ARITH_AND);
149 @@ -718,418 +706,328 @@ static const void * const qemu_st_helpers[4] = {
155 -#if TARGET_LONG_BITS == 32
156 -#define TARGET_LD_OP LDUW
158 -#define TARGET_LD_OP LDX
160 +/* Perform the TLB load and compare.
162 -#if defined(CONFIG_SOFTMMU)
163 -#if HOST_LONG_BITS == 32
164 -#define TARGET_ADDEND_LD_OP LDUW
166 + ADDRLO_IDX contains the index into ARGS of the low part of the
167 + address; the high part of the address is at ADDRLO_IDX+1.
169 + MEM_INDEX and S_BITS are the memory context and log2 size of the load.
171 + WHICH is the offset into the CPUTLBEntry structure of the slot to read.
172 + This should be offsetof addr_read or addr_write.
174 + The result of the TLB comparison is in %[ix]cc. The sanitized address
175 + is in the returned register, maybe %o0. The TLB addend is in %o1. */
177 +static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
178 + int s_bits, const TCGArg *args, int which)
180 + const int addrlo = args[addrlo_idx];
181 + const int r0 = TCG_REG_O0;
182 + const int r1 = TCG_REG_O1;
183 + const int r2 = TCG_REG_O2;
187 + if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
188 + /* Assemble the 64-bit address in R0. */
189 + tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
190 + tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
191 + tcg_out_arith(s, r0, r0, r1, ARITH_OR);
194 + /* Shift the page number down to tlb-entry. */
195 + tcg_out_arithi(s, r1, addrlo,
196 + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
198 + /* Mask out the page offset, except for the required alignment. */
199 + tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
201 + /* Compute tlb index, modulo tlb size. */
202 + tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
204 + /* Relative to the current ENV. */
205 + tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
207 + /* Find a base address that can load both tlb comparator and addend. */
208 + tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
209 + if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
210 + tcg_out_addi(s, r1, tlb_ofs);
214 + /* Load the tlb comparator and the addend. */
215 + tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
216 + tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
218 + /* subcc r0, r2, %g0 */
219 + tcg_out_cmp(s, r0, r2, 0);
221 + /* If the guest address must be zero-extended, do so now. */
222 + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
223 + tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
228 +#endif /* CONFIG_SOFTMMU */
230 +static const int qemu_ld_opc[8] = {
231 +#ifdef TARGET_WORDS_BIGENDIAN
232 + LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
234 -#define TARGET_ADDEND_LD_OP LDX
236 + LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
240 -#if TCG_TARGET_REG_BITS == 64
241 -#define HOST_LD_OP LDX
242 -#define HOST_ST_OP STX
243 -#define HOST_SLL_OP SHIFT_SLLX
244 -#define HOST_SRA_OP SHIFT_SRAX
245 +static const int qemu_st_opc[4] = {
246 +#ifdef TARGET_WORDS_BIGENDIAN
249 -#define HOST_LD_OP LDUW
250 -#define HOST_ST_OP STW
251 -#define HOST_SLL_OP SHIFT_SLL
252 -#define HOST_SRA_OP SHIFT_SRA
253 + STB, STH_LE, STW_LE, STX_LE
257 -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
259 +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
261 - int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
262 + int addrlo_idx = 1, datalo, datahi, addr_reg;
263 #if defined(CONFIG_SOFTMMU)
264 - uint32_t *label1_ptr, *label2_ptr;
265 + int memi_idx, memi, s_bits, n;
266 + uint32_t *label_ptr[2];
269 - data_reg = *args++;
270 - addr_reg = *args++;
277 + datahi = datalo = args[0];
278 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
283 #if defined(CONFIG_SOFTMMU)
284 - /* srl addr_reg, x, arg1 */
285 - tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
287 - /* and addr_reg, x, arg0 */
288 - tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
290 + memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
291 + memi = args[memi_idx];
292 + s_bits = sizeop & 3;
294 + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
295 + offsetof(CPUTLBEntry, addr_read));
297 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
300 + /* bne,pn %[xi]cc, label0 */
301 + label_ptr[0] = (uint32_t *)s->code_ptr;
302 + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
303 + | ((TARGET_LONG_BITS == 64) << 21)));
306 + /* Load all 64-bits into an O/G register. */
307 + reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
308 + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
310 + /* Move the two 32-bit pieces into the destination registers. */
311 + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
312 + if (reg64 != datalo) {
313 + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
316 - /* and arg1, x, arg1 */
317 - tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
318 + /* b,a,pt label1 */
319 + label_ptr[1] = (uint32_t *)s->code_ptr;
320 + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
321 + | (1 << 29) | (1 << 19)));
323 + /* The fast path is exactly one insn. Thus we can perform the
324 + entire TLB Hit in the (annulled) delay slot of the branch
325 + over the TLB Miss case. */
327 + /* beq,a,pt %[xi]cc, label0 */
328 + label_ptr[0] = NULL;
329 + label_ptr[1] = (uint32_t *)s->code_ptr;
330 + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
331 + | ((TARGET_LONG_BITS == 64) << 21)
332 + | (1 << 29) | (1 << 19)));
334 + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
337 - /* add arg1, x, arg1 */
338 - tcg_out_addi(s, arg1, offsetof(CPUArchState,
339 - tlb_table[mem_index][0].addr_read));
342 - /* add env, arg1, arg1 */
343 - tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
344 + if (label_ptr[0]) {
345 + *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
346 + (unsigned long)label_ptr[0]);
349 + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
350 + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
351 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
352 + args[addrlo_idx + 1]);
354 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
357 - /* ld [arg1], arg2 */
358 - tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
359 - INSN_RS2(TCG_REG_G0));
360 + /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
361 + global registers */
362 + tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
363 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
366 - /* subcc arg0, arg2, %g0 */
367 - tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
372 - be,pt %xcc label1 */
373 - label1_ptr = (uint32_t *)s->code_ptr;
376 - /* mov (delay slot) */
377 - tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
380 - tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
381 - /* XXX/FIXME: suboptimal */
382 - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
383 - tcg_target_call_iarg_regs[2]);
384 - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
385 - tcg_target_call_iarg_regs[1]);
386 - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
387 - tcg_target_call_iarg_regs[0]);
388 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
391 - /* XXX: move that code at the end of the TB */
392 /* qemu_ld_helper[s_bits](arg0, arg1) */
393 tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
394 - (tcg_target_ulong)s->code_ptr) >> 2)
396 - /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
397 - global registers */
399 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
400 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
401 - sizeof(long), HOST_ST_OP);
402 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
403 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
404 - sizeof(long), HOST_LD_OP);
406 - /* data_reg = sign_extend(arg0) */
409 + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
411 + /* Reload AREG0. */
412 + tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
413 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
416 + n = tcg_target_call_oarg_regs[0];
417 + /* datalo = sign_extend(arg0) */
420 - /* sll arg0, 24/56, data_reg */
421 - tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
423 - /* sra data_reg, 24/56, data_reg */
424 - tcg_out_arithi(s, data_reg, data_reg,
425 - (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
426 + /* Recall that SRA sign extends from bit 31 through bit 63. */
427 + tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
428 + tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
431 - /* sll arg0, 16/48, data_reg */
432 - tcg_out_arithi(s, data_reg, arg0,
433 - (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
434 - /* sra data_reg, 16/48, data_reg */
435 - tcg_out_arithi(s, data_reg, data_reg,
436 - (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
437 + tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
438 + tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
441 - /* sll arg0, 32, data_reg */
442 - tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
443 - /* sra data_reg, 32, data_reg */
444 - tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
445 + tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
448 + if (TCG_TARGET_REG_BITS == 32) {
449 + tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
450 + tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
460 - tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
461 + tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
467 - label2_ptr = (uint32_t *)s->code_ptr;
470 - /* nop (delay slot */
474 -#if TARGET_LONG_BITS == 32
476 - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
477 - INSN_OFF22((unsigned long)s->code_ptr -
478 - (unsigned long)label1_ptr));
480 - /* be,pt %xcc label1 */
481 - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
482 - (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
483 - (unsigned long)label1_ptr));
486 - /* ld [arg1 + x], arg1 */
487 - tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
488 - offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
490 -#if TARGET_LONG_BITS == 32
491 - /* and addr_reg, x, arg0 */
492 - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
493 - tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
494 - /* add arg0, arg1, arg0 */
495 - tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
496 + *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
497 + (unsigned long)label_ptr[1]);
499 - /* add addr_reg, arg1, arg0 */
500 - tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
502 + addr_reg = args[addrlo_idx];
503 + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
504 + tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
505 + addr_reg = TCG_REG_I5;
507 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
508 + int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
513 + tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
517 - /* ldub [arg0], data_reg */
518 - tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
521 - /* ldsb [arg0], data_reg */
522 - tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
525 -#ifdef TARGET_WORDS_BIGENDIAN
526 - /* lduh [arg0], data_reg */
527 - tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
529 - /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
530 - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
534 -#ifdef TARGET_WORDS_BIGENDIAN
535 - /* ldsh [arg0], data_reg */
536 - tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
538 - /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
539 - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
543 -#ifdef TARGET_WORDS_BIGENDIAN
544 - /* lduw [arg0], data_reg */
545 - tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
547 - /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
548 - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
552 -#ifdef TARGET_WORDS_BIGENDIAN
553 - /* ldsw [arg0], data_reg */
554 - tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
556 - /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
557 - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
561 -#ifdef TARGET_WORDS_BIGENDIAN
562 - /* ldx [arg0], data_reg */
563 - tcg_out_ldst(s, data_reg, arg0, 0, LDX);
565 - /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
566 - tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
571 + tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
572 + if (reg64 != datalo) {
573 + tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
576 + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_ld_opc[sizeop]);
579 -#if defined(CONFIG_SOFTMMU)
581 - *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
582 - INSN_OFF22((unsigned long)s->code_ptr -
583 - (unsigned long)label2_ptr));
585 +#endif /* CONFIG_SOFTMMU */
588 -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
590 +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
592 - int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
593 + int addrlo_idx = 1, datalo, datahi, addr_reg;
594 #if defined(CONFIG_SOFTMMU)
595 - uint32_t *label1_ptr, *label2_ptr;
596 + int memi_idx, memi, n;
597 + uint32_t *label_ptr;
600 - data_reg = *args++;
601 - addr_reg = *args++;
609 + datahi = datalo = args[0];
610 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
615 #if defined(CONFIG_SOFTMMU)
616 - /* srl addr_reg, x, arg1 */
617 - tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
620 - /* and addr_reg, x, arg0 */
621 - tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
624 - /* and arg1, x, arg1 */
625 - tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
627 - /* add arg1, x, arg1 */
628 - tcg_out_addi(s, arg1, offsetof(CPUArchState,
629 - tlb_table[mem_index][0].addr_write));
630 + memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
631 + memi = args[memi_idx];
633 + addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
634 + offsetof(CPUTLBEntry, addr_write));
636 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
637 + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
638 + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
639 + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
640 + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
641 + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
642 + datalo = TCG_REG_G1;
645 - /* add env, arg1, arg1 */
646 - tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
647 + /* The fast path is exactly one insn. Thus we can perform the entire
648 + TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
649 + /* beq,a,pt %[xi]cc, label0 */
650 + label_ptr = (uint32_t *)s->code_ptr;
651 + tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
652 + | ((TARGET_LONG_BITS == 64) << 21)
653 + | (1 << 29) | (1 << 19)));
655 + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
660 + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
661 + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
662 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
663 + args[addrlo_idx + 1]);
665 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
667 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
668 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
670 + tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
672 - /* ld [arg1], arg2 */
673 - tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
674 - INSN_RS2(TCG_REG_G0));
675 + /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
676 + global registers */
677 + tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
678 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
681 - /* subcc arg0, arg2, %g0 */
682 - tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
687 - be,pt %xcc label1 */
688 - label1_ptr = (uint32_t *)s->code_ptr;
691 - /* mov (delay slot) */
692 - tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
695 - tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
698 - tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
700 - /* XXX/FIXME: suboptimal */
701 - tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
702 - tcg_target_call_iarg_regs[2]);
703 - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
704 - tcg_target_call_iarg_regs[1]);
705 - tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
706 - tcg_target_call_iarg_regs[0]);
707 - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
709 - /* XXX: move that code at the end of the TB */
710 /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
711 - tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
712 + tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
713 - (tcg_target_ulong)s->code_ptr) >> 2)
715 - /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
716 - global registers */
718 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
719 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
720 - sizeof(long), HOST_ST_OP);
721 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
722 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
723 - sizeof(long), HOST_LD_OP);
727 - label2_ptr = (uint32_t *)s->code_ptr;
730 - /* nop (delay slot) */
733 + tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
735 -#if TARGET_LONG_BITS == 32
737 - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
738 - INSN_OFF22((unsigned long)s->code_ptr -
739 - (unsigned long)label1_ptr));
741 - /* be,pt %xcc label1 */
742 - *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
743 - (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
744 - (unsigned long)label1_ptr));
747 - /* ld [arg1 + x], arg1 */
748 - tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
749 - offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
750 + /* Reload AREG0. */
751 + tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
752 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
755 -#if TARGET_LONG_BITS == 32
756 - /* and addr_reg, x, arg0 */
757 - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
758 - tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
759 - /* add arg0, arg1, arg0 */
760 - tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
761 + *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
762 + (unsigned long)label_ptr);
764 - /* add addr_reg, arg1, arg0 */
765 - tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
774 - /* stb data_reg, [arg0] */
775 - tcg_out_ldst(s, data_reg, arg0, 0, STB);
778 -#ifdef TARGET_WORDS_BIGENDIAN
779 - /* sth data_reg, [arg0] */
780 - tcg_out_ldst(s, data_reg, arg0, 0, STH);
782 - /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
783 - tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
787 -#ifdef TARGET_WORDS_BIGENDIAN
788 - /* stw data_reg, [arg0] */
789 - tcg_out_ldst(s, data_reg, arg0, 0, STW);
791 - /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
792 - tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
796 -#ifdef TARGET_WORDS_BIGENDIAN
797 - /* stx data_reg, [arg0] */
798 - tcg_out_ldst(s, data_reg, arg0, 0, STX);
800 - /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
801 - tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
806 + addr_reg = args[addrlo_idx];
807 + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
808 + tcg_out_arithi(s, TCG_REG_I5, addr_reg, 0, SHIFT_SRL);
809 + addr_reg = TCG_REG_I5;
812 -#if defined(CONFIG_SOFTMMU)
814 - *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
815 - INSN_OFF22((unsigned long)s->code_ptr -
816 - (unsigned long)label2_ptr));
818 + if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
819 + /* Reconstruct the full 64-bit value in %g1, using %o2 as temp. */
820 + /* ??? Redefine the temps from %i4/%i5 so that we have a o/g temp. */
821 + tcg_out_arithi(s, TCG_REG_G1, datalo, 0, SHIFT_SRL);
822 + tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
823 + tcg_out_arith(s, TCG_REG_G1, TCG_REG_G1, TCG_REG_O2, ARITH_OR);
824 + datalo = TCG_REG_G1;
826 + tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_G0, qemu_st_opc[sizeop]);
827 +#endif /* CONFIG_SOFTMMU */
830 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
831 @@ -1175,12 +1073,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
832 /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
835 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
836 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
837 - sizeof(long), HOST_ST_OP);
838 - tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
839 - TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
840 - sizeof(long), HOST_LD_OP);
841 + tcg_out_st(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
842 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
844 + tcg_out_ld(s, TCG_TYPE_REG, TCG_AREG0, TCG_REG_CALL_STACK,
845 + TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
850 @@ -1348,6 +1246,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
851 tcg_out_qemu_ld(s, args, 2 | 4);
854 + case INDEX_op_qemu_ld64:
855 + tcg_out_qemu_ld(s, args, 3);
857 case INDEX_op_qemu_st8:
858 tcg_out_qemu_st(s, args, 0);
860 @@ -1357,6 +1258,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
861 case INDEX_op_qemu_st32:
862 tcg_out_qemu_st(s, args, 2);
864 + case INDEX_op_qemu_st64:
865 + tcg_out_qemu_st(s, args, 3);
868 #if TCG_TARGET_REG_BITS == 64
869 case INDEX_op_movi_i64:
870 @@ -1421,13 +1325,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
871 args[2], const_args[2]);
874 - case INDEX_op_qemu_ld64:
875 - tcg_out_qemu_ld(s, args, 3);
877 - case INDEX_op_qemu_st64:
878 - tcg_out_qemu_st(s, args, 3);
883 tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
884 @@ -1492,20 +1389,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
885 { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
888 - { INDEX_op_qemu_ld8u, { "r", "L" } },
889 - { INDEX_op_qemu_ld8s, { "r", "L" } },
890 - { INDEX_op_qemu_ld16u, { "r", "L" } },
891 - { INDEX_op_qemu_ld16s, { "r", "L" } },
892 - { INDEX_op_qemu_ld32, { "r", "L" } },
893 -#if TCG_TARGET_REG_BITS == 64
894 - { INDEX_op_qemu_ld32u, { "r", "L" } },
895 - { INDEX_op_qemu_ld32s, { "r", "L" } },
898 - { INDEX_op_qemu_st8, { "L", "L" } },
899 - { INDEX_op_qemu_st16, { "L", "L" } },
900 - { INDEX_op_qemu_st32, { "L", "L" } },
902 #if TCG_TARGET_REG_BITS == 64
903 { INDEX_op_mov_i64, { "r", "r" } },
904 { INDEX_op_movi_i64, { "r" } },
905 @@ -1520,8 +1403,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
906 { INDEX_op_st16_i64, { "r", "r" } },
907 { INDEX_op_st32_i64, { "r", "r" } },
908 { INDEX_op_st_i64, { "r", "r" } },
909 - { INDEX_op_qemu_ld64, { "L", "L" } },
910 - { INDEX_op_qemu_st64, { "L", "L" } },
912 { INDEX_op_add_i64, { "r", "r", "rJ" } },
913 { INDEX_op_mul_i64, { "r", "r", "rJ" } },
914 @@ -1548,10 +1429,48 @@ static const TCGTargetOpDef sparc_op_defs[] = {
916 { INDEX_op_brcond_i64, { "r", "rJ" } },
917 { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
919 - { INDEX_op_qemu_ld64, { "L", "L", "L" } },
922 +#if TCG_TARGET_REG_BITS == 64
923 + { INDEX_op_qemu_ld8u, { "r", "L" } },
924 + { INDEX_op_qemu_ld8s, { "r", "L" } },
925 + { INDEX_op_qemu_ld16u, { "r", "L" } },
926 + { INDEX_op_qemu_ld16s, { "r", "L" } },
927 + { INDEX_op_qemu_ld32, { "r", "L" } },
928 + { INDEX_op_qemu_ld32u, { "r", "L" } },
929 + { INDEX_op_qemu_ld32s, { "r", "L" } },
930 + { INDEX_op_qemu_ld64, { "r", "L" } },
932 + { INDEX_op_qemu_st8, { "L", "L" } },
933 + { INDEX_op_qemu_st16, { "L", "L" } },
934 + { INDEX_op_qemu_st32, { "L", "L" } },
935 + { INDEX_op_qemu_st64, { "L", "L" } },
936 +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
937 + { INDEX_op_qemu_ld8u, { "r", "L" } },
938 + { INDEX_op_qemu_ld8s, { "r", "L" } },
939 + { INDEX_op_qemu_ld16u, { "r", "L" } },
940 + { INDEX_op_qemu_ld16s, { "r", "L" } },
941 + { INDEX_op_qemu_ld32, { "r", "L" } },
942 + { INDEX_op_qemu_ld64, { "r", "r", "L" } },
944 + { INDEX_op_qemu_st8, { "L", "L" } },
945 + { INDEX_op_qemu_st16, { "L", "L" } },
946 + { INDEX_op_qemu_st32, { "L", "L" } },
947 { INDEX_op_qemu_st64, { "L", "L", "L" } },
949 + { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
950 + { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
951 + { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
952 + { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
953 + { INDEX_op_qemu_ld32, { "r", "L", "L" } },
954 + { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
956 + { INDEX_op_qemu_st8, { "L", "L", "L" } },
957 + { INDEX_op_qemu_st16, { "L", "L", "L" } },
958 + { INDEX_op_qemu_st32, { "L", "L", "L" } },
959 + { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },