1 From bf408071104de13f79a0c3c8cac892f440462e7c Mon Sep 17 00:00:00 2001
2 From: Aurelien Jarno <aurelien@aurel32.net>
3 Date: Tue, 11 Sep 2012 12:31:21 +0200
4 Subject: [PATCH] tcg/optimize: rework copy progagation
6 The copy propagation pass tries to keep track what is a copy of what
7 and what has copy of what, and in addition it keep a circular list of
8 of all the copies. Unfortunately this doesn't fully work: a mov from
9 a temp which has a state "COPY" changed it into a state "HAS_COPY".
10 Later when this temp is used again, it is considered has not having
11 copy and thus no propagation is done.
13 This patch fixes that by removing the hiearchy between copies, and thus
14 only keeping a "COPY" state both meaning "is a copy" and "has a copy".
15 The decision of which copy to use is deferred to the actual temp
16 replacement. At this stage there is not one best choice to do, but only
17 better choices than others. For doing the best choice the operation
18 would have to be parsed in reversed to know if a temp is going to be
19 used later or not. That what is done by the liveness analysis. At this
20 stage it is known that globals will be always live, that local temps
21 will be dead at the end of the translation block, and that the temps
22 will be dead at the end of the basic block. This means that this stage
23 should try to replace temps by local temps or globals and local temps
26 Reviewed-by: Richard Henderson <rth@twiddle.net>
27 Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
28 Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
30 tcg/optimize.c | 167 +++++++++++++++++++++++++++++++--------------------------
31 1 file changed, 92 insertions(+), 75 deletions(-)
33 diff --git a/tcg/optimize.c b/tcg/optimize.c
34 index da8dffe..1904b39 100644
37 @@ -39,7 +39,6 @@ typedef enum {
44 struct tcg_temp_info {
45 @@ -51,39 +50,19 @@ struct tcg_temp_info {
47 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
49 -/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP was a representative of some
50 - class of equivalent temp's, a new representative should be chosen in this
52 -static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
53 +/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
54 + the copy flag from the left temp. */
55 +static void reset_temp(TCGArg temp)
58 - TCGArg new_base = (TCGArg)-1;
59 - if (temps[temp].state == TCG_TEMP_HAS_COPY) {
60 - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
61 - if (i >= nb_globals) {
62 - temps[i].state = TCG_TEMP_HAS_COPY;
67 - for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
68 - if (new_base == (TCGArg)-1) {
69 - temps[i].state = TCG_TEMP_UNDEF;
71 - temps[i].val = new_base;
73 + if (temps[temp].state == TCG_TEMP_COPY) {
74 + if (temps[temp].prev_copy == temps[temp].next_copy) {
75 + temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
77 + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
78 + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
80 - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
81 - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
82 - } else if (temps[temp].state == TCG_TEMP_COPY) {
83 - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
84 - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
85 - new_base = temps[temp].val;
87 temps[temp].state = TCG_TEMP_UNDEF;
88 - if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
89 - temps[new_base].state = TCG_TEMP_UNDEF;
93 static int op_bits(TCGOpcode op)
94 @@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
98 +static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
102 + /* If this is already a global, we can't do better. */
103 + if (temp < s->nb_globals) {
107 + /* Search for a global first. */
108 + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
109 + if (i < s->nb_globals) {
114 + /* If it is a temp, search for a temp local. */
115 + if (!s->temps[temp].temp_local) {
116 + for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
117 + if (s->temps[i].temp_local) {
123 + /* Failure to find a better representation, return the same temp. */
127 +static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
131 + if (arg1 == arg2) {
135 + if (temps[arg1].state != TCG_TEMP_COPY
136 + || temps[arg2].state != TCG_TEMP_COPY) {
140 + for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
149 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
150 TCGArg dst, TCGArg src)
152 - reset_temp(dst, s->nb_temps, s->nb_globals);
153 - assert(temps[src].state != TCG_TEMP_COPY);
154 - /* Only consider temps with the same type (width) as copies. */
155 - if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
156 - assert(temps[src].state != TCG_TEMP_CONST);
157 - if (temps[src].state != TCG_TEMP_HAS_COPY) {
158 - temps[src].state = TCG_TEMP_HAS_COPY;
160 + assert(temps[src].state != TCG_TEMP_CONST);
162 + if (s->temps[src].type == s->temps[dst].type) {
163 + if (temps[src].state != TCG_TEMP_COPY) {
164 + temps[src].state = TCG_TEMP_COPY;
165 temps[src].next_copy = src;
166 temps[src].prev_copy = src;
168 temps[dst].state = TCG_TEMP_COPY;
169 - temps[dst].val = src;
170 temps[dst].next_copy = temps[src].next_copy;
171 temps[dst].prev_copy = src;
172 temps[temps[dst].next_copy].prev_copy = dst;
173 temps[src].next_copy = dst;
180 -static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
181 - int nb_temps, int nb_globals)
182 +static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
184 - reset_temp(dst, nb_temps, nb_globals);
186 temps[dst].state = TCG_TEMP_CONST;
187 temps[dst].val = val;
189 @@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
194 /* Propagate constants and copies, fold constant expressions. */
195 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
196 TCGArg *args, TCGOpDef *tcg_op_defs)
197 @@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
199 /* Array VALS has an element for each temp.
200 If this temp holds a constant then its value is kept in VALS' element.
201 - If this temp is a copy of other ones then this equivalence class'
202 - representative is kept in VALS' element.
203 - If this temp is neither copy nor constant then corresponding VALS'
204 - element is unused. */
205 + If this temp is a copy of other ones then the other copies are
206 + available through the doubly linked circular list. */
208 nb_temps = s->nb_temps;
209 nb_globals = s->nb_globals;
210 @@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
211 assert(op != INDEX_op_call);
212 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
213 if (temps[args[i]].state == TCG_TEMP_COPY) {
214 - args[i] = temps[args[i]].val;
215 + args[i] = find_better_copy(s, args[i]);
219 @@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
220 if (temps[args[1]].state == TCG_TEMP_CONST
221 && temps[args[1]].val == 0) {
222 gen_opc_buf[op_index] = op_to_movi(op);
223 - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
224 + tcg_opt_gen_movi(gen_args, args[0], 0);
228 @@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
230 if (temps[args[2]].state == TCG_TEMP_CONST
231 && temps[args[2]].val == 0) {
232 - if ((temps[args[0]].state == TCG_TEMP_COPY
233 - && temps[args[0]].val == args[1])
234 - || args[0] == args[1]) {
235 + if (temps_are_copies(args[0], args[1])) {
236 gen_opc_buf[op_index] = INDEX_op_nop;
238 gen_opc_buf[op_index] = op_to_mov(op);
239 @@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
240 if ((temps[args[2]].state == TCG_TEMP_CONST
241 && temps[args[2]].val == 0)) {
242 gen_opc_buf[op_index] = op_to_movi(op);
243 - tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
244 + tcg_opt_gen_movi(gen_args, args[0], 0);
248 @@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
251 if (args[1] == args[2]) {
252 - if (args[1] == args[0]) {
253 + if (temps_are_copies(args[0], args[1])) {
254 gen_opc_buf[op_index] = INDEX_op_nop;
256 gen_opc_buf[op_index] = op_to_mov(op);
257 @@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
258 allocator where needed and possible. Also detect copies. */
261 - if ((temps[args[1]].state == TCG_TEMP_COPY
262 - && temps[args[1]].val == args[0])
263 - || args[0] == args[1]) {
264 + if (temps_are_copies(args[0], args[1])) {
266 gen_opc_buf[op_index] = INDEX_op_nop;
268 @@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
269 args[1] = temps[args[1]].val;
272 - tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
273 + tcg_opt_gen_movi(gen_args, args[0], args[1]);
277 @@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
278 if (temps[args[1]].state == TCG_TEMP_CONST) {
279 gen_opc_buf[op_index] = op_to_movi(op);
280 tmp = do_constant_folding(op, temps[args[1]].val, 0);
281 - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
282 + tcg_opt_gen_movi(gen_args, args[0], tmp);
284 - reset_temp(args[0], nb_temps, nb_globals);
285 + reset_temp(args[0]);
286 gen_args[0] = args[0];
287 gen_args[1] = args[1];
289 @@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
290 gen_opc_buf[op_index] = op_to_movi(op);
291 tmp = do_constant_folding(op, temps[args[1]].val,
293 - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
294 + tcg_opt_gen_movi(gen_args, args[0], tmp);
297 - reset_temp(args[0], nb_temps, nb_globals);
298 + reset_temp(args[0]);
299 gen_args[0] = args[0];
300 gen_args[1] = args[1];
301 gen_args[2] = args[2];
302 @@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
303 gen_opc_buf[op_index] = op_to_movi(op);
304 tmp = do_constant_folding_cond(op, temps[args[1]].val,
305 temps[args[2]].val, args[3]);
306 - tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
307 + tcg_opt_gen_movi(gen_args, args[0], tmp);
310 - reset_temp(args[0], nb_temps, nb_globals);
311 + reset_temp(args[0]);
312 gen_args[0] = args[0];
313 gen_args[1] = args[1];
314 gen_args[2] = args[2];
315 @@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
318 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
319 - reset_temp(args[0], nb_temps, nb_globals);
320 + reset_temp(args[0]);
321 gen_args[0] = args[0];
322 gen_args[1] = args[1];
323 gen_args[2] = args[2];
324 @@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
325 && temps[args[2]].state == TCG_TEMP_CONST) {
326 tmp = do_constant_folding_cond(op, temps[args[1]].val,
327 temps[args[2]].val, args[5]);
328 - if (args[0] == args[4-tmp]
329 - || (temps[args[4-tmp]].state == TCG_TEMP_COPY
330 - && temps[args[4-tmp]].val == args[0])) {
331 + if (temps_are_copies(args[0], args[4-tmp])) {
332 gen_opc_buf[op_index] = INDEX_op_nop;
333 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
334 gen_opc_buf[op_index] = op_to_movi(op);
335 - tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
336 - nb_temps, nb_globals);
337 + tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
340 gen_opc_buf[op_index] = op_to_mov(op);
341 - tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
342 - nb_temps, nb_globals);
343 + tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
347 - reset_temp(args[0], nb_temps, nb_globals);
348 + reset_temp(args[0]);
349 gen_args[0] = args[0];
350 gen_args[1] = args[1];
351 gen_args[2] = args[2];
352 @@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
353 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
354 if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
355 for (i = 0; i < nb_globals; i++) {
356 - reset_temp(i, nb_temps, nb_globals);
360 for (i = 0; i < (args[0] >> 16); i++) {
361 - reset_temp(args[i + 1], nb_temps, nb_globals);
362 + reset_temp(args[i + 1]);
364 i = nb_call_args + 3;
366 @@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
367 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
369 for (i = 0; i < def->nb_oargs; i++) {
370 - reset_temp(args[i], nb_temps, nb_globals);
371 + reset_temp(args[i]);
374 for (i = 0; i < def->nb_args; i++) {