Stack space allocation problem under different optimization levels of ARM (original) (raw)
When I compiled the following C codes into the ARM backend and enabled O0, I found that the stack space allocation of the foo function was 424, while when O2 was enabled, the stack space allocation exceeded 1000. I would like to ask which optimization caused this and why this situation occurred?
The compilation command I use is as follows:
clang --target=arm -fdata-sections -ffunction-sections -S -O0 20021120-1.c -o 20021120-1.s
clang --target=arm -fdata-sections -ffunction-sections -S -O2 20021120-1.c -o 20021120-1.s
C codes:
/* Macros to emit "L Nxx R" for each octal number xx between 000 and 037. */
#define OP1(L, N, R, I, J) L N##I##J R
#define OP2(L, N, R, I) \
OP1(L, N, R, 0, I), OP1(L, N, R, 1, I), \
OP1(L, N, R, 2, I), OP1(L, N, R, 3, I)
#define OP(L, N, R) \
OP2(L, N, R, 0), OP2(L, N, R, 1), OP2(L, N, R, 2), OP2(L, N, R, 3), \
OP2(L, N, R, 4), OP2(L, N, R, 5), OP2(L, N, R, 6), OP2(L, N, R, 7)
/* Declare 32 unique variables with prefix N. */
#define DECLARE(N) OP (, N,)
/* Copy 32 variables with prefix N from the array at ADDR.
Leave ADDR pointing to the end of the array. */
#define COPYIN(N, ADDR) OP (, N, = *(ADDR++))
/* Likewise, but copy the other way. */
#define COPYOUT(N, ADDR) OP (*(ADDR++) =, N,)
/* Add the contents of the array at ADDR to 32 variables with prefix N.
Leave ADDR pointing to the end of the array. */
#define ADD(N, ADDR) OP (, N, += *(ADDR++))
volatile double gd[32];
volatile float gf[32];
void foo (int n)
{
double DECLARE(d);
float DECLARE(f);
volatile double *pd;
volatile float *pf;
int i;
pd = gd; COPYIN (d, pd);
for (i = 0; i < n; i++)
{
pf = gf; COPYIN (f, pf);
pd = gd; ADD (d, pd);
pd = gd; ADD (d, pd);
pd = gd; ADD (d, pd);
pf = gf; COPYOUT (f, pf);
}
pd = gd; COPYOUT (d, pd);
}
int main ()
{
int i;
for (i = 0; i < 32; i++)
gd[i] = i, gf[i] = i;
foo (1);
for (i = 0; i < 32; i++)
if (gd[i] != i * 4 || gf[i] != i)
abort ();
exit (0);
}
Partial code generated by O0:
……
foo:
.fnstart
@ %bb.0:
push {r11, lr}
mov r11, sp
sub sp, sp, #424
str r0, [r11, #-4]
……
Partial code generated by O2:
……
foo:
.fnstart
@ %bb.0:
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
add r11, sp, #28
sub sp, sp, #92
sub sp, sp, #1024
……