[llvm-dev] Where's the optimiser gone (part 10): spitting a cookie (original) (raw)
Stefan Kanthak via llvm-dev llvm-dev at lists.llvm.org
Mon Jan 14 10:59:15 PST 2019
- Previous message: [llvm-dev] LLVM Weekly - #263, January 14th 2019
- Next message: [llvm-dev] A tiny little subset of LLVM runtime
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Compile with -O3 -m32, or generate an assembly listing of __divdi3 and __moddi3 as shipped in clang_rt.builtins-i386.lib
/* Unsigned 64-bit divide helper; only declared here, not defined in this
 * snippet.  __moddi3 below passes it a numerator, a denominator and the
 * address of a 64-bit object, and reads that object back as the remainder. */
unsigned long long __udivmoddi4(unsigned long long numerator, unsigned long long denominator, unsigned long long *remainder);

/*
 * Signed 64-bit remainder: dividend % divisor.
 *
 * Both operands are replaced by their magnitudes, the unsigned helper
 * computes the remainder, and the remainder is then given the sign of the
 * original dividend.
 *
 * The statements are kept exactly as posted (only the line breaks are
 * restored), since the listing that follows is the compiler's output for
 * this source.
 *
 * NOTE(review): `x >> 63` on a negative value is implementation-defined in
 * ISO C; the per-line comments assume an arithmetic shift (all-ones mask).
 * NOTE(review): `(x ^ m) - m` with m == -1 evaluates ~x + 1, which overflows
 * for the most negative 64-bit value.
 */
long long __moddi3(long long dividend, long long divisor) {
    long long r = divisor >> 63;     // r = divisor < 0 ? -1 : 0
    long long s = dividend >> 63;    // s = dividend < 0 ? -1 : 0
    divisor = (divisor ^ r) - r;     // negate if divisor < 0
    dividend = (dividend ^ s) - s;   // negate if dividend < 0
    // r is reused as the output slot: the helper's return value is discarded
    // and only the value stored through the pointer is used.
    __udivmoddi4(dividend, divisor, (unsigned long long *) &r);
    return (r ^ s) - s;              // negate if dividend < 0
}
___moddi3:
00: 55 push ebp |
01: 89 E5 mov ebp, esp |
03: 53 push ebx | push ebx
04: 57 push edi |
05: 56 push esi |
06: 83 E4 F8 and esp, 0FFFFFFF8h |
09: 83 EC 10 sub esp, 10h | sub esp, 8
0C: 8B 45 14 mov eax, [ebp+14h] | mov eax, [esp+28]
0F: 8B 55 10 mov edx, [ebp+10h] | mov ecx, [esp+24]
12: 8B 35 00 00 00 00 mov esi, [___security_cookie] |
18: 89 E7 mov edi, esp | push esp
1A: 89 C1 mov ecx, eax |
1C: C1 F9 1F sar ecx, 1Fh | cdq
1F: 01 CA add edx, ecx | xor ecx, edx
21: 11 C8 adc eax, ecx | xor eax, edx
23: 31 CA xor edx, ecx | sub ecx, edx
25: 31 EE xor esi, ebp |
27: 31 C8 xor eax, ecx | sbb eax, edx
| push eax
| push ecx
29: 8B 4D 0C mov ecx, [ebp+0Ch] | mov eax, [esp+32]
2C: 89 74 24 08 mov [esp+8],esi |
30: 8B 75 08 mov esi, [ebp+8] | mov ecx, [esp+28]
33: 89 CB mov ebx, ecx | cdq
35: C1 FB 1F sar ebx, 1Fh | mov ebx, edx
38: 31 DE xor esi, ebx | xor ecx, edx
3A: 31 D9 xor ecx, ebx | xor eax, edx
3C: 29 DE sub esi, ebx | sub ecx, edx
3E: 19 D9 sbb ecx, ebx | sbb eax, edx
40: 57 push edi |
41: 50 push eax | push eax
42: 52 push edx |
43: 51 push ecx | push ecx
44: 56 push esi |
45: E8 00 00 00 00 call ___udivmoddi4 | call ___udivmoddi4
4A: 83 C4 14 add esp, 14h | add esp, 20
4D: 8B 3C 24 mov edi, [esp] |
50: 8B 74 24 04 mov esi, [esp+4] | mov eax, [esp]
54: 8B 4C 24 08 mov ecx, [esp+8] | mov edx, [esp+4]
58: 31 DF xor edi, ebx | xor eax, ebx
5A: 31 DE xor esi, ebx | xor edx, ebx
5C: 29 DF sub edi, ebx | sub eax, ebx
5E: 19 DE sbb esi, ebx | sbb edx, ebx
60: 31 E9 xor ecx, ebp |
62: E8 00 00 00 00 call @__security_check_cookie@4 |
67: 89 F8 mov eax, edi |
69: 89 F2 mov edx, esi |
6B: 8D 65 F4 lea esp, [ebp-0Ch] | add esp, 8
6E: 5E pop esi |
6F: 5F pop edi |
70: 5B pop ebx | pop ebx
71: 5D pop ebp |
72: C3 ret | ret
clang generates 51 instructions, 18 more than properly optimised code, tinkers with a stack cookie, although there is no array allocated on the stack, and clobbers registers EDI and ESI without necessity.
/* Prototype repeated so that this snippet compiles on its own; the helper
 * itself is not defined here. */
unsigned long long __udivmoddi4(unsigned long long numerator, unsigned long long denominator, unsigned long long *remainder);

/*
 * Signed 64-bit quotient: dividend / divisor.
 *
 * Both operands are replaced by their magnitudes, the unsigned helper
 * computes the quotient (a null remainder pointer is passed), and the
 * quotient is negated when the operand signs differ.
 *
 * The statements are kept exactly as posted (only the line breaks are
 * restored), since the listing that follows is the compiler's output for
 * this source.
 *
 * NOTE(review): `x >> 63` on a negative value is implementation-defined in
 * ISO C; the per-line comments assume an arithmetic shift (all-ones mask).
 * NOTE(review): `(x ^ m) - m` with m == -1 evaluates ~x + 1, which overflows
 * for the most negative 64-bit value.
 */
long long __divdi3(long long dividend, long long divisor) {
    long long r = divisor >> 63;     // r = divisor < 0 ? -1 : 0
    long long s = dividend >> 63;    // s = dividend < 0 ? -1 : 0
    divisor = (divisor ^ r) - r;     // negate if divisor < 0
    dividend = (dividend ^ s) - s;   // negate if dividend < 0
    s ^= r;                          // sign of quotient
    // negate if quotient < 0
    return (__udivmoddi4(dividend, divisor, 0) ^ s) - s;
}
__divdi3: # @__divdi3
    push ebx                        | push ebx
    push edi                        |
    push esi                        |
    mov ecx, dword ptr [esp + 28]   | mov eax, [esp+20]
    mov eax, dword ptr [esp + 20]   |
    mov edi, dword ptr [esp + 24]   | mov ecx, [esp+16]
    mov ebx, dword ptr [esp + 16]   |
    mov edx, ecx                    |
    mov esi, eax                    |
    sar edx, 31                     | cdq
    sar esi, 31                     | mov ebx, edx
    xor edi, edx                    | xor ecx, edx
    xor ecx, edx                    | xor eax, edx
    sub edi, edx                    | sub ecx, edx
    sbb ecx, edx                    | sbb eax, edx
    xor ebx, esi                    |
    xor eax, esi                    |
    sub ebx, esi                    |
    sbb eax, esi                    |
    xor esi, edx                    |
    sub esp, 12 # WTF?              |
    push 0                          | push 0
    push ecx                        | push eax
    push edi                        | push ecx
                                    | mov eax, [esp+24]
                                    | mov ecx, [esp+20]
                                    | cdq
                                    | xor ecx, edx
                                    | xor eax, edx
                                    | sub ecx, edx
                                    | sbb eax, edx
                                    | xor ebx, edx
    push eax                        | push eax
    push ebx                        | push ecx
    call __udivmoddi4               | call __udivmoddi4
    add esp, 32                     | add esp, 20
    xor eax, esi                    | xor eax, ebx
    xor edx, esi                    | xor edx, ebx
    sub eax, esi                    | sub eax, ebx
    sbb edx, esi                    | sbb edx, ebx
    pop esi                         |
    pop edi                         |
    pop ebx                         | pop ebx
    ret                             | ret
clang generates 36 instructions, 6 more than properly optimised code, tinkers with ESP and clobbers registers EDI and ESI without necessity.
Stay tuned,
Stefan Kanthak
- Previous message: [llvm-dev] LLVM Weekly - #263, January 14th 2019
- Next message: [llvm-dev] A tiny little subset of LLVM runtime
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]