Disassembly: sub from rsp, and functions ending with different instructions

I am currently playing around with gcc, gdb and assembly and am trying to figure it out. I looked at some of the tutorials and got some key points.

So, I decided to use a small .c file, looked at the result, and some things are not quite clear.

This is the file:

#include <stdio.h>

/* Initializes local x to 8, then initializes local y to x + 5.
 * Neither local is read afterwards and nothing is returned. */
void func1(){
    int x = 8;
    int y = x + 5;   /* in the listing below: load x, add 5, store to y */
}

/* Declares a single local x initialized to 12 (0xc in the listing below).
 * The value is never read and nothing is returned. */
void func2(){
    int x = 12;
}

/* Declares a single local x initialized with the constant expression 10+20.
 * The listing below shows one store of 0x1e (30), i.e. no add instruction
 * is emitted for this expression. Nothing is returned. */
void func3(){
    int x = 10+20;
}

/* Declares local x without an initializer, then assigns 1 to it in a
 * separate statement. The value is never read and nothing is returned. */
void func4(){
    int x;
    x = 1;
}

/* Declares two locals, x and y, then assigns 2 to x and 1 to y.
 * Neither value is read and nothing is returned. */
void func5(){
    int x;
    int y;

    x = 2;   /* stored at -0x4(%rbp) in the listing below */
    y = 1;   /* stored at -0x8(%rbp) in the listing below */
}

/* Declares two locals, assigns x = 15 and y = 6, then adds x into y
 * (y becomes 21). The result is never read and nothing is returned. */
void func6(){
    int x;
    int y;

    x=15;
    y=6;
    y += x;   /* in the listing below: load x and y, add, store back to y */
}

/* Entry point: calls func1 through func6 in order and returns 20
 * (0x14 in the listing below). argc and argv are not used by the body. */
int main(int argc, char *argv[]) {
    func1();
    func2();
    func3();
    func4();
    func5();
    func6();
    return 20;
}

      

These are the disassembly results:

Dump of assembler code for function main:
0x0000000100000f60 <+0> :   push   %rbp
0x0000000100000f61 <+1> :   mov    %rsp,%rbp
0x0000000100000f64 <+4> :   sub    $0x10,%rsp
0x0000000100000f68 <+8> :   movl   $0x0,-0x4(%rbp)
0x0000000100000f6f <+15>:   mov    %edi,-0x8(%rbp)
0x0000000100000f72 <+18>:   mov    %rsi,-0x10(%rbp)
0x0000000100000f76 <+22>:   callq  0x100000ed0 <func1>
0x0000000100000f7b <+27>:   callq  0x100000ef0 <func2>
0x0000000100000f80 <+32>:   callq  0x100000f00 <func3>
0x0000000100000f85 <+37>:   callq  0x100000f10 <func4>
0x0000000100000f8a <+42>:   callq  0x100000f20 <func5>
0x0000000100000f8f <+47>:   callq  0x100000f40 <func6>
0x0000000100000f94 <+52>:   mov    $0x14,%eax
0x0000000100000f99 <+57>:   add    $0x10,%rsp
0x0000000100000f9d <+61>:   pop    %rbp
0x0000000100000f9e <+62>:   retq

Dump of assembler code for function func1:
0x0000000100000ed0 <+0> :   push   %rbp
0x0000000100000ed1 <+1> :   mov    %rsp,%rbp
0x0000000100000ed4 <+4> :   movl   $0x8,-0x4(%rbp)
0x0000000100000edb <+11>:   mov    -0x4(%rbp),%eax
0x0000000100000ede <+14>:   add    $0x5,%eax
0x0000000100000ee3 <+19>:   mov    %eax,-0x8(%rbp)
0x0000000100000ee6 <+22>:   pop    %rbp
0x0000000100000ee7 <+23>:   retq
0x0000000100000ee8 <+24>:   nopl   0x0(%rax,%rax,1)

Dump of assembler code for function func2:
0x0000000100000ef0 <+0> :   push   %rbp
0x0000000100000ef1 <+1> :   mov    %rsp,%rbp
0x0000000100000ef4 <+4> :   movl   $0xc,-0x4(%rbp)
0x0000000100000efb <+11>:   pop    %rbp
0x0000000100000efc <+12>:   retq
0x0000000100000efd <+13>:   nopl   (%rax)

Dump of assembler code for function func3:
0x0000000100000f00 <+0> :   push   %rbp
0x0000000100000f01 <+1> :   mov    %rsp,%rbp
0x0000000100000f04 <+4> :   movl   $0x1e,-0x4(%rbp)
0x0000000100000f0b <+11>:   pop    %rbp
0x0000000100000f0c <+12>:   retq
0x0000000100000f0d <+13>:   nopl   (%rax)

Dump of assembler code for function func4:
0x0000000100000f10 <+0> :   push   %rbp
0x0000000100000f11 <+1> :   mov    %rsp,%rbp
0x0000000100000f14 <+4> :   movl   $0x1,-0x4(%rbp)
0x0000000100000f1b <+11>:   pop    %rbp
0x0000000100000f1c <+12>:   retq
0x0000000100000f1d <+13>:   nopl   (%rax)

Dump of assembler code for function func5:
0x0000000100000f20 <+0> :   push   %rbp
0x0000000100000f21 <+1> :   mov    %rsp,%rbp
0x0000000100000f24 <+4> :   movl   $0x2,-0x4(%rbp)
0x0000000100000f2b <+11>:   movl   $0x1,-0x8(%rbp)
0x0000000100000f32 <+18>:   pop    %rbp
0x0000000100000f33 <+19>:   retq
0x0000000100000f34 <+20>:   data16 data16 nopw %cs:0x0(%rax,%rax,1)

Dump of assembler code for function func6:
0x0000000100000f40 <+0> :   push   %rbp
0x0000000100000f41 <+1> :   mov    %rsp,%rbp
0x0000000100000f44 <+4> :   movl   $0xf,-0x4(%rbp)
0x0000000100000f4b <+11>:   movl   $0x6,-0x8(%rbp)
0x0000000100000f52 <+18>:   mov    -0x4(%rbp),%eax
0x0000000100000f55 <+21>:   mov    -0x8(%rbp),%ecx
0x0000000100000f58 <+24>:   add    %eax,%ecx
0x0000000100000f5a <+26>:   mov    %ecx,-0x8(%rbp)
0x0000000100000f5d <+29>:   pop    %rbp
0x0000000100000f5e <+30>:   retq
0x0000000100000f5f <+31>:   nop

      

I compiled this with:

gcc  -o example example.c

      

I don't quite understand some things:

  • If all the functions end the same way (in the C code they all return void), why does
    • func1 end with nopl 0x0(%rax,%rax,1)
    • func2, func3 and func4 end with nopl (%rax)
    • func6 end with nop
    • func5 end with data16 data16 nopw %cs:0x0(%rax,%rax,1)?
  • What exactly does data16 data16 nopw %cs:0x0(%rax,%rax,1) mean?
  • In main there are
    • sub $0x10,%rsp
    • add $0x10,%rsp
    • Do they allocate memory for the local variables of a function? If so, why is the amount always rounded to 0x10, 0x20, 0x30, ...? Isn't that something of a waste?
+3


source to share


2 answers


All these instructions, nopl 0x0(%rax,%rax,1) etc., are variations of the nop instruction. They are used as padding to make sure functions are multiples of 16 bytes in length, so that the next function starts on a 16-byte boundary. You might ask why they don't just use multiple 0x90 (nop) instructions. The answer is that if these nops are executed, it is slightly faster to execute one long multibyte nop, for example data16 data16 nopw %cs:0x0(%rax,%rax,1) or nopl (%rax), instead of executing multiple short nops. Nops can be executed when they appear inside a function; such code is generated when the compiler wants to align a jump target for performance. The nops are generated by the assembler, which doesn't know which nops can be executed and which can't, because that is undecidable in general.



For the stack part: You are compiling without optimization, and you shouldn't ask about strange code generated without optimization. The compiler is told not to be smart when compiling without optimization, so why would you expect it to conserve space?

+3


source


  • Functions are terminated by the retq instruction. The opcodes shown after it in the disassembly are just padding that is never actually executed, although they may be prefetched (and discarded) by modern speculative processors. You can ignore them. Some other processors have instructions with a "delay slot", but x86 does not have this feature. The padding between retq and the next 16-byte boundary allows the next function to start at an aligned address. This allows for faster execution.

  • data16 probably means there is 16-bit data that does not match any opcode known to the disassembler. Just ignore it; it won't affect what the program does.

  • The x86 architecture allows access to any address without alignment. But accessing a misaligned variable may take more than one bus cycle to access memory. Aligning the stack pointer rsp ensures that an access to a uint64_t takes only one bus cycle.



+1


source







All Articles