How to optimize away a default argument
I have the following bit of code. Given how foo is called, what compiler options can I pass to GCC and Clang so that they optimize away the if statement, as icc does?
Code:
#include <cstdlib>
/**
 * Returns i + 1, or i + 2 when b is true.
 *
 * @param i  Value to increment (taken by value, so the caller's copy is untouched).
 * @param b  When true, one extra increment is applied. Defaults to false.
 * @return   The incremented value.
 */
int foo(int i, bool b = false)
{
    int result = i + 1;  // the unconditional increment
    if (b) {
        result += 1;     // the optional extra increment
    }
    return result;
}
/**
 * Returns i + 1. Unlike foo and goo, it takes no flag parameter.
 *
 * @param i  Value to increment (taken by value).
 * @return   i + 1.
 */
int boo(int i)
{
    const int next = i + 1;
    return next;
}
// File-local constant supplying the default value of goo's flag parameter.
static const bool global_b = false;

/**
 * Returns i + 1, or i + 2 when b is true.
 *
 * @param i  Value to increment (taken by value).
 * @param b  When true, one extra increment is applied. Defaults to global_b.
 * @return   The incremented value.
 */
int goo(int i, bool b = global_b)
{
    if (!b) {
        return i + 1;
    }
    return i + 2;
}
/**
 * Parses the first command-line argument as an integer and returns a weighted
 * sum of foo, boo and goo applied to it, each called without the optional flag.
 *
 * NOTE(review): argv[1] is handed to atoi without checking argc, so this
 * example assumes an argument is always supplied.
 */
int main(int argc, char* argv[])
{
    const int i = atoi(argv[1]);
    const int foo_term = 2 * foo(i);
    const int boo_term = 3 * boo(i);
    const int goo_term = 7 * goo(i);
    return foo_term + boo_term + goo_term;
}
Disassembly GCC 4.9 -O2:
foo(int, bool):
cmp sil, 1
sbb edi, -1
lea eax, [rdi+1]
ret
goo(int, bool):
cmp sil, 1
sbb edi, -1
lea eax, [rdi+1]
ret
boo(int):
lea eax, [rdi+1]
ret
Disassembly Clang 3.4 -O2:
foo(int, bool):
movzbl %sil, %eax
leal 1(%rdi,%rax), %eax
ret
goo(int, bool):
movzbl %sil, %eax
leal 1(%rdi,%rax), %eax
ret
boo(int):
leal 1(%rdi), %eax
ret
Disassembly IntelCC 13 -O2:
foo(int, bool):
incl %edi
movl %edi, %eax
ret
goo(int, bool):
incl %edi
movl %edi, %eax
ret
boo(int):
incl %edi
movl %edi, %eax
ret
Templatising foo we get the following:
// Generic counterpart of foo: pre-increments i once, or twice when b is true,
// and returns the result by value.
//
// T must support pre-increment (operator++) and be copyable, because i is
// both taken and returned by value.
//
// i: value to increment; the caller's object is not modified.
// b: when true, one extra increment is applied. Defaults to false.
template <typename T>
T foo_t(T i, bool b = false)
{
// Optional extra increment, applied only when the flag is set.
if (b) ++i;
// Unconditional increment; the incremented value is the result.
return ++i;
}
With GCC 4.9 it is implicitly inlined:
add eax, 1
The Intel compiler is incorrect. Without the gcc option -fwhole-program (which automatically marks all functions except main as static, i.e., local to that translation unit), the compiler cannot know whether foo is called from another translation unit, so it cannot assume that foo is always called with a second argument equal to false.
...
What about the inline qualifier? Also, what is the point of incrementing a local variable that was passed by value? Here's an optimized version:
#include <cstdlib>
/**
 * Returns i + 2 when b is true, otherwise i + 1.
 *
 * i is passed by value, so the result is computed directly rather than by
 * incrementing the local copy (the earlier form was: if (b) ++i; return ++i;).
 *
 * @param i  Base value.
 * @param b  Selects the larger offset when true. Defaults to false.
 * @return   i + 2 or i + 1.
 */
inline int foo(int i, bool b = false)
{
    if (b) {
        return i + 2;
    }
    return i + 1;
}
/**
 * Returns i + 1.
 *
 * i is passed by value, so the sum is returned directly instead of
 * incrementing the local copy.
 *
 * @param i  Base value.
 * @return   i + 1.
 */
inline int boo(int i)
{
    const int next = i + 1;
    return next;
}
// File-local constant supplying the default value of goo's flag parameter.
static const bool global_b = false;

/**
 * Returns i + 2 when b is true, otherwise i + 1.
 *
 * i is passed by value, so the result is computed directly rather than by
 * incrementing the local copy (the earlier form was: if (b) ++i; return ++i;).
 *
 * @param i  Base value.
 * @param b  Selects the larger offset when true. Defaults to global_b.
 * @return   i + 2 or i + 1.
 */
inline int goo(int i, bool b = global_b)
{
    const int offset = b ? 2 : 1;
    return i + offset;
}
/**
 * Parses the first command-line argument as an integer and returns a weighted
 * sum of foo, boo and goo applied to it, each called without the optional flag.
 *
 * NOTE(review): argv[1] is handed to atoi without checking argc, so this
 * example assumes an argument is always supplied.
 */
int main(int argc, char* argv[])
{
    const int i = atoi(argv[1]);
    const int foo_term = 2 * foo(i);
    const int boo_term = 3 * boo(i);
    const int goo_term = 7 * goo(i);
    return foo_term + boo_term + goo_term;
}
Try rewriting the body of foo as: return i+1+b;
As stated, -fwhole-program
is the key to success.
Alternatively, you can define your function as static (inline will work too):
/**
 * Returns i + 1, or i + 2 when b is true.
 *
 * Declared static, so the function is local to this translation unit.
 *
 * @param i  Value to increment (taken by value).
 * @param b  When true, one extra increment is applied. Defaults to false.
 * @return   The incremented value.
 */
static int foo(int i, bool b = false)
{
    int step = 1;
    if (b) {
        step = 2;
    }
    return i + step;
}
this will only generate:
add eax, 1
The OP only tells part of the story :). gcc -O2 actually inlines all the function calls, and main looks like this:
main:
subq $8, %rsp
movq 8(%rsi), %rdi
movl $10, %edx
xorl %esi, %esi
call strtol
addl $1, %eax
addq $8, %rsp
leal 0(,%rax,8), %edx
leal (%rdx,%rax,4), %eax
ret