If the conditional tree switches operator efficiency issues

I am trying to optimize functions with long series of if statements. I would find a solution using switch statements as a replacement, but after testing and disassembling, I found out that they just complicated the matter. Here's some code for the demo;

 int ifFunc(int A, int B, int C){
   int ret;
   if (A > 0){
     if (B > 0){
       if (C > 0){
         ret = C;
       }
       else{
         ret = B;
       }
     }
     else{
       if(C > 0){
         ret = C;
       }
       else{
         ret = A;
       }
     }
   }
   else{
     if (B > 0){
       if (C > 0){
         ret = C;
       }
       else{
         ret = B;
       }
     }
     else{
       if (C > 0){
         ret = C;
       }
       else{
         ret = 0;
       }
     }
   }
   return ret;
 }

 int swFunc(int A, int B, int C){
   int ret; int code = 0;
   code += (A > 0) * 4, code += (B > 0) * 2, code += (C > 0) * 1;
   switch(code){
   case 0: // 000
     ret = 0;
     break;
   case 1: // 001
     ret = C;
     break;
   case 2: // 010
     ret = B;
     break;
   case 3: // 011
     ret = C;
     break;
   case 4: // 100
     ret = A;
     break;
   case 5: // 101
     ret = C;
     break;
   case 6: // 110
     ret = B;
     break;
   case 7: // 111
     ret = C;
     break;
   }
   return ret;
 }
 // All these functions do is select a number that is positive,
 // Giving preference to C, then B, then A

      

I may have made a few mistakes so they may not be doing the same, but that is beside the point. What I am trying to do with a switch statement was to create a version of ifFunc with just one jump by converting the result of each if statement to a numeric code that matches a bit, so that each possible endpoint would have a unique numeric code.

This falls down, however, as comparison functions (B> 0) etc. internally use jumps. It ends up with the switch version of the function being an order of magnitude slower than the if version.

I would like to know if there is anyway to do a comparison operator and output zero for false and one for true without using (internally or otherwise) an if statement or jump.

+3


source to share


5 answers


Not sure if this will be much better, but you can try playing with bitfields:



union SOMECONDTIONS {
  unsigned char aggregate;
  struct {
    int c1:1;
    int c2:1;
    int c3:1;
    int c4:1;
    int c5:1;
    int c6:1;
    int c7:1;
    int c8:1;
  } conditions;
}

SOMECODITIONS c;
c.aggregate = 0;
c.conditions.c1 = A > 0;
c.conditions.c2 = B > 0;
c.conditions.c3 = C > 0;
switch(c.aggregate) {
...

      

+2


source


I hope this code helps you to remove assembly transitions ...



static const int index[8] = {0, 1, 2, 2, 3, 3, 3, 3};

int ifFunc(int a, int b, int c)
{
    const int ret[4] = {0, a, b, c};
    const int bits = sizeof(int) * 8 - 1;
    //const int i = ((((c - 1) >> bits) + 1) * 4) +
    //              ((((b - 1) >> bits) + 1) * 2) +
    //               (((a - 1) >> bits) + 1);
    const int i = ((-c >> bits) * -4) -
                  ((-b >> bits) *  2) -
                   (-a >> bits);

    return ret[index[i]];
}

      

+4


source


Perhaps it will be ...?

int ifFunc(int A, int B, int C){
  if( C > 0 ) return C;
  if( B > 0 ) return B;
  if( A > 0 ) return A;
  return 0;
}

      

EDIT

Sorry, I must have misunderstood you - I thought you just need to reduce the number of conditional branches, not delete them entirely. Here's a solution (unless I make a mistake ...), assuming your system is running in Two's complement code :

static const unsigned iMIN = ~(~0u >> 1);  // binary 1000...0
static const int BITS_PER_BYTE = 8;

static inline int partialmask( int x ) {
    // returns positive (0....) for positive x,
    // negative (1....) for negative or zero x
    return x | (int)(iMIN - (unsigned)x);
}
static inline int fullmask( int x ) {
    // extends the sign bit so that
    // positive becomes binary 0000...0 for positive x
    // negative becomes binary 1111...1 for negative or zero x
    return partialmask( x ) >> (BITS_PER_BYTE * sizeof(int) - 1);
}

int noIfFunc(int A, int B, int C){
  int res = 0, mask;

  mask = fullmask( A );  // negative or zero A causes an all-ones mask
  res &= mask;           // to preserve the res value
  res |= A & ~mask;      // and keep it from overwriting with A

  mask = fullmask( B );  // positive B causes
  res &= mask;           // res to be cleared with all-zeros mask
  res |= B & ~mask;      // then overwritten with B

  mask = fullmask( C );
  res &= mask;
  res |= C & ~mask;

  return res;  // finally res == most recent positive value (else zero)
}

      

Perhaps this is not the shortest code. However, it should not contain branches as long as they are properly minified (with built-in functions).

+3


source


What about

if (C > 0) return C;
if (B > 0) return B;
if (A > 0) return A;
return 0;

      

or

return C > 0 ? C : (B > 0 ? B : (A > 0 ? A : 0)));

      

?

Also, if the compiler implements conditional assignments,

R= 0;
if (A > 0) R= A;
if (B > 0) R= B;
if (C > 0) R= C;

      


In the latter case, MSVC uses conditional assignments, but for the third, as it prefers to return null register :(

; 7    :     int R= 0;
; 8    :     if (A > 0) R= A;

    mov ecx, DWORD PTR _A$[esp-4]
    xor eax, eax
    test    ecx, ecx
    cmovg   eax, ecx

; 9    :     if (B > 0) R= B;

    mov ecx, DWORD PTR _B$[esp-4]
    test    ecx, ecx
    cmovg   eax, ecx

; 10   :     if (C > 0) R= C;

    mov ecx, DWORD PTR _C$[esp-4]
    test    ecx, ecx
    jle SHORT $LN1@f1

; 12   :    return R;

    mov eax, ecx
$LN1@f1:

    ret 0

      

+1


source


Here is a solution that nearly outperforms the compiler version using inline assembly. My apologies if this is a little, I only found out about it last night.

 int swFunc(int A, int B, int C){
   int ret; char code;
   //code += (A > 0) * 4, code += (B > 0) * 2, code += (C > 0) * 1
   asm("movb $0, %[out];"
     "cmpl $0, %[C];"
     "setg %%al;"
     "addb %%al, %[out];"
     "cmpl $0, %[B];"
     "setg %%al;"
     "shlb $1, %%al;"
     "addb %%al, %[out];"
     "cmpl $0, %[A];"
     "setg %%al;"
     "shlb $2, %%al;"
     "addb %%al, %[out];"
   : [out] "+dl" (code)
   : [A] "m" (A), [B] "m" (B), [C] "m" (C)
   : "%eax", "%edx");

  switch(code){
   ...
    //same old switch statement
   ...
  }
   return ret;
 }

      

interesting (and rather annoying) when I change asm to;

   asm ("cmpl $0, %[C];"
     "setg %[out];"
     "cmpl $0, %[B];"
     "setg %%al;"
     "shlb $1, %%al;"
     "addb %%al, %[out];"
     "cmpl $0, %[A];"
     "setg %%al;"
     "shlb $2, %%al;"
     "addb %%al, %[out];"
   : [out] "+dl" (code)
   : [A] "m" (A), [B] "m" (B), [C] "m" (C)
   : "%eax", "%edx");

      

Which should do the same, but with a shorter, sweeter way, the performance is the same because the compiler decides to type the% cl register like this:

 movzbl -0x5(%ebp),%eax // load 'code' into the register (not needed!)
 mov    %eax,%ecx   // load 'code' into ecx?
 cmpl   $0x0,0x10(%ebp) // compare 'C' and 0
 setg   %cl         // set byte for code imediately, skips zeroing 'code'
 cmpl   $0x0,0xc(%ebp)  // compare 'B' and 0
 setg   %al
 shl    %al         // set %al, then make it equal to two if not equal to zero
 add    %al,%cl     // add it to code
 cmpl   $0x0,0x8(%ebp)  // compare 'A' and 0
 setg   %al
 shl    $0x2,%al    // set %al, then make it equal to four if not equal to zero
 add    %al,%cl     // add it to code
 mov    %cl,-0x5(%ebp)  // move the final value back onto the stack

      

It should be noted that the compiler will use% dl if I ask to use% cl and vice versa. The obstructing car doesn't seem to want it to crash.

Forcing conditions directly seems to be a good way, as this method is pretty close to beating the compiler

0


source







All Articles