How to optimize

What I am trying to do is take this code:

/* Description string for the naive_smooth implementation. */
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";

/*
 * naive_smooth - For every (row, col) with 0 <= row, col < dim, stores the
 * value returned by avg(dim, row, col, src) in dst[RIDX(row, col, dim)].
 */
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row = 0;

    while (row < dim) {
        int col = 0;

        while (col < dim) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
            col++;
        }
        row++;
    }
}

      

and replace the function call avg(dim, i, j, src);

with the actual code at the very bottom of the page. Then take that code and replace all the function calls in it with their actual code, and so on.

If you are wondering why I want to do all this, the reason is simple: the program runs faster without the function calls. I am trying to make the loop over each element in the code above as fast as possible by replacing every function call with the code of the function it calls.

Now, I am really having a lot of trouble with this. Do I take the code including its braces and then just copy and paste it? Do I leave the braces out? Do I include the first line of the function, for example, static pixel avg(int dim, int i, int j, pixel *src)

followed by the braces, and then the code to replace the function call?

I'm going to paste all the code here:

/*
 * pixel_sum - Running totals used to compute an averaged pixel value:
 * one accumulator per colour channel plus the count in num.
 */
typedef struct {
    int red, green, blue;   /* per-channel totals */
    int num;                /* divisor applied to the totals when averaging */
} pixel_sum;

/* Compute min and max of two integers, respectively */


/* min - Returns the smaller of a and b (b when the two are equal). */
static int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}

/* max - Returns the larger of a and b (b when the two are equal). */
static int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}



/* 
 * initialize_ pixel_ sum - Initializes all fields of sum to 0 
 */


static void initialize_ pixel_ sum (pixel_sum *sum) 

{

    sum->red = sum->green = sum->blue = 0;
    sum->num = 0;
    return;

}

/* 
 * accumulate_sum - Accumulates field values of p in corresponding 
 * fields of sum 
 */

static void accumulate_ sum (pixel_sum *sum, pixel p) 

{

    sum->red += (int) p.red;
    sum->green += (int) p.green;
    sum->blue += (int) p.blue;
    sum->num++;
    return;

}


/* 
 * assign_ sum_ to_ pixel - Computes averaged pixel value in current_pixel 
 */

static void assign_ sum_ to_ pixel (pixel *current_ pixel, pixel_ sum sum) 

{

    current_pixel->red = (unsigned short) (sum.red/sum.num);
    current_pixel->green = (unsigned short) (sum.green/sum.num);
    current_pixel->blue = (unsigned short) (sum.blue/sum.num);
    return;

}

/* 
 * avg - Returns averaged pixel value at (i,j) 
 */

      

This is the code I want to replace the function call avg(dim, i, j, src);

with:

/*
 * avg - Returns the averaged pixel value at (i, j): accumulates every
 * src pixel whose row lies in [max(i-1, 0), min(i+1, dim-1)] and whose
 * column lies in [max(j-1, 0), min(j+1, dim-1)], then divides the totals
 * by the number of pixels accumulated.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    /* Window bounds, clamped so they never leave [0, dim-1]. */
    const int row_lo = max(i - 1, 0);
    const int row_hi = min(i + 1, dim - 1);
    const int col_lo = max(j - 1, 0);
    const int col_hi = min(j + 1, dim - 1);
    pixel_sum total;
    pixel result;
    int r, c;

    initialize_pixel_sum(&total);
    for (r = row_lo; r <= row_hi; r++) {
        for (c = col_lo; c <= col_hi; c++) {
            accumulate_sum(&total, src[RIDX(r, c, dim)]);
        }
    }

    assign_sum_to_pixel(&result, total);
    return result;
}

      


/*
 * mysmooth - my smooth
 *
 * For every (i, j) with 0 <= i, j < dim, accumulates the src pixels in the
 * window of rows [max(i-1, 0), min(i+1, dim-1)] and columns
 * [max(j-1, 0), min(j+1, dim-1)] and stores their average in
 * dst[RIDX(i, j, dim)]. The averaging steps are written directly in the
 * loop nest rather than behind a per-pixel function call.
 */
char mysmooth_descr[] = "my smooth: My smooth";

void mysmooth(int dim, pixel *src, pixel *dst)
{
    int i, j;
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    for (i = 0; i < dim; i++) {
        /* The row bounds depend only on i, so compute them once per row. */
        const int row_lo = max(i - 1, 0);
        const int row_hi = min(i + 1, dim - 1);

        for (j = 0; j < dim; j++) {
            const int col_lo = max(j - 1, 0);
            const int col_hi = min(j + 1, dim - 1);

            initialize_pixel_sum(&sum);
            for (ii = row_lo; ii <= row_hi; ii++) {
                for (jj = col_lo; jj <= col_hi; jj++) {
                    accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
                }
            }

            assign_sum_to_pixel(&current_pixel, sum);
            dst[RIDX(i, j, dim)] = current_pixel;
        }
    }
}

      

So what should my code look like once I have finished taking the code from avg() and replacing the function call with it?

+2


source to share


5 answers


I have to say that I agree that you should be using compiler optimizations and inline ... but if you still want an answer to your specific question, I think what you end up with is something like:



for (j = 0; j < dim; j++)
{

    /* ...avg() code body except for the return... */ 

    dst[RIDX(i, j, dim)] = current_pixel;
}

      

+2


source


If your codebase is small, with no more than 10-12 functions, you can try putting the keyword inline

in front of each function.

Second option: use a compiler option that inlines all function calls rather than doing it manually (that is what compilers are for). Which compiler are you using? You can search online for its option that inlines all function calls (if it has one).



Third, if you are using GCC to compile your code, you can specify an attribute always_inline

for this function. Here's how to use it:

/*
 * Forward declaration asking GCC to inline avg() into its callers.
 * The function is also declared inline: GCC documents always_inline in
 * terms of inline functions and warns that the function might not be
 * inlinable when the keyword is missing.
 */
static inline pixel avg (int dim, int i, int j, pixel *src) __attribute__((always_inline));

      

+8


source


  • If you are using a C99 compiler or a C++ compiler, you can use the keyword inline

    . However, this does not guarantee that the call will be replaced by the actual code; the compiler does so only if it deems it more efficient.
  • Otherwise, if you are using pure C89, then avg()

    should be a macro. You are then guaranteed that the function call is replaced with the actual code.
+4


source


+2


source


I peeled the first and last iterations off the loops to eliminate min() and max() from the code:

/*
 * smooth_B - Writes to each dst[i][j] the per-channel integer average of
 * src[i][j] and its in-bounds neighbours in the surrounding 3x3 window.
 *
 * The four corners (4 pixels each), the four edges (6 pixels each) and the
 * interior (9 pixels each) are handled by separate straight-line code so no
 * bounds clamping is needed inside the loops.
 *
 * dim: number of rows and columns of src and dst. Nothing is written when
 *      dim < 1; for dim == 1 the single pixel's channels are copied.
 * src: input image, read only.
 * dst: output image; only the red, green and blue fields are written.
 */
void smooth_B(int dim, struct pixel src[dim][dim], struct pixel dst[dim][dim]){
  if( dim<1 ) return;
  if( dim==1 ){
    /* A 1x1 image has no neighbours: the average of one pixel is itself. */
    dst[0][0].red  =src[0][0].red;
    dst[0][0].green=src[0][0].green;
    dst[0][0].blue =src[0][0].blue;
    return;
  }

  /* Top-left corner. */
  dst[0][0].red  =(src[0][0].red  +src[1][0].red  +src[0][1].red  +src[1][1].red  )/4;
  dst[0][0].green=(src[0][0].green+src[1][0].green+src[0][1].green+src[1][1].green)/4;
  dst[0][0].blue =(src[0][0].blue +src[1][0].blue +src[0][1].blue +src[1][1].blue )/4;
  /* Top edge, corners excluded. */
  for( int j=1; j<dim-1; j++){
    dst[0][j].red  =(src[0][j-1].red  +src[1][j-1].red  +src[0][j].red  +src[1][j].red  +src[0][j+1].red  +src[1][j+1].red  )/6;
    dst[0][j].green=(src[0][j-1].green+src[1][j-1].green+src[0][j].green+src[1][j].green+src[0][j+1].green+src[1][j+1].green)/6;
    dst[0][j].blue =(src[0][j-1].blue +src[1][j-1].blue +src[0][j].blue +src[1][j].blue +src[0][j+1].blue +src[1][j+1].blue )/6;
  }
  /* Top-right corner. */
  dst[0][dim-1].red  =(src[0][dim-2].red  +src[1][dim-2].red  +src[0][dim-1].red  +src[1][dim-1].red  )/4;
  dst[0][dim-1].green=(src[0][dim-2].green+src[1][dim-2].green+src[0][dim-1].green+src[1][dim-1].green)/4;
  dst[0][dim-1].blue =(src[0][dim-2].blue +src[1][dim-2].blue +src[0][dim-1].blue +src[1][dim-1].blue )/4;

  for( int i=1; i<dim-1; i++){
    /* Left edge of row i. */
    dst[i][0].red  =(src[i-1][0].red  +src[i-1][1].red  +src[i][0].red  +src[i][1].red  +src[i+1][0].red  +src[i+1][1].red  )/6;
    dst[i][0].green=(src[i-1][0].green+src[i-1][1].green+src[i][0].green+src[i][1].green+src[i+1][0].green+src[i+1][1].green)/6;
    dst[i][0].blue =(src[i-1][0].blue +src[i-1][1].blue +src[i][0].blue +src[i][1].blue +src[i+1][0].blue +src[i+1][1].blue )/6;
    /* Interior of row i: stop at dim-2 so that column j+1 stays in bounds. */
    for( int j=1; j<dim-1; j++){
      dst[i][j].red  =(src[i-1][j-1].red  +src[i][j-1].red  +src[i+1][j-1].red  +src[i-1][j].red  +src[i][j].red  +src[i+1][j].red  +src[i-1][j+1].red  +src[i][j+1].red  +src[i+1][j+1].red  )/9;
      dst[i][j].green=(src[i-1][j-1].green+src[i][j-1].green+src[i+1][j-1].green+src[i-1][j].green+src[i][j].green+src[i+1][j].green+src[i-1][j+1].green+src[i][j+1].green+src[i+1][j+1].green)/9;
      dst[i][j].blue =(src[i-1][j-1].blue +src[i][j-1].blue +src[i+1][j-1].blue +src[i-1][j].blue +src[i][j].blue +src[i+1][j].blue +src[i-1][j+1].blue +src[i][j+1].blue +src[i+1][j+1].blue )/9;
    }
    /* Right edge of row i. */
    dst[i][dim-1].red  =(src[i-1][dim-2].red  +src[i][dim-2].red  +src[i+1][dim-2].red  +src[i-1][dim-1].red  +src[i][dim-1].red  +src[i+1][dim-1].red  )/6;
    dst[i][dim-1].green=(src[i-1][dim-2].green+src[i][dim-2].green+src[i+1][dim-2].green+src[i-1][dim-1].green+src[i][dim-1].green+src[i+1][dim-1].green)/6;
    dst[i][dim-1].blue =(src[i-1][dim-2].blue +src[i][dim-2].blue +src[i+1][dim-2].blue +src[i-1][dim-1].blue +src[i][dim-1].blue +src[i+1][dim-1].blue )/6;
  }

  /* Bottom-left corner. */
  dst[dim-1][0].red  =(src[dim-2][0].red  +src[dim-2][1].red  +src[dim-1][0].red  +src[dim-1][1].red  )/4;
  dst[dim-1][0].green=(src[dim-2][0].green+src[dim-2][1].green+src[dim-1][0].green+src[dim-1][1].green)/4;
  dst[dim-1][0].blue =(src[dim-2][0].blue +src[dim-2][1].blue +src[dim-1][0].blue +src[dim-1][1].blue )/4;
  /* Bottom edge, corners excluded: stop at dim-2 so that column j+1 stays in bounds. */
  for( int j=1; j<dim-1; j++){
    dst[dim-1][j].red  =(src[dim-2][j-1].red  +src[dim-1][j-1].red  +src[dim-2][j].red  +src[dim-1][j].red  +src[dim-2][j+1].red  +src[dim-1][j+1].red  )/6;
    dst[dim-1][j].green=(src[dim-2][j-1].green+src[dim-1][j-1].green+src[dim-2][j].green+src[dim-1][j].green+src[dim-2][j+1].green+src[dim-1][j+1].green)/6;
    dst[dim-1][j].blue =(src[dim-2][j-1].blue +src[dim-1][j-1].blue +src[dim-2][j].blue +src[dim-1][j].blue +src[dim-2][j+1].blue +src[dim-1][j+1].blue )/6;
  }
  /* Bottom-right corner. */
  dst[dim-1][dim-1].red  =(src[dim-2][dim-2].red  +src[dim-1][dim-2].red  +src[dim-2][dim-1].red  +src[dim-1][dim-1].red  )/4;
  dst[dim-1][dim-1].green=(src[dim-2][dim-2].green+src[dim-1][dim-2].green+src[dim-2][dim-1].green+src[dim-1][dim-1].green)/4;
  dst[dim-1][dim-1].blue =(src[dim-2][dim-2].blue +src[dim-1][dim-2].blue +src[dim-2][dim-1].blue +src[dim-1][dim-1].blue )/4;
}

      

By my measurements it is ~50% faster than the original code. The next step is to eliminate the repeated computation.

0


source







All Articles