Why does the execution time increase in my OpenMP code?

I am trying to calculate the speedup of a simple parallel program. It is just a simple loop. I first use OpenMP in C++ to parallelize it. Then, to find the execution time, I measure the time of each thread and use the maximum thread time as the parallel execution time. I repeat this for different numbers of threads, but the times get worse as the number grows! Could you help me?

#include "stdafx.h"
#include "omp.h"
#include "conio.h"
/* Per-thread accumulated elapsed time, indexed by OpenMP thread id (64 slots). */
double diftime[64];
/*
 * a         - counter incremented by the timed loop
 * i         - loop index of the timed loop
 * threadnum - number of threads requested for the parallel region
 *
 * NOTE(review): `a` is a file-scope variable, so it is shared by all threads;
 * `a++` below is performed without atomic/critical/reduction, which is a
 * data race.
 */
int a,i,threadnum;
/*
 * Runs a 2,000,000-iteration increment loop split across `threadnum` threads
 * and prints, for every thread, the wall-clock time it spent in its share of
 * the loop. Waits for a key press before returning 0.
 */
int main()
{
threadnum=2;
/* Enable nested parallelism (no nested region is visible in this snippet). */
omp_set_nested(1);
/* Disable dynamic adjustment of the team size. */
omp_set_dynamic(0);
#pragma omp parallel num_threads(threadnum) 
{
    /* Declared inside the region, so each thread has its own copies. */
    double start_time,end_time;
    int id = omp_get_thread_num();
    start_time=omp_get_wtime();
    /* nowait: no barrier after the loop, so each thread takes end_time as
       soon as its own statically assigned chunk is finished. */
    #pragma omp for nowait schedule(static)
    for (i=0;i<2000000;i++){a++;}
    end_time=omp_get_wtime();
    /* Accumulate this thread's elapsed time in its own slot. */
    diftime[id]=diftime[id]+(end_time-start_time);      
    /* NOTE(review): no <stdio.h> include is visible; presumably supplied by
       stdafx.h - confirm. */
    printf("thread[%d] = %.32g\n",id,end_time-start_time);  
}
/* Presumably keeps the console window open until a key is pressed (conio.h). */
getch();
return 0;
}

      

+3


source to share


1 answer


The reason is that your loop operation is so simple that the compiler replaces the whole loop with the final value of `a` after the loop. Take a look at this example:

#include <stdio.h>

/*
 * Minimal demonstration that an optimizing compiler can fold a trivial
 * counting loop into a constant: `a` is incremented 1 << 20 (1048576) times
 * and then printed.
 *
 * Returns 0.
 */
int main(void)
{
   size_t i;
   unsigned a = 0;
   for (i = 0; i < (1UL << 20); i++) // the loop should run 1048576 times
      a++;
   // %u is the conversion for unsigned; the former "%ud" printed the number
   // followed by a stray literal 'd'.
   printf("%u\n", a);
   return 0;
}

      

But when we look at the assembly generated by `gcc -O2 -S test.c`, we find:

# Compiler output for the C example: no loop instructions are present; the
# final counter value appears as an immediate constant.
_main:
LFB20:
   subq   $8, %rsp            # reserve 8 bytes of stack
LCFI0:
   movl   $1048576, %esi  # the loop is replaced by a value!
   xorl   %eax, %eax          # eax = 0 before the call
   leaq   LC0(%rip), %rdi     # address of LC0 (presumably the format string)
   call   _printf
   xorl   %eax, %eax          # eax = 0 (presumably main's return value)
   addq   $8, %rsp            # release the 8 bytes reserved above
LCFI1:
   ret

      



So the reason your measured time goes up is that it takes longer to create and manage more threads (which don't actually do any work).


If you want to force the compiler to actually emit the loop, you have to make the variable that is modified in the loop `volatile`, for example:

#include <stdio.h>

#include <omp.h>

double diftime[64];
/*
 * Times a 1 << 20 iteration increment loop that is divided statically among
 * the threads of an OpenMP parallel region.
 *
 * Each thread increments its own `volatile` counter so the compiler cannot
 * fold the loop into a constant, prints the wall-clock time it spent on its
 * share, and adds that time to its slot in the file-scope `diftime` array.
 * The per-thread counters are then summed into `a` and printed.
 *
 * Returns 0.
 */
int main(void)
{
   unsigned a = 0;
#pragma omp parallel
   {
      double start_time, end_time, elapsed;
      int id = omp_get_thread_num();
      start_time = omp_get_wtime();
      // volatile forces a real load/store on every iteration
      volatile int b = 0;
      // The loop index is declared in the loop, so every thread has its own.
      // nowait: no barrier here, each thread stops its clock as soon as its
      // own chunk is done.
#pragma omp for nowait schedule(static)
      for (int i = 0; i < (1 << 20); i++)
         b++;
      end_time = omp_get_wtime();
      elapsed = end_time - start_time;
      // The team size is not capped, so guard against thread ids that do not
      // fit in diftime.
      if ((size_t)id < sizeof diftime / sizeof diftime[0])
         diftime[id] += elapsed;
      printf("thread[%d] = %.32g\n", id, elapsed);
// ensure only one thread at a time executes the next line
#pragma omp critical
      a += (unsigned)b;
   }
   // a is unsigned, so print it with %u rather than %d
   printf("a = %u\n", a);
   return 0;
}

      

+3


source







All Articles