Maximum profit from one sale - Parallel version

Question

Maximum profit from one sale - Parallel version

I am trying to use the OpenMP API (or pthreads) to parallelize the following code. Its time complexity is O (n). I am wondering if it is possible to split an array of records into X

chunks ( X

= number of threads) and execute the process in parallel for each of them.

This is a very classic algorithm problem and I haven't seen anyone try to implement a parallelized version yet.

Important note . A simple shortcut does not solve this problem as I only read the array from left to right. So it's not so obvious to parallelize ...

 #include<stdio.h>

/* The function assumes that there are at least two
   elements in array.
   The function returns a negative value if the array is
   sorted in decreasing order.
   Returns 0 if elements are equal  */
int maxDiff(int arr[], int arr_size)
{
  int max_diff = arr[1] - arr[0];
  int min_element = arr[0];
  int i;
  for(i = 1; i < arr_size; i++)
  {       
    if(arr[i] - min_element > max_diff)                               
      max_diff = arr[i] - min_element;
    if(arr[i] < min_element)
         min_element = arr[i];                     
  }
  return max_diff;
}

+3

algorithm pthreads parallel-processing openmp

user1197918 09 oct. 14 at 12:06

source to share

2 answers

I'm not sure about OpenMP in particular, but here's an associative operator for the problem that lends itself to parallelism.

struct intermediate {
    int min_elem;
    int max_elem;
    int max_diff;
};

Prepare a list of singletons using this function.

struct intermediate singleton(int x) {
    return (struct intermediate){x, x, INT_MIN};
}

Combine two adjacent intermediates using this function.

struct intermediate combine(struct intermediate a, struct intermediate b) {
    return (struct intermediate){min(a.min_elem, b.min_elem),
                                 max(a.max_elem, b.max_elem),
                                 max(max(a.max_diff, b.max_diff),
                                     b.max_elem - a.min_elem)};
}

One possible evaluation strategy can be selected.

        C
       / \
      C   \
     / \   \
    /   \   \
   /     \   \
  C       C   \
 / \     / \   \
S   S   S   S   S
|   |   |   |   |
0   1   2   3   4

Here C

means union, and S

means singleton. Since union is associative, any binary tree will do. Here's another strategy.

        C
       / \
      /   \
     /     \
    /       C
   /       / \
  C       /   C
 / \     /   / \
S   S   S   S   S
|   |   |   |   |
0   1   2   3   4

+1

David Eisenstat 09 oct. 14 at 13:39

source to share

Jonathan Dursi · Accepted Answer · 2014-10-09T17:00:10+0000

Due to data dependencies and low computational requirements, this is unlikely to ever give you a lot of speedups in multi-core mode - however you can do something by calculating in each block of the array the local min, max, and local best, and then compare that via pieces. Because of this last step is performed in O (N) + O (P ² ) of time, which further limits the scalability.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <limits.h>
#include <omp.h>

void tick(struct timeval *t);
double tock(const struct timeval * const t);

unsigned int maxDiff(const int * const arr, const int arr_size)
{
  int max_diff = arr[1] - arr[0];
  int min_element = arr[0];
  int i;
  for(i = 1; i < arr_size; i++)
  {
    if(arr[i] - min_element > max_diff)
      max_diff = arr[i] - min_element;
    if(arr[i] < min_element)
         min_element = arr[i];
  }
  return max_diff;
}

unsigned int ompMaxDiff(const int * const arr, const int arr_size)
{
  int nthreads=omp_get_max_threads();
  int maxes[nthreads];
  int mins [nthreads];
  unsigned int best = 0;

  for (int i=0; i<nthreads; i++) {
    mins [i] = INT_MAX;
    maxes[i] = INT_MIN;
  }

  #pragma omp parallel num_threads(nthreads) default(none) shared(mins, maxes) reduction(max:best) 
  {
      int idx = omp_get_thread_num();
      int min = INT_MAX, max = INT_MIN;

      #pragma omp for schedule(static) 
      for(int i=0; i<arr_size; i++) {
        if (arr[i] < min) min=arr[i];
        if (arr[i] > max) max=arr[i];
        if ((arr[i] - min) > best) best = arr[i] - min;
      }

      mins [idx] = min;
      maxes[idx] = max;
  }

  for (int i=0; i<nthreads-1; i++)
    for (int j=i+1; j<nthreads; j++)
        if ((maxes[j] - mins[i]) > best) best = maxes[j]-mins[i];

  return best;
}

int main(int argc, char **argv) {
    const int nitems=1000000;
    int *data = malloc(nitems*sizeof(int));

    srand(time(NULL));
    for (int i=0; i<nitems; i++)
        data[i] = rand() % 500;    /* numbers between 0 and 500 */


    data[(nitems/2)+1] = -700;
    data[(nitems/2)]   = 700;      /* a trick! shouldn't get 1400, */
                                   /* should get <= 1200 */

    struct timeval start;
    tick(&start);
    unsigned int res = maxDiff(data, nitems);
    double restime = tock(&start);

    printf("Serial: answer = %u, time = %lf\n", res, restime);

    tick(&start);
    res = ompMaxDiff(data, nitems);
    restime = tock(&start);

    printf("OpenMP: answer = %u, time = %lf\n", res, restime);

    free(data);

    return 0;
}

void tick(struct timeval *t) {
    gettimeofday(t, NULL);
}

double tock(const struct timeval * const t) {
    struct timeval now;
    gettimeofday(&now, NULL);
    return (double)(now.tv_sec - t->tv_sec) + ((double)(now.tv_usec - t->tv_usec)/1000000.);
}

Running on 8 cores gives:

$ gcc -fopenmp -O3 -Wall -std=c11 maxdiff.c -o maxdiff
$ ./maxdiff 
Serial: answer = 1199, time = 0.001760
OpenMP: answer = 1199, time = 0.000488

Maximum profit from one sale - Parallel version

More articles: