/* $Id: pi-pthreads.c 818 2006-06-09 12:13:00Z olau $ */

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>

#ifdef WIN32
#include <windows.h>
#include "getopt/getopt.h"
#else
#include <getopt.h>
#include <malloc.h>
/* posix_memalign() importieren */
#define __USE_XOPEN2K
#include <stdlib.h>
#endif

#include "globaldefs.h"
#include "timer/timer.h"

/* Number of worker threads used by main() when option -n is not given. */
#define DEFAULT_NUMTHREADS   (2)
/* Total number of iterations used by main() when option -i is not given. */
#define DEFAULT_ITERATIONS   (100000000)

/*
 * Input handed to one calcthread() instance; main() fills one element per
 * thread before starting it.
 * NOTE(review): CACHE_ALIGN is defined outside this file; presumably it
 * aligns the member to a cache-line boundary -- confirm in its definition.
 */
typedef struct _calcthreadparam
{
  /* Index of this thread, 0 .. size - 1. */
  CACHE_ALIGN int myrank;
  /* Total number of threads taking part in the computation. */
  int size;
  /* Total iteration count, identical for all threads. */
  int iterations;
} CALCTHREADPARAM;

/*
 * Output slot of one calcthread() instance.
 * NOTE(review): CACHE_ALIGN is defined outside this file; presumably it
 * aligns the member to a cache-line boundary -- confirm in its definition.
 */
typedef struct _calcthreadresult
{
  /* Partial sum written once by the owning thread; main() reads it only
     after pthread_join() on that thread. */
  CACHE_ALIGN volatile double sum;
} CALCTHREADRESULT;

/* Per-thread arrays, allocated and released in main(); the index is the
   thread's rank. */
/* Thread handles filled in by pthread_create(). */
static pthread_t *tid;
/* Parameters passed to each calcthread() instance. */
static CALCTHREADPARAM *params;
/* Partial sums written by each calcthread() instance. */
static CALCTHREADRESULT *subresult;

void *calcthread(void *params)
{
  int i;
  double x;
  double sum = 0.0;
  int myrank = ((CALCTHREADPARAM *) params)->myrank;
  int size = ((CALCTHREADPARAM *) params)->size;
  int iterations = ((CALCTHREADPARAM *) params)->iterations;
  double w = 1.0 / (double) iterations;
#ifdef DEBUG
  printf("Dies ist Thread %2d von %2d\n", myrank, size);
#endif
  for (i = myrank + 1; i <= iterations; i += size) {
    x = w * ((double)i - 0.5);
    sum += f(x);
  }
  subresult[myrank].sum = sum;
  pthread_exit((void *) 0);
  return (void *) 0; /* wird nie ausgefhrt; existiert daher nur, um Compiler zufriedenstellen */
}

/* Writes the one-line command synopsis to standard output. */
void usage(void)
{
  static const char synopsis[] =
    "Aufruf mit: pi-pthreads [-n <Anzahl Threads>] [-i <Anzahl Iterationen>]\n";

  fputs(synopsis, stdout);
}

/*
 * Parses text as a strictly positive decimal integer that fits in an int.
 * Returns 0 and stores the value in *out on success. Returns -1 and leaves
 * *out untouched if text is NULL, empty, has trailing characters, or is
 * out of range.
 */
static int parse_positive_int(const char *text, int *out)
{
  char *end;
  long value;

  if (text == NULL)
    return -1;
  errno = 0;
  value = strtol(text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE
      || value <= 0 || value > INT_MAX)
    return -1;
  *out = (int) value;
  return 0;
}

/*
 * Allocates count * size bytes aligned to CACHE_LINE_SIZE.
 * Returns NULL on failure or if count * size would overflow size_t.
 * Release the block with free_cache_aligned().
 */
static void *alloc_cache_aligned(size_t count, size_t size)
{
  if (size != 0 && count > (size_t) -1 / size)
    return NULL;
#ifdef WIN32
  return _aligned_malloc(count * size, CACHE_LINE_SIZE);
#else
  {
    /* Go through a genuine void * and only trust it after checking the
       return code: older POSIX revisions leave the pointer unspecified
       when posix_memalign() fails. */
    void *block = NULL;
    if (posix_memalign(&block, CACHE_LINE_SIZE, count * size) != 0)
      return NULL;
    return block;
  }
#endif
}

/* Releases a block obtained from alloc_cache_aligned(); NULL is allowed. */
static void free_cache_aligned(void *block)
{
#ifdef WIN32
  _aligned_free(block);
#else
  free(block);
#endif
}

/*
 * Program entry point.
 *
 * Options: -n <threads> and -i <iterations>, both strictly positive
 * integers; -? (or an unknown option) prints the synopsis.
 *
 * Starts the requested number of calcthread() workers, adds up their
 * partial sums, and prints the result, its deviation from PI and the
 * elapsed time measured between START() and STOP().
 *
 * Returns 0 on success and 1 on invalid arguments, allocation failure or
 * thread errors; in the error cases no result is printed.
 */
int main(int argc, char *argv[])
{
  int i;
  int started = 0;
  int status = 0;
  int numThreads = DEFAULT_NUMTHREADS;
  int iterations = DEFAULT_ITERATIONS;
  double sum, pi;
  pthread_attr_t attr;
  LONGLONG duration;
  int option;

  while ((option = getopt(argc, argv, "i:n:?")) != -1) {
    switch (option) {
      case 'n':
        if (parse_positive_int(optarg, &numThreads) != 0) {
          fprintf(stderr, "Ungueltige Anzahl Threads: %s\n", optarg ? optarg : "");
          usage();
          return 1;
        }
        break;
      case 'i':
        if (parse_positive_int(optarg, &iterations) != 0) {
          fprintf(stderr, "Ungueltige Anzahl Iterationen: %s\n", optarg ? optarg : "");
          usage();
          return 1;
        }
        break;
      case '?':
        /* Help request or unknown option: show the synopsis and stop
           instead of silently running the computation. */
        usage();
        return 1;
      default:
        /* ignore */
        break;
    }
  }

  START();
  sum = 0.0;
  /* calloc() checks the count * size multiplication for overflow. */
  tid = calloc((size_t) numThreads, sizeof *tid);
  params = alloc_cache_aligned((size_t) numThreads, sizeof *params);
  subresult = alloc_cache_aligned((size_t) numThreads, sizeof *subresult);

  if (tid == NULL || params == NULL || subresult == NULL) {
    fprintf(stderr, "Fehler: Speicherreservierung fehlgeschlagen\n");
    status = 1;
  } else if (pthread_attr_init(&attr) != 0) {
    fprintf(stderr, "Fehler: Thread-Attribute konnten nicht initialisiert werden\n");
    status = 1;
  } else {
    /* Start the worker threads. */
    for (i = 0; i < numThreads; i++) {
      int rc;

      params[i].myrank = i;
      params[i].size = numThreads;
      params[i].iterations = iterations;
      rc = pthread_create(&tid[i], &attr, calcthread, &params[i]);
      if (rc != 0) {
        fprintf(stderr, "Fehler: Thread %d konnte nicht gestartet werden (Code %d)\n", i, rc);
        status = 1;
        break;
      }
      started++;
    }
    pthread_attr_destroy(&attr);

    /* Collect the results; only threads that were actually created are
       joined, so no uninitialized handle is ever passed to pthread_join(). */
    for (i = 0; i < started; i++) {
      int rc = pthread_join(tid[i], NULL);

      if (rc != 0) {
        fprintf(stderr, "Fehler: Thread %d konnte nicht beendet werden (Code %d)\n", i, rc);
        status = 1;
        continue;
      }
      sum += subresult[i].sum;
    }
  }
  pi = 1.0 / (double) iterations * sum;

  free_cache_aligned(subresult);
  free_cache_aligned(params);
  free(tid);
  subresult = NULL;
  params = NULL;
  tid = NULL;
  STOP(duration);

  if (status != 0)
    return status;

  printf("Ergebnis nach %d Iterationen, aufgeteilt auf %d Threads:\n",
         iterations, numThreads);
  printf("pi = %.13lf (Fehler: %.13lf)\n", pi, pi - PI);
  printf("Ausfuehrungsdauer: %lld ms\n\n", duration / 1000LL);
  /* pthread_exit() does not return; the return below is never reached. */
  pthread_exit(NULL);
  return 0;
}
