/* $Id: pi-wthreads.c 836 2006-06-15 09:27:17Z olau $ */

#include <windows.h>
#include <excpt.h>
#include <process.h>
#include <stdio.h>
#include <math.h>
#include "globaldefs.h"
#include "timer/timer.h"
#include "getopt/getopt.h"

#define DEFAULT_NUMTHREADS   (2)

/*
 * Integer type used to build per-thread CPU affinity masks (one bit per
 * processor): 64 bits wide on 64-bit targets, 32 bits otherwise.
 *
 * MSVC announces 64-bit targets through _WIN64, _M_X64 (x64) and _M_IA64
 * (Itanium); GCC-style compilers define __x86_64 / __x86_64__. The previous
 * test only looked for __x86_64 and _M_IX64, neither of which MSVC defines,
 * so 64-bit MSVC builds silently got the 32-bit type. Both old tests are
 * kept for backward compatibility.
 */
#if defined(__x86_64) || defined(__x86_64__) || defined(_WIN64) || \
    defined(_M_X64) || defined(_M_IA64) || defined(_M_IX64)
typedef __int64 affinitymask_t;
#else
typedef __int32 affinitymask_t;
#endif

/*
 * Input handed to one worker thread (see calcthread()). main() allocates one
 * entry per thread and fills it in before the thread is started.
 * NOTE(review): CACHE_ALIGN is defined outside this file (presumably an
 * alignment specifier from globaldefs.h) -- confirm.
 */
typedef struct _calcthreadparam
{
  CACHE_ALIGN int myrank; /* index of this thread, 0 .. size - 1 */
  int size;               /* total number of worker threads */
  long iterations;        /* total number of intervals shared by all threads */
} CALCTHREADPARAM;

/*
 * Output slot of one worker thread: calcthread() stores its partial sum here
 * and main() adds the slots up after waiting for the threads.
 * NOTE(review): CACHE_ALIGN is defined outside this file (presumably an
 * alignment specifier from globaldefs.h) -- confirm.
 */
typedef struct _calcthreadresult
{
  CACHE_ALIGN volatile double sum; /* partial sum written by exactly one thread */
} CALCTHREADRESULT;

/*
 * Per-thread arrays, allocated and released in main(); each has one entry
 * per worker thread and is indexed by the thread's rank.
 */
static HANDLE *tid;                          /* thread handles returned by _beginthreadex() */
static CALCTHREADPARAM *params;              /* input parameters of each thread */
static volatile CALCTHREADRESULT *subresult; /* partial sums written by the threads */

/*
 * Thread entry point: computes this thread's share of the midpoint-rule sum.
 *
 * params points to the CALCTHREADPARAM of this thread. Starting at interval
 * myrank + 1 and advancing by size, the thread evaluates f() at the midpoint
 * of every interval it owns and stores the accumulated value in
 * subresult[myrank].sum. Scaling by the interval width is left to the caller.
 *
 * Returns 0, which becomes the thread's exit code.
 */
unsigned int __stdcall calcthread(void *params)
{
  const CALCTHREADPARAM *p = (const CALCTHREADPARAM *) params;
  const int myrank = p->myrank;
  const int size = p->size;
  const long iterations = p->iterations;
  const double w = 1.0 / (double) iterations; /* width of one interval */
  double sum = 0.0;
  double x;
  long i;

#ifdef DEBUG
  printf("Dies ist Thread %2d von %2d\n", myrank, size);
#endif
  for (i = (long) myrank + 1; i <= iterations; i += (long) size) {
    x = w * ((double) i - 0.5); /* midpoint of interval i */
    sum += f(x);
  }
  subresult[myrank].sum = sum;

  /*
   * Return normally instead of calling _endthread(): _endthread() closes the
   * thread handle, but the creator still needs that handle to wait for this
   * thread. Returning from a routine started with _beginthreadex() ends the
   * thread through _endthreadex(), which leaves the handle open.
   */
  return 0;
}

/*
 * Prints build and hardware information to stdout.
 *
 * The text of compilerinfo.inc is included verbatim at the top of the body.
 * NOTE(review): its contents are not visible here; presumably it prints
 * compiler details -- confirm.
 * The hardware section reports selected fields of GetSystemInfo().
 */
void printInfo(void)
{
  SYSTEM_INFO siSysInfo;
# include "compilerinfo.inc" 
  GetSystemInfo(&siSysInfo);
  printf("Hardware information: \n");
  /* DWORD is unsigned long, so print it with %lu rather than %u. */
  printf("  OEM ID: %lu\n", (unsigned long) siSysInfo.dwOemId);
  printf("  Number of processors: %lu\n", (unsigned long) siSysInfo.dwNumberOfProcessors);
  printf("  Page size: %lu\n", (unsigned long) siSysInfo.dwPageSize);
  printf("  Processor type: %lu\n", (unsigned long) siSysInfo.dwProcessorType);
  /* The mask is pointer-sized (64 bits on Win64); widen it so no bits are lost. */
  printf("  Active processor mask: 0x%llx\n", (unsigned long long) siSysInfo.dwActiveProcessorMask);
}

/*
 * Prints the command-line synopsis (in German) to stdout.
 * The program name in the text matches this source file (pi-wthreads); the
 * earlier text named the pthreads variant.
 */
void usage(void)
{
  printf("Aufruf mit: pi-wthreads [-n <Anzahl Threads> | -m <ThreadAffinityMask>] [-i <Anzahl Iterationen>]\n");
}

int main(int argc, char *argv[])
{
  int i;
  int numThreads = DEFAULT_NUMTHREADS;
  long iterations = DEFAULT_ITERATIONS;
  double sum, pi;
  unsigned int threadID;
  LONGLONG duration;
  affinitymask_t *ThreadAffinityMask = NULL;
  affinitymask_t mask;
  char *maskptr, *maskptr_backup;
  int option;

  while ((option = getopt(argc, argv, "i:n:m:")) != -1) {
    switch (option) {
      case 'n':
        if (optarg)
          numThreads = atoi(optarg);
        break;
      case 'i':
        if (optarg)
          iterations = atol(optarg);
        break;
      case 'm': 
        if (optarg) {
          /* zhlen, wie viel Threads starten sollen */
          maskptr = optarg;
          maskptr_backup = maskptr;
          numThreads = 0;
          while (*maskptr != 0) {
            if (*maskptr == '1')
              ++numThreads;
            maskptr++;
          }
          if (numThreads > 0) {
            ThreadAffinityMask = (affinitymask_t *) malloc(numThreads * sizeof(affinitymask_t));
          }
          else {
            usage();
            exit(1);
          }
          /* Array der ThreadAffinityMasks fllen */
          mask = 1;
          maskptr = maskptr_backup;
          i = 0;
          while (*maskptr != 0) {
            if (*maskptr == '1')
              ThreadAffinityMask[i++] = mask;
            mask <<= 1;
            maskptr++;
          }
        }
        break;
      default:
        usage();
        exit(1);
        break;
    }
  }
  // printInfo();

  START();
  sum = 0.0;
  tid = (HANDLE *) malloc(numThreads * sizeof(HANDLE));
  params = (CALCTHREADPARAM *) _aligned_malloc(numThreads * sizeof(CALCTHREADPARAM), CACHE_LINE_SIZE);
  subresult = (CALCTHREADRESULT *) _aligned_malloc(numThreads * sizeof(CALCTHREADRESULT), CACHE_LINE_SIZE);

  /* Threads starten */
  for (i = 0; i < numThreads; i++) {
    params[i].myrank = i;
    params[i].size = numThreads;
    params[i].iterations = iterations;
    tid[i] = (HANDLE) _beginthreadex(NULL, 0, &calcthread, &params[i], CREATE_SUSPENDED, &threadID);
    if (ThreadAffinityMask != NULL)
      SetThreadAffinityMask(tid[i], ThreadAffinityMask[i]);
    ResumeThread(tid[i]); 
  }

  /* Ergebnisse der Threads einsammeln */
  WaitForMultipleObjects(numThreads, tid, TRUE, INFINITE);  
  for (i = 0; i < numThreads; i++)
    sum += subresult[i].sum;
  pi = 1.0 / iterations * sum;

  _aligned_free((void *) subresult);
  _aligned_free((void *) params);
  free((void *) tid);
  STOP(duration);

  printf("Ergebnis nach %d Iterationen, aufgeteilt auf %d Threads:\n",
         iterations, numThreads);
  printf("pi = %.13lf (Fehler: %.13lf)\n", pi, pi - PI);
  printf("Ausfuehrungsdauer: %lld ms\n\n", duration / 1000LL);
  if (ThreadAffinityMask != NULL)
    free((void *) ThreadAffinityMask);
  return 0;
}
