// syscomp, 13.03.94, c't/ps
//
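// if defined, syscomp writes its results to the file RESFILENAME and prints
// no progress information (unless GS is defined as well); if not defined,
// progress information and results are written to stdout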
//
#define QUIET           1
//
// name of file into which the results are written
//
#define RESFILENAME     "SYSCOMP.RES"

//
// NT only:
//
// for multiple threads number of THREADS has to be defined
// additionally MT has to be defined for synchronisation
// if it is not defined, no sync takes place
//
#define THREADS 1
//#define MT 1		// define for multiple threads !

// for Georg: together with QUIET, progress information is also printed to stdout
// #define GS 1

// The following declarations are only needed for thread synchronisation
// (i.e. when MT is defined).
#ifdef MT
#include <windows.h>

HANDLE  hStartEvent;	// event used for the synchronized start of all threads
HANDLE  hInitEvent;     // event a thread sets once its initialisation is done

#define START_EVENT     "startthreads"	// names given to the Win32 event objects
#define INIT_EVENT      "initserialize" 

//
// START_SYNC and STOP_SYNC are defined in both configurations; this is the
// multi thread version. START_SYNC reports "initialised" through hInitEvent
// and then blocks until hStartEvent is signalled. Both macros expand to
// complete statements and are used without a trailing semicolon.
//
#define START_SYNC      SetEvent( hInitEvent);  \
                        WaitForSingleObject( hStartEvent, INFINITE);
#define STOP_SYNC       ;	// do nothing in this case

#else   

//
// single thread version: no synchronisation takes place, the macros take the
// time stamps around the measured loop instead. They rely on the locals
// tsStart and tsStop and on the parameter p of the function using them;
// STOP_SYNC stores the elapsed time in p-> dwRunTime.
//
#define START_SYNC      GetTimeStamp( &tsStart);
#define STOP_SYNC       GetTimeStamp( &tsStop); \
						p-> dwRunTime= StampDifference( &tsStart, &tsStop);

#endif

//
// standard includes
// 
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

//
// hack for acorn archimedes
//
// #define ACORN
#ifdef ACORN
#include "pso_h"
#include "pst_h"
#else
#include "psosdep.h"
#include "pstimer.h"
#endif

// 
// hack for 16 bit windows
// 
#if defined (DOS_16) || defined(WIN_16)
#include <malloc.h>
#endif

// version banner: expands to a printf format string followed by its three
// arguments, so it is used as the tail of an fprintf argument list
#define VERINFO "SPAC-Suite (c) c't 94, %s from %s, %s", __FILE__, __DATE__, __TIME__

//
// syscomp uses 64 bit floating point type
// NOTE(review): FPVAR is not referenced in the visible source (the floating
// point benchmarks use the FP macro) - confirm before removing
//
#define FPVAR   double


//
// internal structures & prototypes
//

// has to be thread safe 
// Loop parameters of one function/run combination; filled in by RunThem and
// only read by the benchmark functions.
typedef struct tagBenchFrame
{
        DWORD   dwDataItems;    // number of array elements processed per pass
        DWORD   dwIterations;   // repetition count; the benchmark functions halve it
                                // because each of their iterations makes two passes
} BenchFrame;
typedef BenchFrame *pBenchFrame;

// The pair of buffers a benchmark function works on.
typedef struct tagBenchDataSet
{
        void    *pBaseData;     // input operands (read by the benchmark loops)
        void    *pResultData;   // output values (written by the benchmark loops)
} BenchDataSet;
typedef BenchDataSet *pBenchDataSet;

// Parameter block handed to a benchmark function; also holds its result.
typedef struct tagBenchPara
{
        DWORD                   dwRunTime;      // measured run time as returned by StampDifference
                                                // (presumably ms, it is compared with MINTIME)
        DWORD                   dwOpsms;        // operations per time unit, computed in PrintResults
        pBenchDataSet   pDataSet;               // buffers to work on
        pBenchFrame             pFrame;         // loop parameters
} BenchPara;
typedef BenchPara *pBenchPara;

// signature shared by all benchmark functions
typedef void BenchFunc( pBenchPara);
typedef BenchFunc *pBenchFunc;

// One entry of the benchmark table.
typedef struct tagBenchEntry
{
        pBenchFunc      pBench;         // function to run
        DWORD           dwGroup;        // rating group (GRP_INT or GRP_FP)
        DWORD           dwWeight;       // weight of the function within its group total
} BenchEntry;

// 
// global data sets used for benchmarking
// NOTE(review): these two globals are not referenced in the visible source;
// the benchmark functions take their buffers from p-> pDataSet - confirm
// 
void    *pBaseData;
void    *pResultData;

// 
// random number generator by knuth and stiller
// 
// element type of the generator state and of the values it returns
#define RANDBASE WORD

// j and k are the two indices into y that BaseRand() walks downwards;
// y holds the 56 state words of the additive generator
DWORD           j, k;
RANDBASE        y [56];

// init: (re-)seed the generator so that the following BaseRand() calls
// always produce the same sequence
void Randomize( void)
{
        WORD    i;

        for ( i= 0; i < 55; i++)
                y [i]= i* 1127;
        // BaseRand() cycles through all 56 slots (indices wrap from 0 to 55),
        // so the last slot has to be reset as well. It used to keep whatever
        // the previous sequence left there, which made every re-seeded
        // sequence differ from the first one. 0 is the value the slot has
        // on the very first call.
        y [55]= 0;
        j= 24; k= 55;
}

// distributor
RANDBASE BaseRand( void)
{       
        RANDBASE ret;
        
        y [k]+= y[j];
		ret= y [k];
        j= (j==0)?55:j-1;
        k= (k==0)?55:k-1;
        return ret;
}

//
// hack for 16 bit dos or windows to get correct integer size 
//
#if defined (DOS_16) || defined(WIN_16)
#define INT signed long int
#else
#define INT signed int
#endif

// create random 32 bit integer
INT IntRand( void)
{
        return (INT)(( DWORD)BaseRand()+ (( DWORD)BaseRand() << (DWORD)16));
}

// init integer data set: re-seeds the generator and fills both arrays
// alternately (base element first, then result element) with random integers.
//   pBase, pResult  arrays with room for at least dwDataItems elements each
//   dwDataItems     number of elements to fill; 0 leaves the arrays untouched
void IntInitData( INT *pBase, INT *pResult, DWORD dwDataItems)
{
        Randomize();
        // test before filling: the former do/while decremented first, so a
        // count of 0 wrapped around and ran far beyond the arrays
        while ( dwDataItems > 0)
        {
                *(pBase++)= IntRand();
                *(pResult++)= IntRand();
                dwDataItems--;
        }
}

// try to get a feeling of how much overhead is involved 
//
// Integer baseline: copies the base array to the result array without any
// arithmetic. One iteration is a forward pass followed by a backward pass
// over dwDataItems elements, four elements per loop step; that is why the
// iteration count of the frame is halved.
// The loops step by 4, so dwDataItems has to be a multiple of 4.
// NOTE(review): a frame iteration count below 2 gives 0 here and the
// pre-decrement in the loop condition then wraps around.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void IntNse( pBenchPara p)
{
        INT         *pBase= p-> pDataSet-> pBaseData;
        INT         *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP	tsStart, tsStop;
#endif
        DWORD       dwLoop;
        DWORD       dwDataItems= p-> pFrame-> dwDataItems;
        DWORD       dwIterations= p-> pFrame-> dwIterations/ 2;

        // filling the arrays is not part of the measured section
        IntInitData( pBase, pResult, dwDataItems);
        START_SYNC
        do
        {
                // forward pass: both pointers end one past the last element
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                }
                // backward pass: brings both pointers back to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// addition
//
// Integer add benchmark: result [i]= base [i]+ iTemp. One iteration is a
// forward pass followed by a backward pass over dwDataItems elements, four
// elements per loop step; that is why the iteration count of the frame is
// halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
// NOTE(review): the operands are random 32 bit values, so the signed
// addition can overflow.
void IntAdd( pBenchPara p)
{
        INT         *pBase= p-> pDataSet-> pBaseData;
        INT         *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP	tsStart, tsStop;
#endif
        DWORD       dwLoop;
        DWORD       dwDataItems= p-> pFrame-> dwDataItems;
        DWORD       dwIterations= p-> pFrame-> dwIterations/ 2;
        INT         iTemp;

        IntInitData( pBase, pResult, dwDataItems);
        // take the constant operand from the freshly initialised data; it was
        // formerly read before the initialisation and therefore depended on
        // whatever an earlier benchmark had left in the buffer
        iTemp= *pBase;
        START_SYNC
        do      
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)+ iTemp;
                        *(pResult++)= *(pBase++)+ iTemp;
                        *(pResult++)= *(pBase++)+ iTemp;
                        *(pResult++)= *(pBase++)+ iTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)+ iTemp;
                        *(--pResult)= *(--pBase)+ iTemp;
                        *(--pResult)= *(--pBase)+ iTemp;
                        *(--pResult)= *(--pBase)+ iTemp;
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// mul
//
// Integer multiply benchmark: result [i]= base [i]* iTemp. One iteration is
// a forward pass followed by a backward pass over dwDataItems elements, four
// elements per loop step; that is why the iteration count of the frame is
// halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
// NOTE(review): the operands are random 32 bit values, so the signed
// multiplication can overflow.
void IntMul( pBenchPara p)
{
        INT             *pBase= p-> pDataSet-> pBaseData;
        INT             *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;
        INT             iTemp;

        IntInitData( pBase, pResult, dwDataItems);
        // take the constant operand from the freshly initialised data; it was
        // formerly read before the initialisation and therefore depended on
        // whatever an earlier benchmark had left in the buffer
        iTemp= *pBase;
        START_SYNC
        do      
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)* iTemp;
                        *(pResult++)= *(pBase++)* iTemp;
                        *(pResult++)= *(pBase++)* iTemp;
                        *(pResult++)= *(pBase++)* iTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)* iTemp;
                        *(--pResult)= *(--pBase)* iTemp;
                        *(--pResult)= *(--pBase)* iTemp;
                        *(--pResult)= *(--pBase)* iTemp;
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// div
//
// Integer divide benchmark: result [i]= base [i]/ iTemp. One iteration is a
// forward pass followed by a backward pass over dwDataItems elements, four
// elements per loop step; that is why the iteration count of the frame is
// halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void IntDiv( pBenchPara p)
{
        INT             *pBase= p-> pDataSet-> pBaseData;
        INT             *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;
        INT             iTemp;

        IntInitData( pBase, pResult, dwDataItems);
        // take the divisor from the freshly initialised data; it was formerly
        // read before the initialisation and therefore depended on whatever
        // an earlier benchmark had left in the buffer
        iTemp= *pBase;
        // a divisor of 0 is undefined, and -1 overflows for the most negative
        // dividend; replace both by a harmless divisor
        if ( iTemp == 0 || iTemp == -1)
                iTemp= 3;
        START_SYNC
        do      
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)/ iTemp;
                        *(pResult++)= *(pBase++)/ iTemp;
                        *(pResult++)= *(pBase++)/ iTemp;
                        *(pResult++)= *(pBase++)/ iTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)/ iTemp;
                        *(--pResult)= *(--pBase)/ iTemp;
                        *(--pResult)= *(--pBase)/ iTemp;
                        *(--pResult)= *(--pBase)/ iTemp;
                }

        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// floating point 

#define FP double

// create a random floating point value from two consecutive generator
// values: the first one is the integral part, the second one divided by
// 10000 is added to it
FP FPRand( void)
{
        FP      fpWhole, fpFraction;

        // The two BaseRand() calls used to be operands of a single '+', whose
        // evaluation order is unspecified in C; separate statements make the
        // result identical on every compiler.
        fpWhole= ( FP)BaseRand();
        fpFraction= ( FP)BaseRand()/ (FP)10000;
        return (FP)( fpWhole+ fpFraction);
}

// init floating point data set: re-seeds the generator and fills both arrays
// alternately (base element first, then result element) with random values.
//   pBase, pResult  arrays with room for at least dwDataItems elements each
//   dwDataItems     number of elements to fill; 0 leaves the arrays untouched
void FPInitData( FP *pBase, FP *pResult, DWORD dwDataItems)
{
        Randomize();
        // test before filling: the former do/while decremented first, so a
        // count of 0 wrapped around and ran far beyond the arrays
        while ( dwDataItems > 0)
        {
                *( pBase++)= FPRand();
                *( pResult++)= FPRand();
                dwDataItems--;
        }
}

// Floating point baseline: copies the base array to the result array without
// any arithmetic. One iteration is a forward pass followed by a backward
// pass over dwDataItems elements, four elements per loop step; that is why
// the iteration count of the frame is halved.
// The loops step by 4, so dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPNse( pBenchPara p)
{
        FP                      *pBase= p-> pDataSet-> pBaseData;
        FP                      *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;

        // filling the arrays is not part of the measured section
        FPInitData( pBase, pResult, dwDataItems);
        START_SYNC
        do
        {
                // forward pass: both pointers end one past the last element
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                        *(pResult++)= *(pBase++);
                }
                // backward pass: brings both pointers back to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                        *(--pResult)= *(--pBase);
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// Floating point add benchmark: result [i]= base [i]+ fpTemp. One iteration
// is a forward pass followed by a backward pass over dwDataItems elements,
// four elements per loop step; that is why the iteration count of the frame
// is halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPAdd( pBenchPara p)
{
        FP              *pBase= p-> pDataSet-> pBaseData;
        FP              *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;
        FP              fpTemp;

        FPInitData( pBase, pResult, dwDataItems);
        // take the constant operand from the freshly initialised data; it was
        // formerly read before the initialisation and therefore depended on
        // whatever an earlier benchmark had left in the buffer
        fpTemp= *pBase;
        START_SYNC
        do
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)+ fpTemp;
                        *(pResult++)= *(pBase++)+ fpTemp;
                        *(pResult++)= *(pBase++)+ fpTemp;
                        *(pResult++)= *(pBase++)+ fpTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)+ fpTemp;
                        *(--pResult)= *(--pBase)+ fpTemp;
                        *(--pResult)= *(--pBase)+ fpTemp;
                        *(--pResult)= *(--pBase)+ fpTemp;
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// Floating point multiply benchmark: result [i]= base [i]* fpTemp. One
// iteration is a forward pass followed by a backward pass over dwDataItems
// elements, four elements per loop step; that is why the iteration count of
// the frame is halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPMul( pBenchPara p)
{
        FP              *pBase= p-> pDataSet-> pBaseData;
        FP              *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;
        FP              fpTemp;

        FPInitData( pBase, pResult, dwDataItems);
        // take the constant operand from the freshly initialised data; it was
        // formerly read before the initialisation and therefore depended on
        // whatever an earlier benchmark had left in the buffer
        fpTemp= *(pBase);
        START_SYNC
        do
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)* fpTemp;
                        *(pResult++)= *(pBase++)* fpTemp;
                        *(pResult++)= *(pBase++)* fpTemp;
                        *(pResult++)= *(pBase++)* fpTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)* fpTemp;
                        *(--pResult)= *(--pBase)* fpTemp;
                        *(--pResult)= *(--pBase)* fpTemp;
                        *(--pResult)= *(--pBase)* fpTemp;
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// Floating point divide benchmark: result [i]= base [i]/ fpTemp. One
// iteration is a forward pass followed by a backward pass over dwDataItems
// elements, four elements per loop step; that is why the iteration count of
// the frame is halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPDiv( pBenchPara p)
{
        FP              *pBase= p-> pDataSet-> pBaseData;
        FP              *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;
        FP              fpTemp;

        FPInitData( pBase, pResult, dwDataItems);
        // take the divisor from the freshly initialised data; it was formerly
        // read before the initialisation and therefore depended on whatever
        // an earlier benchmark had left in the buffer
        fpTemp= *(pBase);
        START_SYNC
        do
        {
                // forward pass
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= *(pBase++)/ fpTemp;
                        *(pResult++)= *(pBase++)/ fpTemp;
                        *(pResult++)= *(pBase++)/ fpTemp;
                        *(pResult++)= *(pBase++)/ fpTemp;
                }
                // backward pass, returns both pointers to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= *(--pBase)/ fpTemp;
                        *(--pResult)= *(--pBase)/ fpTemp;
                        *(--pResult)= *(--pBase)/ fpTemp;
                        *(--pResult)= *(--pBase)/ fpTemp;
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// Natural logarithm benchmark: result [i]= log( base [i]). One iteration is
// a forward pass followed by a backward pass over dwDataItems elements, four
// elements per loop step; that is why the iteration count of the frame is
// halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPLn( pBenchPara p)
{
        FP                      *pBase= p-> pDataSet-> pBaseData;
        FP                      *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;

        // filling the arrays is not part of the measured section
        FPInitData( pBase, pResult, dwDataItems);
        START_SYNC
        do
        {
                // forward pass: both pointers end one past the last element
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= log( *(pBase++));
                        *(pResult++)= log( *(pBase++));
                        *(pResult++)= log( *(pBase++));
                        *(pResult++)= log( *(pBase++));
                }
                // backward pass: brings both pointers back to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= log( *(--pBase));
                        *(--pResult)= log( *(--pBase));
                        *(--pResult)= log( *(--pBase));
                        *(--pResult)= log( *(--pBase));
                }

        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// Tangent benchmark: result [i]= tan( base [i]). One iteration is a forward
// pass followed by a backward pass over dwDataItems elements, four elements
// per loop step; that is why the iteration count of the frame is halved.
// dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPTan( pBenchPara p)
{
        FP                      *pBase= p-> pDataSet-> pBaseData;
        FP                      *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;

        // filling the arrays is not part of the measured section
        FPInitData( pBase, pResult, dwDataItems);
        START_SYNC
        do
        {
                // forward pass: both pointers end one past the last element
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= tan( *(pBase++));
                        *(pResult++)= tan( *(pBase++));
                        *(pResult++)= tan( *(pBase++));
                        *(pResult++)= tan( *(pBase++));
                }
                // backward pass: brings both pointers back to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= tan( *(--pBase));
                        *(--pResult)= tan( *(--pBase));
                        *(--pResult)= tan( *(--pBase));
                        *(--pResult)= tan( *(--pBase));
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}               

// Square root benchmark: result [i]= sqrt( base [i]). One iteration is a
// forward pass followed by a backward pass over dwDataItems elements, four
// elements per loop step; that is why the iteration count of the frame is
// halved. dwDataItems has to be a multiple of 4.
// Without MT the elapsed time is stored in p-> dwRunTime by STOP_SYNC.
void FPSqrt( pBenchPara p)
{
        FP                      *pBase= p-> pDataSet-> pBaseData;
        FP                      *pResult= p-> pDataSet-> pResultData;
#ifndef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwLoop;
        DWORD           dwDataItems= p-> pFrame-> dwDataItems;
        DWORD           dwIterations= p-> pFrame-> dwIterations/ 2;

        // filling the arrays is not part of the measured section
        FPInitData( pBase, pResult, dwDataItems);
        START_SYNC
        do
        {
                // forward pass: both pointers end one past the last element
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(pResult++)= sqrt( *(pBase++));
                        *(pResult++)= sqrt( *(pBase++));
                        *(pResult++)= sqrt( *(pBase++));
                        *(pResult++)= sqrt( *(pBase++));
                }
                // backward pass: brings both pointers back to the array start
                for ( dwLoop= 0; dwLoop < dwDataItems; dwLoop+= 4)
                {
                        *(--pResult)= sqrt(*(--pBase));
                        *(--pResult)= sqrt(*(--pBase));
                        *(--pResult)= sqrt(*(--pBase));
                        *(--pResult)= sqrt(*(--pBase));
                }
        }
        while ( --( dwIterations) > 0);
        STOP_SYNC
}

// for better precision every run is repeated SAMPLES times
#define SAMPLES 4

// minimum run time (MINTIME) for each function in ms 
#define MINTIME 250

// every function is measured once per data set size (RUNS sizes in total)
#if defined (DOS_16) || defined(WIN_16)	// data sets have to be small there
#define RUNS            2
DWORD   DataSets [RUNS]= { 2, 16};		// size of each data set in KB (because syscomp
										// uses two sets, the total memory used is
										// twice that many KB)
#else
#define RUNS            11				// number of sets defined
DWORD   DataSets [RUNS]= { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024};
#endif

// syscomp ratings are done in two groups: integer and floating point
#define GROUPS          2
#define GRP_INT         0
#define GRP_FP          1

// element size in bytes of each group, indexed by GRP_INT / GRP_FP
DWORD   DataSizes [GROUPS]=
        { sizeof( INT),
          sizeof( FP)};

// number of functions used
#define TOTALFUNCS      11

//
// the rating of each single function depends on the following synthetic scale
// (entries: function, group, weight within the group total)
//
BenchEntry      Benches [TOTALFUNCS]=	
	{{ &IntNse, GRP_INT, 0},	// baseline, does not count
	 { &IntAdd, GRP_INT,20},	// int add counts twice as much as int mul
	 { &IntMul, GRP_INT,10},	// int mul counts half as much as int add
	 { &IntDiv, GRP_INT, 5},	// int div counts half as much as int mul
	 { &FPNse,  GRP_FP, 0},
	 { &FPAdd,  GRP_FP,20},		// see above
	 { &FPMul,  GRP_FP,10},
	 { &FPDiv,  GRP_FP, 5},
	 { &FPLn,   GRP_FP, 1},
	 { &FPTan,  GRP_FP, 1},
	 { &FPSqrt, GRP_FP, 1}}; 

// loop parameters (data items, iterations) of every function/run
// combination, set up in RunThem
BenchFrame		BenchFrames  [TOTALFUNCS][RUNS];
// base and result buffers, one pair per thread
BenchDataSet    BenchDataSets [THREADS];        
// parameter block and measured result of every function/run/sample combination
BenchPara       Results [TOTALFUNCS][RUNS][SAMPLES];

// allocate memory for data sets
void AllocThreadData( pBenchDataSet     pDataSet)
{
        DWORD   dwRunPtr;
        DWORD   dwMaxDataSize= 0;

        // calculate maximum data set size
        for ( dwRunPtr= 0; dwRunPtr < RUNS; dwRunPtr++)
                if (( DataSets [dwRunPtr]* 1024) > dwMaxDataSize)
                        dwMaxDataSize= DataSets [dwRunPtr]* 1024;
        assert( dwMaxDataSize > 0);

        // allocation has to be done twice (base and result set)
		
#if defined (DOS_16) || defined(WIN_16) // 16 bit platforms need special care
        pDataSet-> pBaseData= halloc( dwMaxDataSize, (size_t)sizeof( char));
#else
        pDataSet-> pBaseData= malloc( dwMaxDataSize);
#endif
        assert( ( pDataSet-> pBaseData) != NULL);

#if defined (DOS_16) || defined(WIN_16) // 16 bit platforms need special care
        pDataSet-> pResultData= halloc( dwMaxDataSize, (size_t)sizeof( char));
#else
        pDataSet-> pResultData= malloc( dwMaxDataSize);
#endif
        assert( ( pDataSet-> pResultData) != NULL);
}

// free memory: releases both buffers of pDataSet (counterpart of
// AllocThreadData) and clears the pointers, so the data set, which lives in
// a global array, does not keep dangling pointers
void FreeThreadData( pBenchDataSet pDataSet)
{
#if defined (DOS_16) || defined(WIN_16) // 16 bit platforms need special care
        hfree( pDataSet-> pBaseData);
        hfree( pDataSet-> pResultData);
#else
        free( pDataSet-> pBaseData);
        free( pDataSet-> pResultData);
#endif
        pDataSet-> pBaseData= NULL;
        pDataSet-> pResultData= NULL;
}

//
// additional global data is needed for multiple threads
//
#ifdef MT

HANDLE  hThreads [THREADS];     // win32 thread handles

// initialisation performed to get multi thread support setup

void CThreadInit( void)
{
        // create non signaled event to start threads
        hStartEvent= CreateEvent( NULL, TRUE, FALSE, START_EVENT);
        assert( hStartEvent != NULL);
        // create non signaled event to sync after initializing
        hInitEvent= CreateEvent( NULL, TRUE, FALSE, INIT_EVENT);
        assert( hInitEvent != NULL);
}

// free resources created on init
void CThreadExit( void)
{
        assert( CloseHandle( hStartEvent) != FALSE);
        assert( CloseHandle( hInitEvent) != FALSE);
}

// reset start event
void CThreadBlockAll( void)
{
        // set to non signaled
        assert( ResetEvent( hStartEvent) != FALSE);
}

// signal start event to start threads
void CThreadUnblockAll( void)
{
        // Event in "signalisiert" Status versetzen
        assert ( SetEvent( hStartEvent) != FALSE);
}

// wait for all threads to terminate 
void CThreadWaitToTerminate( void)
{
        // can wait for a maximum of 64 threads
		assert( WaitForMultipleObjects( THREADS, (HANDLE*)&hThreads, TRUE, INFINITE) != WAIT_FAILED);
}

// create thread for bench function with given parameters
//   pFunc   benchmark function the thread executes
//   pPara   parameter block handed to pFunc
// Returns the handle of the new thread. The function only returns after the
// thread has set hInitEvent (done by START_SYNC, i.e. when the thread has
// initialised its data).
// The Win32 calls are made outside of assert(); inside it they would be
// compiled away when NDEBUG is defined.
// NOTE(review): pFunc has the BenchFunc signature and is cast to
// LPTHREAD_START_ROUTINE - confirm that return type and calling convention
// are compatible on the target compiler.
HANDLE CThreadCreate( pBenchFunc pFunc, pBenchPara pPara)
{
        DWORD   idThread;
        DWORD   dwWait;
        HANDLE  hTemp;
        BOOL    fReset;

        // reset init event
        fReset= ResetEvent( hInitEvent);
        assert( fReset != FALSE);
        (void)fReset;           // only examined by assert

        // create thread and let it run
        hTemp= CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE)pFunc, (LPVOID)pPara, 0, &idThread);
        assert( hTemp != NULL);

        // wait until thread signals init event (ready with initialisation)
        dwWait= WaitForSingleObject( hInitEvent, INFINITE);
        assert( dwWait != WAIT_FAILED);
        (void)dwWait;           // only examined by assert
        return hTemp;
}

// close all thread resources
void CThreadKillThreads()
{
        DWORD   dwThread;

        for ( dwThread= 0; dwThread < THREADS; dwThread++)
                assert( FALSE != CloseHandle( hThreads [dwThread]));
}
#endif

// run bench functions
//
// For every data set size (run) and every entry of Benches:
//   1. set up the frame (data items, iterations) and the parameter blocks,
//   2. calibrate: double the iteration count and run the function until the
//      run takes at least MINTIME,
//   3. run the remaining samples with that iteration count. Without MT the
//      last calibration run already is sample 0; with MT every sample starts
//      THREADS threads at the same time and measures them as a whole.
// The results are left in Results and BenchFrames for PrintResults.
//   fResFile   stream for progress messages (only used when QUIET is not defined)
void RunThem( FILE *fResFile)
{
#ifdef MT
        TIME_STAMP      tsStart, tsStop;
#endif
        DWORD           dwFuncPtr, dwRunPtr, dwSample;
        DWORD           dwThread;

        // each thread needs its own data sets
        for ( dwThread= 0; dwThread < THREADS; dwThread++)
                AllocThreadData( &( BenchDataSets [dwThread]));

        // run them as declared in bench array
        for ( dwRunPtr= 0; dwRunPtr < RUNS; dwRunPtr++)
                for ( dwFuncPtr= 0; dwFuncPtr < TOTALFUNCS; dwFuncPtr++)
                {
#ifndef QUIET
                        fprintf( fResFile, "Lauf "DW(1)" ("DW(1)") - Funktion "DW(2)" ("DW(2)") - Kalibrierung     \b\b\b\b\b",
                                 dwRunPtr, (DWORD)( RUNS- 1), dwFuncPtr, (DWORD)(TOTALFUNCS- 1));
#endif
#if defined ( GS) && defined( QUIET)
                        fprintf( stdout, "Lauf "DW(1)" ("DW(1)") - Funktion "DW(2)" ("DW(2)") - Kalibrierung     \b\b\b\b\b",
                                 dwRunPtr, (DWORD)( RUNS- 1), dwFuncPtr, (DWORD)(TOTALFUNCS- 1));
#endif
                        // set up run info: the data set size is given in KB,
                        // the element size depends on the group of the function
                        BenchFrames [dwFuncPtr][dwRunPtr]. dwIterations= 1;
                        BenchFrames [dwFuncPtr][dwRunPtr]. dwDataItems=
                                DataSets [dwRunPtr]* 1024/ DataSizes [ Benches [dwFuncPtr]. dwGroup];

                        // set up pointers to data set and parameters
                        for ( dwSample= 0; dwSample < SAMPLES; dwSample++)
                        {
                                Results [dwFuncPtr][dwRunPtr][dwSample]. pFrame=
                                        &( BenchFrames [dwFuncPtr][dwRunPtr]);
                                Results [dwFuncPtr][dwRunPtr][dwSample]. pDataSet=
                                        &( BenchDataSets [0]);
                        }

                        //
                        // first there is one run for calibration:
                        //

                        // get current run time and adapt iterations to MINTIME
                        do
                        {
#ifdef MT                       // no synchronisation is wanted here, so keep the start event signalled
                                CThreadUnblockAll();
#endif
                                // double iterations
                                BenchFrames [dwFuncPtr][dwRunPtr]. dwIterations<<= 1;
                                // catch the count being shifted out of its type; the
                                // former comparison with 1 could never fail after a shift
                                assert( BenchFrames [dwFuncPtr][dwRunPtr]. dwIterations != 0);
#ifdef MT
                                GetTimeStamp( &tsStart);        // get start time
#endif
                                Benches [dwFuncPtr]. pBench( &( Results [dwFuncPtr][dwRunPtr][0]));
#ifdef MT
                                GetTimeStamp( &tsStop);         // get stop time
#endif
                        }
#ifdef MT
                        while ( StampDifference( &tsStart, &tsStop) < MINTIME); // until MINTIME reached
#else
                        while ( Results [dwFuncPtr][dwRunPtr][0]. dwRunTime < MINTIME);
#endif

#ifdef MT               // threads have to start simultaneously, so start over again
                        for ( dwSample= 0; dwSample < SAMPLES; dwSample++)
#else                   // single thread version has first result already
                        for ( dwSample= 1; dwSample < SAMPLES; dwSample++)
#endif
                        {
#ifndef QUIET
                                fprintf( fResFile, "\rLauf "DW(1)" ("DW(1)") - Funktion "DW(2)" ("DW(2)") - Wiederholung ("DW(2)") ",
                                         dwRunPtr, (DWORD)( RUNS- 1), dwFuncPtr, (DWORD)(TOTALFUNCS- 1), dwSample);
#endif
#if defined ( GS) && defined( QUIET)
                                fprintf( stdout, "\rLauf "DW(1)" ("DW(1)") - Funktion "DW(2)" ("DW(2)") - Wiederholung ("DW(2)") ",
                                         dwRunPtr, (DWORD)( RUNS- 1), dwFuncPtr, (DWORD)(TOTALFUNCS- 1), dwSample);
#endif
#ifdef MT                       // create threads, block them all until everybody is initialized
                                CThreadBlockAll();
                                for ( dwThread= 0; dwThread < THREADS; dwThread++)
                                {
                                        // all threads of a sample share one parameter block;
                                        // only the data set is switched before each start
                                        Results [dwFuncPtr][dwRunPtr][dwSample]. pDataSet=
                                                &( BenchDataSets [dwThread]);
                                        hThreads [dwThread]=
                                                CThreadCreate( Benches [dwFuncPtr]. pBench,
                                                               &(Results [dwFuncPtr][dwRunPtr][dwSample]));
                                }
                                GetTimeStamp( &tsStart);        // get start time
                                CThreadUnblockAll();            // let them run
                                CThreadWaitToTerminate();       // wait until everybody terminated
                                GetTimeStamp( &tsStop);         // get stop time
                                Results [dwFuncPtr][dwRunPtr][dwSample]. dwRunTime=
                                        StampDifference( &tsStart, &tsStop);    // get time used
                                CThreadKillThreads();           // resource clean up

#else                           // much simpler for just one function
                                Benches [dwFuncPtr]. pBench( &(Results [dwFuncPtr][dwRunPtr][dwSample]));
#endif
                        }

                        // account for the work done in every thread
                        BenchFrames [dwFuncPtr][dwRunPtr]. dwIterations*= THREADS;
#ifndef QUIET
                        fprintf( fResFile, "\r");
#endif
#if defined ( GS) && defined( QUIET)
                        fprintf( stdout, "\r");
#endif
                }

        // clean up thread local data
        for ( dwThread= 0; dwThread < THREADS; dwThread++)
                FreeThreadData( &( BenchDataSets [dwThread]));
}

// Weighted sums over all functions of one group for one data set size;
// accumulated in PrintResults and printed by PrintTotals.
typedef struct tagResEntry
{
        DWORD   dwSum;          // sum of weight * mean operations per time unit
        DWORD   dwMembers;      // sum of the weights
        DWORD   dwMaxDif;       // sum of weight * maximum deviation (percent)
        DWORD   dwMidDif;       // sum of weight * mean deviation (percent)
} ResEntry;

// one entry per group and data set size; starts out zeroed as a global
ResEntry        ConcentratedResults [GROUPS][RUNS];

// Print the summary of one group: a separator row, the "Total" row with the
// weighted means of every data set size, and the overall rating, which is
// the geometric mean of the per-size values.
//   fResFile   stream the table is written to
//   dwGroup    group (GRP_INT or GRP_FP) whose ConcentratedResults are printed
void PrintTotals( FILE  *fResFile, DWORD dwGroup)
{
        const ResEntry  *pEntry;
        DWORD           dwRun;
        double          fpMembers;
        double          fpLogOps= 0.0, fpLogMax= 0.0, fpLogMid= 0.0;

        // separator row
        fprintf( fResFile, "\n%-7s ", "");
        for ( dwRun= 0; dwRun < RUNS; dwRun++)
                fprintf( fResFile, "%8s %17s ", "--------", "");

        // total row, summing up the logarithms for the geometric mean on the way
        fprintf( fResFile, "\n%-7s ", "Total");
        for ( dwRun= 0; dwRun < RUNS; dwRun++)
        {
                pEntry= &ConcentratedResults [dwGroup][dwRun];

                fprintf( fResFile, DW(8)" ("DW(3)", "DW(3)") %6s ",
                         ( pEntry-> dwSum/ pEntry-> dwMembers),
                         ( pEntry-> dwMaxDif/ pEntry-> dwMembers),
                         ( pEntry-> dwMidDif/ pEntry-> dwMembers), "");

                fpMembers= (double)pEntry-> dwMembers;
                fpLogOps+= log( (double)pEntry-> dwSum/ fpMembers);
                // deviations of 0 are skipped, their logarithm is not finite
                if ( pEntry-> dwMaxDif != 0)
                        fpLogMax+= log( (double)pEntry-> dwMaxDif/ fpMembers);
                if ( pEntry-> dwMidDif != 0)
                        fpLogMid+= log( (double)pEntry-> dwMidDif/ fpMembers);
        }

        fprintf( fResFile, "\n--> %.2f SPACs (%.2f, %.2f)",
                 exp( fpLogOps/ (double)RUNS),
                 exp( fpLogMax/ (double)RUNS),
                 exp( fpLogMid/ (double)RUNS));
        fprintf( fResFile, "\n\n");
}

// absolute difference of two unsigned values
DWORD dif( DWORD a, DWORD b)
{
        return ( a > b) ? ( a- b) : ( b- a);
}

// Print the result table: the version banner, a header row with the memory
// used per run (two data sets, hence DataSets * 2 KB), and one row per
// function showing, for every data set size, the mean operations per time
// unit followed by (mean run time, maximum deviation, mean deviation), the
// deviations in percent of the mean run time. After the last function of
// each group the weighted group totals are printed through PrintTotals.
// Also fills in dwOpsms of every Results entry and ConcentratedResults.
//   fResFile   stream the table is written to
void PrintResults( FILE* fResFile)
{
        char    Names [TOTALFUNCS][20]= { "Int Nix", "Add", "Mul", "Div",
                                          "FP  Nix", "Add", "Mul", "Div",
                                          "Log", "Tan", "Sqr"};
        DWORD   dwRunPtr, dwFuncPtr;
        DWORD   dwGroup;
        DWORD   dwSample;

        fprintf( fResFile, VERINFO);
        fprintf( fResFile, "\n\n");
        fprintf( fResFile, "%-7s ", "");
        // header row; the format has three conversions, the surplus copies of
        // the arguments that used to follow have been dropped
        for ( dwRunPtr= 0; dwRunPtr < RUNS; dwRunPtr++)
                fprintf( fResFile, DW(5)" %2s %-17s ", ( DataSets [dwRunPtr]* 2), "KB", "(ms, max, mid)");
#ifndef QUIET
        fprintf( fResFile, "                          ");
#endif

        dwGroup= Benches [0]. dwGroup;
        for ( dwFuncPtr= 0; dwFuncPtr < TOTALFUNCS; dwFuncPtr++)
        {
                if ( Benches [dwFuncPtr]. dwGroup != dwGroup)
                {
                        // a new group starts: finish the previous one and switch
                        // over right away, so that this function is accumulated
                        // into its own group (it used to be added to the previous
                        // one, which only went unnoticed because the first
                        // function of each group has a weight of 0)
                        PrintTotals( fResFile, dwGroup);
                        dwGroup= Benches [dwFuncPtr]. dwGroup;
                }
                else
                        fprintf( fResFile, "\n");

                fprintf( fResFile, "%7s ", Names [dwFuncPtr]);

                for ( dwRunPtr= 0; dwRunPtr < RUNS; dwRunPtr++)
                {
                        DWORD   dwMidOpsms= 0, dwMidTime= 0;
                        DWORD   dwMidDif= 0, dwMaxDif= 0;
                        DWORD   dwDif;

                        // calculate operations per time unit of every sample;
                        // a run time of 0 yields 0 instead of a division by zero
                        // NOTE(review): the product iterations * data items is
                        // computed in DWORD and may overflow on fast machines
                        for ( dwSample= 0; dwSample < SAMPLES; dwSample++)
                        {
                                if ( Results [dwFuncPtr][dwRunPtr][dwSample]. dwRunTime != 0)
                                        Results [dwFuncPtr][dwRunPtr][dwSample]. dwOpsms=
                                                ( Results [dwFuncPtr][dwRunPtr][dwSample]. pFrame-> dwIterations*
                                                  Results [dwFuncPtr][dwRunPtr][dwSample]. pFrame-> dwDataItems)
                                                / Results [dwFuncPtr][dwRunPtr][dwSample]. dwRunTime;
                                else
                                        Results [dwFuncPtr][dwRunPtr][dwSample]. dwOpsms= 0;

                                dwMidOpsms+= Results [dwFuncPtr][dwRunPtr][dwSample]. dwOpsms;
                                dwMidTime+= Results [dwFuncPtr][dwRunPtr][dwSample]. dwRunTime;
                        }

                        dwMidTime/= SAMPLES;
                        dwMidOpsms/= SAMPLES;

                        // determine the deviations from the mean run time
                        for ( dwSample= 0; dwSample < SAMPLES; dwSample++)
                        {
                                dwDif= dif( Results [dwFuncPtr][dwRunPtr][dwSample]. dwRunTime, dwMidTime);
                                dwMidDif+= dwDif;
                                if ( dwMaxDif < dwDif)
                                        dwMaxDif= dwDif;
                        }
                        dwMidDif/= SAMPLES;

                        // express the deviations in percent of the mean run time
                        if ( dwMidTime != 0)
                        {
                                dwMaxDif= (( dwMaxDif* 100)/ dwMidTime);
                                dwMidDif= (( dwMidDif* 100)/ dwMidTime);
                        }

                        fprintf( fResFile, DW(8)" ("DW(5)", "DW(3)", "DW(3)") ", dwMidOpsms, dwMidTime, dwMaxDif, dwMidDif);

                        // add the weighted values to the totals of the group
                        ConcentratedResults [dwGroup][dwRunPtr]. dwSum+= Benches [dwFuncPtr].dwWeight* dwMidOpsms;
                        ConcentratedResults [dwGroup][dwRunPtr]. dwMaxDif+= Benches [dwFuncPtr].dwWeight* dwMaxDif;
                        ConcentratedResults [dwGroup][dwRunPtr]. dwMidDif+= Benches [dwFuncPtr].dwWeight* dwMidDif;
                        ConcentratedResults [dwGroup][dwRunPtr]. dwMembers+= Benches [dwFuncPtr]. dwWeight;
                }
        }
        // totals of the last group
        PrintTotals( fResFile, dwGroup);
}

// Program entry: opens the result stream (the file RESFILENAME when QUIET is
// defined, stdout otherwise), runs all benchmarks and prints the results.
// Returns EXIT_SUCCESS, or EXIT_FAILURE when the result file cannot be
// opened. The open check is explicit because an assert would vanish under
// NDEBUG and leave a NULL stream in use.
int main( void)
{
        FILE    *fResFile;

#ifdef QUIET
        fResFile= fopen( RESFILENAME, "w");
        if ( fResFile == NULL)
        {
                perror( RESFILENAME);
                return EXIT_FAILURE;
        }
#if defined ( GS) && defined( QUIET)
        setbuf( stdout, NULL);  // no buffering 
#endif
#else
        setbuf( stdout, NULL);  // no buffering
        fResFile= stdout;
        fprintf( fResFile, VERINFO);
        fprintf( fResFile, "\n\n");
#endif
        InitTime();             // init timer 
#ifdef MT
        CThreadInit();          // init thread related stuff
#endif          
        RunThem( fResFile);     // run all benchmarks
        PrintResults( fResFile);// print the result table
#ifdef MT
        CThreadExit();          // clean up thread stuff
#endif
#ifdef QUIET
        fclose( fResFile);      // close result file
#endif
        return EXIT_SUCCESS;
}

// 
// entry point hack for win32s - it does not know console mode!
// WinMain just forwards to main(); none of its parameters is used.
//
#if defined( NT_LITE) || defined( WIN_16)
int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpszCmdLine, int nCmdShow)
{
        main();
        // WinMain is declared to return int; it formerly had no return statement
        return 0;
}
#endif
