// absbench.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <Windows.h>
#include <intrin.h> 
#include <emmintrin.h>
#include "compilerinfo.h"

#define N 1000000


inline __int64 absx64(__int64 x ) {
	if (x>=0) return x; 
	else return -x;
}

inline int absx(int x ) {
	if (x>=0) return x; 
	else return -x;
}

__int64 runloop  (int count) {
	int x=0;
	for (int i=-count; i<=count; i++) x+=abs (i);
	return x;
}

__int64 runloop64  (int count) {
	__int64 x=0;
	for (__int64 i=-count; i<=count; i++) x+=_abs64 (i);	
	return x;
}

__int64 runloopx64  (int count) {
	__int64 x=0;; 
	for (__int64 i=-count; i<=count; i++) x+=absx64 (i);
	return x;
}
__int64 runloopx  (int count) {
	int x=0;
	for (int i=-count; i<=count; i++) x+=absx (i);
	return x;
}



#ifdef __INTEL_COMPILER

// No unrolling for Intel Compiler

__int64 runloop_0  (int count) {
	int x=0;

   #pragma nounroll
	for (int i=-count; i<=count; i++) x+=abs (i);
	return x;
}

__int64 runloop64_0  (int count) {
	__int64 x=0;
    #pragma nounroll
	for (int i=-count; i<=count; i++) x+=_abs64 (i);
	return x;
}

__int64 runloopx64_0  (int count) {
	__int64 x=0;
 #pragma nounroll
	for (__int64 i=-count; i<=count; i++) x+=absx64 (i);
	return x;
}
__int64 runloopx_0  (int count) {
	int x=0;
  #pragma nounroll
	for (int i=-count; i<=count; i++) x+=absx (i);
	return x;
}

_int64 runloop_0n  (int count) {
	int x=0;

   #pragma nounroll
   #pragma novector
	for (int i=-count; i<=count; i++) x+=abs (i);
	return x;
}

__int64 runloop64_0n  (int count) {
	__int64 x=0;
    #pragma nounroll
	#pragma novector
	for (int i=-count; i<=count; i++) x+=_abs64 (i);
	return x;
}

__int64 runloopx64_0n  (int count) {
	__int64 x=0;
 #pragma nounroll
 #pragma novector
	for (__int64 i=-count; i<=count; i++) x+=absx64 (i);
	return x;
}
__int64 runloopx_0n  (int count) {
	int x=0;
  #pragma nounroll
  #pragma novector
	for (int i=-count; i<=count; i++) x+=absx (i);
	return x;
}

#endif


void bench (__int64 (*aloop)(int),char name[80]) { 
   volatile _int64 res=0;
   volatile int vn=N;
	LARGE_INTEGER qa,qe,qf;
	__int64 ta,te,td,tdmin;
	__int64 d,dmin;
	unsigned int id; 
	int reg[4];
  	tdmin=MAXLONGLONG;
	dmin =MAXLONGLONG;
	for(int i=0; i< 100; i++) {
    SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_TIME_CRITICAL);
	QueryPerformanceCounter (&qa);
	 __cpuid(reg,0);
	ta=__rdtsc ();
	res=(*aloop)(vn);
	 te=__rdtscp(&id);
	 __cpuid(reg,0);
	
	QueryPerformanceCounter (&qe);
	QueryPerformanceFrequency(&qf);
	d=qe.QuadPart-qa.QuadPart;    	
	td=te-ta;
	if (td< tdmin) tdmin=td; 
	if (d< dmin) dmin=d;
	//printf ("%4.2f Clocks, %f micros qf=%I64d\n",(double)td/(2*N+1), (double) d/(2*N+1)/qf.QuadPart*1e9,res);
	
	}
	printf_s ("%s %4.2f Clocks %f ns\n",name,(double)tdmin/(2*N+1),(double)dmin/(2*N+1)/qf.QuadPart*1e9 );

 }


int _tmain(int argc, _TCHAR* argv[])
{   volatile __int64 res=0; 
    
	SetThreadAffinityMask(GetCurrentThread(),0x1);
	printf_s ("Benchprogramm abs() und _abs64() aus stdlib.h, c't 4/13, as\n");
	printf_s ("absx() und absx64() sind Inline-Funktionen zum Vergleich\n");
	printf_s ("\n");
    printf_s ("Compilerinfo:\n");
	print_compilerinfo();
	printf_s ("\n");
	
	SetPriorityClass(GetCurrentProcess(),HIGH_PRIORITY_CLASS); 
	printf_s("Kern fuer Turbo Mode ein paar s hochfahren\n");   
	for (_int64 i=0; i< 3300000000; i++) res++;  

	printf_s("ok, Messung startet\n"); 
	bench(&runloop64,    "_abs64 Unroll: default   :");
    bench(&runloopx64,   "absx64 Unroll: default   :");
	bench(&runloop,      "abs    Unroll: default   :");
    bench(&runloopx,     "absx   Unroll: default   :");
#if defined __INTEL_COMPILER
	bench(&runloop64_0,  "_abs64 Unroll(0)         :");
    bench(&runloopx64_0, "absx64 Unroll(0)         :");
	bench(&runloop_0,    "abs    Unroll(0)         :");
    bench(&runloopx_0,   "absx   Unroll(0)         :");
	bench(&runloop64_0n, "_abs64 Unroll(0) novector:");
    bench(&runloopx64_0n,"absx64 Unroll(0) novector:");
	bench(&runloop_0n,   "abs    Unroll(0) novector:");
    bench(&runloopx_0n,  "absx   Unroll(0) novector:");
#endif

	SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_NORMAL);	
	return 0;
}

