// MPdetect.cpp for Win32
// based on the Intel IA-32 Intel Architecture Software Developer's Manual 2a, Jan 06, 25366618.pdf, chapter 7.10.3
// and on the IA-32 Intel Architecture Optimization Reference Manual, Jun 05, 24896612.pdf, Example 7.6
// and on the AMD CPUID Specification 25481 Rev 2.18, Jan 06
// (c) c't/Andreas Stiller, March 2006  && Intel && AMD...
// compile with: /MT
// The 64-bit version is not included here
// Console application; creates one structure per logical processor holding its
// Package-ID, Core-ID and SMT-ID


#include <windows.h>

#include <conio.h>
#include <excpt.h>
#include <process.h>
#include <stdio.h>
#include <string.h>


// Fixed-width shorthand types used throughout this file.
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned char u8;

// AFFINITYMASK holds a processor affinity bit mask: 64 bits wide on x86-64
// targets, 32 bits otherwise.
// MSVC identifies x64 targets with _M_X64 / _M_AMD64 (there is no _M_IX64);
// GCC-compatible compilers define __x86_64 and __x86_64__.
#if defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
typedef u64 AFFINITYMASK;
#else
typedef u32 AFFINITYMASK;
#endif

// MSVC from version 1400 (VS 2005) onwards provides __cpuid (u32 a[4], u32 b),
// but on the call it only sets register eax, not ecx!
// The Intel compiler and older MSVC versions have no __cpuid.
// The CPUID implemented below additionally passes ebx, ecx and edx in from,
// and back out to, the in/out array a[].

#if (_MSC_VER < 1400) || (defined __INTEL_COMPILER  && (defined  __i386 || defined _M_IX86))
// Executes CPUID with eax = b and ebx/ecx/edx preloaded from a[1..3];
// the resulting eax, ebx, ecx, edx are stored to a[0..3].
void __cpuid (u32 a[4], u32 b) 
{
	__asm
	{
		// save the registers this sequence overwrites
	    push ebx 
		push edx 
		push ecx
		push edi
		// edi = address of a[], eax = requested leaf
        mov edi,a 
		mov eax,b 
		// preload ebx, ecx, edx from a[1], a[2], a[3]
        mov ebx,[edi+4] 
		mov ecx,[edi+8]
		mov edx,[edi+12]
		cpuid
		// write the four result registers back to a[0..3]
		mov [edi],eax
		mov [edi+4],ebx
		mov [edi+8],ecx
		mov [edi+12],edx 
		// restore the saved registers in reverse order
		pop edi 
		pop ecx
		pop edx
		pop ebx
	}
}

#else 
// Declaration only, with C linkage when compiled as C++; no definition is
// given in this branch (per the note above, the compiler supplies __cpuid).
#if defined(__cplusplus)
 extern "C" {
 #endif

  void __cpuid (u32 a[4], u32 b);

 #if defined(__cplusplus)
 }
 #endif
#endif 



// Shared in/out buffer for every __cpuid call in this file.
// Only element 0 is given an explicit initial value (0xFFFFFFFF).
u32 CPUInfo[4] = {0xFFFFFFFF};

// Indices into CPUInfo[] for the four CPUID result registers.
#define rEAX 0 
#define rEBX 1
#define rECX 2
#define rEDX 3



// Prolog: Detect CPUID & Company
// Vendor string buffer, filled by GetCompany().
char CPUString[16];
// EAX of CPUID leaf 1, stored by GetCompany().
u32 CPUID;

// Returned by GetCompany() when CPUID cannot be executed; main() recognises it
// by comparing the pointer itself, not the text.
char* NoCPUID="No CPUID";
// Vendor strings that main() compares against CPUString.
char* Intel="GenuineIntel";
char* AMD  ="AuthenticAMD";
// Written by MaxAMDCoresPerPackage() from CPUID leaf 0x80000008 ECX.
u32 ApicIdCoreIdSize;

char* GetCompany(void)
{
__try { // verify cpuid instruction is supported
//execute cpuid with eax = 0 to get vendor string 

	    __cpuid(CPUInfo, 0);
}
__except (EXCEPTION_EXECUTE_HANDLER) {
return NoCPUID ; // CPUID is not supported; So vendor information is not present
}

    memset(CPUString, 0, sizeof(CPUString));
    *((int*)CPUString) = CPUInfo[1];
    *((int*)(CPUString+4)) = CPUInfo[3];
    *((int*)(CPUString+8)) = CPUInfo[2];
	__cpuid(CPUInfo, 1);
	CPUID=CPUInfo[0];

return CPUString; 

return 0;
}



// 1. Detect support for Hardware Multi-Threading Support in a processor.
//-----------------------------------------------------------------------------------
// Returns a non-zero value if CPUID reports the presence of hardware multi-threading
// support in the physical package where the current logical processor is located.
// This does not guarantee BIOS or OS will enable all logical processors in the physical
// package and make them available to applications.
// Returns zero if hardware multi-threading is not present.
#define HWMT_BIT 0x10000000

// Executes CPUID leaf 1 and returns EDX masked with HWMT_BIT (bit 28):
// non-zero when the bit is set, zero when it is clear or when executing
// CPUID raises an exception.
u32 HWMTSupported(void)
{
	__try {
		// cpuid with eax = 1 delivers feature flags and signature
		__cpuid(CPUInfo, 1);
	}
	__except (EXCEPTION_EXECUTE_HANDLER) {
		// CPUID is not supported, so HW multi-threading cannot be present
		return 0;
	}

	return CPUInfo[rEDX] & HWMT_BIT;
}

// 2. Find the Max number of logical processors per physical processor package.
// ---------------------------------------------------------------------------------
// EBX[23:16] indicates the max number of logical processors per package.
// Returns the max number of logical processors per physical processor package;
// the actual number of logical processors per package enabled by OS may be less.
// Software should not assume the value of (cpuid.1.ebx[23:16]) must be power of 2.

#define NUM_LOGICAL_BITS 0x00FF0000

// Returns CPUID.1:EBX[23:16], or 1 when HWMTSupported() reports no hardware
// multi-threading.
u8 MaxLPPerPackage(void)
{
	u8 logicalCount = 1;

	if (HWMTSupported()) {
		__cpuid(CPUInfo, 1);
		logicalCount = (u8) ((CPUInfo[rEBX] & NUM_LOGICAL_BITS) >> 16);
	}
	return logicalCount;
}

// 3. Find the max number of processor cores per physical processor package.
//------------------------------------------------------------------------------
// Returns the max number of processor cores per physical processor package;
// the actual number of processor cores per package that are enabled may be less.
// Software should not assume the value of (cpuid.4.eax[31:26] +1) must be power of 2.

// Returns (CPUID.4:EAX >> 26) + 1 when leaf 4 is available, otherwise 1.
// Also returns 1 when HWMTSupported() reports no hardware multi-threading.
u8 MaxCoresPerPackage(void)
{
if (!HWMTSupported()) return 1;
// leaf 0: EAX holds the highest supported standard leaf
__cpuid(CPUInfo,0);
if (CPUInfo[rEAX] >=4)  // if cpuid supports leaf number 4

{ // we can retrieve multi-core topology info using leaf 4
	  // request sub-leaf 0: clear the ECX slot of the in/out array ...
	  CPUInfo[rECX]=0;
	  // ... and the ecx register itself, directly before the call
  	  __asm mov ecx,0   // "workaround" for __cpuid of MSVC 2005, 32 Bit
	__cpuid(CPUInfo,4);
// execute cpuid with eax = 4, ecx = 0 
// the unsigned result is narrowed to the u8 return type
return (unsigned ) ((CPUInfo[rEAX] >> 26) +1);
}
else  // must be a single-core processor
return 1;
}

// Returns the maximum number of cores per package on AMD processors, taken
// from CPUID leaf 0x80000008 ECX, or 1 when that leaf is not available.
//
// ECX[7:0]   NC               - number of cores minus one
// ECX[15:12] ApicIdCoreIdSize - number of APIC ID bits used for the core ID
// If ApicIdCoreIdSize is zero the result is NC + 1, otherwise 2^ApicIdCoreIdSize.
// Side effect: stores the extracted field in the global ApicIdCoreIdSize.
// The result is clamped to 255 so the u8 return value can never wrap to 0.
u8 MaxAMDCoresPerPackage(void)
{
	u32 mnc;

	// Leaf 0x80000000: EAX is compared against the leaf number needed below.
	__cpuid(CPUInfo,0x80000000);
	u32 nExIds = CPUInfo[rEAX];

	if (nExIds < 0x80000008)
		return 1; // must be a single-core processor

	__cpuid(CPUInfo,0x80000008);

	// Bitwise AND extracts the fields; a logical && would only yield 0 or 1.
	u32 nc = CPUInfo[rECX] & 0xFF;
	ApicIdCoreIdSize = (CPUInfo[rECX] >> 12) & 0xF;

	if (ApicIdCoreIdSize == 0)
		mnc = nc + 1;
	else
		mnc = 1u << ApicIdCoreIdSize;

	if (mnc > 0xFF)
		mnc = 0xFF;

	return (u8) mnc;
}

//4. Extract the initial APIC ID of a logical processor.
//-------------------------------------------------------------------------------------
// EBX[31:24] initial APIC ID
// Returns the 8-bit unique initial APIC ID for the processor running the code.
// Software can use OS services to affinitize the current thread to each logical processor
// available under the OS to gather the initial APIC_IDs for each logical processor.
#define INITIAL_APIC_ID_BITS 0xFF000000 

// Executes CPUID leaf 1 on the current processor and returns EBX[31:24].
u8 GetInitAPIC_ID (void)
{
	u32 leaf1Ebx;

	__cpuid(CPUInfo, 1);
	leaf1Ebx = CPUInfo[rEBX];

	return (u8) ((leaf1Ebx & INITIAL_APIC_ID_BITS) >> 24);
}

//5. Find the width of a bit-field mask from the maximum count of the bit-field.
//---------------------------------------------------------------------------------------------
// Returns the mask bit width of a bit field from the maximum count that bit field can represent.
// This algorithm does not assume Max_Count to have a value equal to power of 2.

// Returns the number of bits needed to represent the values 0 .. Max_Count-1,
// i.e. the position of the highest set bit of (Max_Count - 1) plus one.
// Max_Count need not be a power of two.
//
//   Max_Count: 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 2, 5 -> 3, 8 -> 3, 9 -> 4
//
// A Max_Count of 0 has no meaningful width and yields 0.
// Written in plain C so it evaluates all 32 bits of the argument and does not
// depend on 32-bit inline assembly.
u32 FindMaskWidth(u32 Max_Count)
{
	u32 mask_width = 0;
	u32 remaining;

	if (Max_Count == 0)
		return 0;

	// Count how many right shifts it takes to clear (Max_Count - 1).
	for (remaining = Max_Count - 1; remaining != 0; remaining >>= 1)
		mask_width++;

	return mask_width;
}


//6. Extract a sub ID given a full ID, maximum sub ID value and shift count.
//---------------------------------------------------------------------------
// Returns the value of the sub ID, this is not a zero-based value
// ##as why that? 
// Returns the value of the sub ID, this is not a zero-based value
//Unsigned char GetSubID(unsigned char Full_ID, unsigned char MaxSubIDvalue, unsigned
//char Shift_Count)
//{
//MaskWidth = FindMaskWidth(MaxSubIDValue);
//MaskBits = ((uchar) (0xff << Shift_Count)) ^ ((uchar) (0xff << Shift_Count + MaskWidth)) ;
//SubID = Full_ID & MaskBits;
//##as SubID >> Shift_Count makes more sense  
//Return SubID;
//}


// Extracts a bit field from Full_ID and returns it shifted down to bit 0.
// The field starts at bit Shift_Count and is FindMaskWidth(MaxSubIDValue)
// bits wide.
u8 GetSubID(u8 Full_ID, u8 MaxSubIDValue, u8 Shift_Count)
{
	u8 fieldWidth = FindMaskWidth(MaxSubIDValue);

	// Bits from Shift_Count upwards, and bits from the end of the field upwards;
	// their XOR leaves exactly the bits of the field.
	u8 fromFieldStart = (u8) (0xff << Shift_Count);
	u8 fromFieldEnd   = (u8) (0xff << (Shift_Count + fieldWidth));
	u8 fieldMask      = fromFieldStart ^ fromFieldEnd;

	return (Full_ID & fieldMask) >> Shift_Count;
}
/*
Example 7-2 Pseudo Code Depicting Three-level Extraction Algorithm
For Each local_APIC_ID{
// Determine MaxLPPerCore available in hardware
// This algorithm assumes there is symmetry across core boundary, i.e. each core within a
package has the same number of logical processors
MaxLPPerCore = MaxLPPerPackage()/MaxCoresPerPackage();
// Extract SMT_ID first, this is the innermost of the three levels
// bit mask width is determined from MaxLPPerCore topological info.
// shift size is 0, corresponding to the right-most bit-field
SMT_ID = GetSubID(local_APIC_ID, MaxLPPerCore, 0);

##as Does Intel really mean the local_APIC_ID from the OS here?  

// Extract CORE_ID:
// bit width is determined from maximum number of cores per package possible in hardware
// shift count is determined by maximum logical processors per core in hardware
CORE_ID = GetSubID(InitAPIC_ID, MaxCoresPerPackage(), FindMaskWidth(
MaxLPPerCore) );
// Extract PACKAGE_ID:
// Assume single cluster.
// Shift out the mask width for maximum logical processors per package
PackageIDMask = ((uchar) (0xff << FindMaskWidth(MaxLPPerPackage())); (!!! a closing parenthesis is missing here) 
PACKAGE_ID = InitAPIC_ID & PackageIDMask;
*/
// Topology record for one logical processor.
typedef struct {
   AFFINITYMASK affinity_mask; // 8 byte in 64-bit mode, 4 byte otherwise; single-bit mask of this processor, set in main()
     u8 local_APIC_ID;           // set to the entry's index, not read from the APIC
	 u8 InitAPIC_ID;  // value returned by GetInitAPIC_ID() on this processor
     u8 MaxLPPerCore; // max logical processors per package divided by max cores per package
     u8 SMT_ID; // lowest bit field of InitAPIC_ID, extracted with GetSubID()
	 u8 CORE_ID; // bit field of InitAPIC_ID directly above the SMT field
     u8 PackageIDMask; // mask selecting the package bits of InitAPIC_ID
     u8 PACKAGE_ID; // masked package bits of InitAPIC_ID, shifted down to bit 0
	 u8 Cache_ID; // not written anywhere in the visible code
} APIC_MAP_T;
// Array of topology records; allocated in main() with one entry per processor
// reported by GetSystemInfo().
APIC_MAP_T* apic_conf;  

unsigned int __stdcall LoginIntelCpu( void* pNr )

{    
	 u8 nr=*(u8*) pNr; 

	 apic_conf[nr].local_APIC_ID = nr; //  falls korrekt durchnummeriert!!! hier schludert Intel in der Doku  
	 apic_conf[nr].InitAPIC_ID  = GetInitAPIC_ID();

     apic_conf[nr].MaxLPPerCore = MaxLPPerPackage()/MaxCoresPerPackage();
	 
     //benutzt abweichend von der Intel-Doko die Inital_APIC-ID fr SMT-ID; 
	 //die Local APIC-ID auf 0xFEE00020 ist unter Win32 gleich der Inital-APIC-ID
     apic_conf[nr].SMT_ID = GetSubID(apic_conf[nr].InitAPIC_ID, apic_conf[nr].MaxLPPerCore, 0);
     apic_conf[nr].CORE_ID = GetSubID(apic_conf[nr].InitAPIC_ID, MaxCoresPerPackage(), FindMaskWidth(apic_conf[nr].MaxLPPerCore) );
      
	 apic_conf[nr].PackageIDMask = ((u8) (0xff << FindMaskWidth(MaxLPPerPackage())));
	 // auch hier wie wie GetSubID wird die Package-ID entsprechend nach rechts geschoben; 
	 apic_conf[nr].PACKAGE_ID = (apic_conf[nr].InitAPIC_ID & apic_conf[nr].PackageIDMask) >> FindMaskWidth(MaxLPPerPackage());

	_endthread();
  
   return 0;
}

unsigned int __stdcall LoginAMDCpu( void* pNr )

{    
	 u8 nr=*(u8*) pNr; 
	 apic_conf[nr].local_APIC_ID = nr; //  falls korrekt durchnummeriert
	 apic_conf[nr].InitAPIC_ID   = GetInitAPIC_ID();

     apic_conf[nr].MaxLPPerCore = MaxLPPerPackage()/MaxAMDCoresPerPackage();

     // Aufteilung der Initial APIC-ID kompatibel zu Intel angenommen
	 // ApicIdCoreIdSize (falls <>0 gibt die Zahl der unteren Bits in APICI-ID fr die CoreID an     
	 // -- ergibt sich indirekt ber MaxCoresPerPackage() und MaxLPPerCore
     //SMT gibts derzeit bei AMD noch nicht => SMT_ID=0
     apic_conf[nr].SMT_ID = GetSubID(apic_conf[nr].InitAPIC_ID, apic_conf[nr].MaxLPPerCore, 0);

	 apic_conf[nr].CORE_ID = GetSubID(apic_conf[nr].InitAPIC_ID, MaxAMDCoresPerPackage(), FindMaskWidth(apic_conf[nr].MaxLPPerCore) );
     apic_conf[nr].PackageIDMask = ((u8) (0xff << FindMaskWidth(MaxLPPerPackage())));
	 apic_conf[nr].PACKAGE_ID = (apic_conf[nr].InitAPIC_ID & apic_conf[nr].PackageIDMask) >>FindMaskWidth(MaxLPPerPackage());

	_endthread();
  
   return 0;
}

int main(int argc, CHAR* argv[])
{
	HANDLE hThread;
	unsigned threadID;
  

	SYSTEM_INFO siSysInfo;

	// Copy the hardware information to the SYSTEM_INFO structure. 

	GetSystemInfo(&siSysInfo); 

	// Display the contents of the SYSTEM_INFO structure. 

	printf("Hardware information: \n");  
	printf("  OEM ID: %u\n", siSysInfo.dwOemId);
	printf("  Number of processors: %u\n", 
		siSysInfo.dwNumberOfProcessors); 
	printf("  Page size: %u\n", siSysInfo.dwPageSize); 
	printf("  Processor type: %u\n", siSysInfo.dwProcessorType); 
	printf("  Active processor mask: 0x%x\n", 
		siSysInfo.dwActiveProcessorMask); 

	// CPUID-Info von gerade aktiver CPU holen (Annahme: gleiche CPUs in den Sockeln)
	if (GetCompany()==NoCPUID) 
	{
		printf ("No CPUID, program aborted");
		return (0); 
	}
	printf("  CPUID  %s  ID= 0x%x\n",CPUString,CPUID); 
    bool isIntel=(strcmp(CPUString,Intel)==0); 
	bool isAMD=(strcmp(CPUString,AMD)==0);
	if (!isIntel && !isAMD) return (0);
	



	/*
	a) Assemble lists of PACKAGE_ID, CORE_ID, and SMT_ID of each enabled logical processors
	//The BIOS and/or OS may limit the number of logical processors available to applications
	// after system boot. The below algorithm will compute topology for the processors visible
	// to the thread that is computing it.
	// Extract the 3-levels of IDs on every processor
	// SystemAffinity is a bitmask of all the processors started by the OS. Use OS specific APIs to
	obtain it.
	// ThreadAffinityMask is used to affinitize the topology enumeration thread to each processor
	using OS specific APIs.
	// Allocate per processor arrays to store the Package_ID, Core_ID and SMT_ID for every
	started processor
	ThreadAffinityMask = 1;
	ProcessorNum = 0;
	while (ThreadAffinityMask != 0 && ThreadAffinityMask <= SystemAffinity) {
	// Check to make sure we can utilize this processor first.
	if (ThreadAffinityMask & SystemAffinity){
	Set thread to run on the processor specified in ThreadAffinityMask
	Wait if necessary and ensure thread is running on specified processor

	InitAPIC_ID = GetInitAPIC_ID();
	Extract the Package, Core and SMT ID as explained in three level extraction
	algorithm
	PackageID[ProcessorNUM] = PACKAGE_ID;
	CoreID[ProcessorNum] = CORE_ID;
	SmtID[ProcessorNum] = SMT_ID;
	ProcessorNum++;
	}
	ThreadAffinityMask <<= 1;
	}
	NumStartedLPs = ProcessorNum;
	*/
	apic_conf = new APIC_MAP_T [siSysInfo.dwNumberOfProcessors];  
	AFFINITYMASK SystemAffinity= (AFFINITYMASK) siSysInfo.dwActiveProcessorMask;
	AFFINITYMASK ThreadAffinityMask = 1;
	u8  ProcessorNum = 0;
	while (ThreadAffinityMask != 0 && ThreadAffinityMask <= SystemAffinity) {
		// Check to make sure we can utilize this processor first.
		if (ThreadAffinityMask & SystemAffinity){
			apic_conf[ProcessorNum].affinity_mask=ThreadAffinityMask; 
			if (isIntel) hThread=(HANDLE) _beginthreadex (NULL,0,&LoginIntelCpu,&ProcessorNum,CREATE_SUSPENDED,&threadID);  
			if (isAMD)   hThread=(HANDLE) _beginthreadex (NULL,0,&LoginAMDCpu,&ProcessorNum,CREATE_SUSPENDED,&threadID);
			SetThreadAffinityMask (hThread,ThreadAffinityMask);
			ResumeThread (hThread); 
			WaitForSingleObject (hThread,INFINITE); 
			if (apic_conf[ProcessorNum].MaxLPPerCore==1) // No HTT or SMT!  
			{
				printf ("  cpu %d, Initial APIC-ID %d, Package-ID %d, Core-ID %d, no HTT/SMT \n",
				apic_conf[ProcessorNum].local_APIC_ID,
				apic_conf[ProcessorNum].InitAPIC_ID, 
				apic_conf[ProcessorNum].PACKAGE_ID,
				apic_conf[ProcessorNum].CORE_ID);
			}
			else
			{

			printf ("  cpu %d, Initial APIC-ID %d, Package-ID %d, Core-ID %d, SMT-ID %d\n",
				apic_conf[ProcessorNum].local_APIC_ID,
				apic_conf[ProcessorNum].InitAPIC_ID, 
				apic_conf[ProcessorNum].PACKAGE_ID,
				apic_conf[ProcessorNum].CORE_ID,
				apic_conf[ProcessorNum].SMT_ID);
			}

		}
		ThreadAffinityMask <<= 1;
		ProcessorNum++;
	}
	u8 NumStartedLPs = ProcessorNum; // sollte = siSysInfo.dwNumberOfProcessors sein! 
	_getch();
	return 0;



}

