/*
    Die Loesung des c't Puzzles
    (c) 2003, Harald Woerndl-Aichriedler
    
    Compiler call:
    gcc-3.0 -Wall -O3 -funroll-loops -march=i586 -fomit-frame-pointer puzzle.c -o puzzle
*/

#include <stdio.h>
#include <assert.h>
#include <time.h>
#include <stdlib.h>

/*
 * Puzzle dimensions.
 *
 * NUM_KLOTZ   number of pieces; length of kloetze[] and klotz_used[]
 * TOTAL_X/Y/Z edge lengths of the box, used as the bounds of bitposmap
 *             and of every coordinate loop
 * NUM_BITS    number of unit cells in the box (5 * 4 * 3 = 60); one bit of
 *             a bitvector per cell
 * MAX_CUBES   capacity of the cubes[] array inside a klotz
 */
#define NUM_KLOTZ	12
#define TOTAL_X		5
#define TOTAL_Y		4
#define TOTAL_Z		3
#define NUM_BITS	(TOTAL_X * TOTAL_Y * TOTAL_Z)
#define MAX_CUBES	6


/**
 * BIT OPERATIONS:
 */

/* Occupancy mask of the box: one bit per unit cell (NUM_BITS = 60 bits used). */
typedef unsigned long long bitvector;

/*
 * Maps a cell, indexed as [z][y][x], to its bit index inside a bitvector.
 *
 * The table is a permutation of 0..59. Each x-slab owns 12 consecutive
 * indices (12*x .. 12*x+11); inside a slab the (y, z) cells are numbered in
 * the order y=0 (z=0,1,2), y=3 (z=2,1,0), y=1 (z=0,1,2), y=2 (z=2,1,0).
 *
 * Since solve() always fills the lowest free bit next, this numbering is the
 * order in which cells get filled.
 */
static int bitposmap[TOTAL_Z][TOTAL_Y][TOTAL_X] =
{
    // Original author's notes: an earlier attempt that started numbering from
    // the center ("like a ball") was bad; the ordering below is the one
    // labelled "pretty good".
    {   {0,	12,	24,	36,	48 },
	{6,	18,	30,	42,	54 },
	{11,	23,	35,	47,	59 },
	{5,	17,	29,	41,	53 }
    },
    {   {1,	13,	25,	37,	49 },
	{7,	19,	31,	43,	55 },
	{10,	22,	34,	46,	58 },
	{4,	16,	28,	40,	52 }
    },
    {   {2,	14,	26,	38,	50 },
	{8,	20,	32,	44,	56 },
	{9,	21,	33,	45,	57 },
	{3,	15,	27,	39,	51 }
    }
};

// BIT_POS(x, y, z): bit index of cell (x, y, z), looked up in bitposmap.
// Note the argument order (x, y, z) versus the table's index order [z][y][x].
#define BIT_POS(x, y, z)	bitposmap[z][y][x]
// Earlier formula-based numbering ("classic version"), kept for reference:
// #define BIT_POS(x, y, z)	((x)*(TOTAL_Z*TOTAL_Y)+(y)*TOTAL_Z+(z))
// Earlier formula-based numbering that the author marked as bad:
// #define BIT_POS(x, y, z)	((z)*(TOTAL_X*TOTAL_Y)+(y)*TOTAL_X+(x))

// Lookup table of the 64 single-bit masks; the author notes this as ~13%
// faster than shifting. The table is filled at the start of main(), so
// BIT_NR must not be used before that.
static bitvector bit_table[64];
// BIT_NR(bpos): mask with only bit number bpos set (bpos must be 0..63).
#define BIT_NR(bpos)		bit_table[bpos]
// Shift-based alternative, kept for reference:
// #define BIT_NR(bpos)		(((bitvector) 1) << (bpos))

// SET_BIT: marks cell (x, y, z) as occupied in bv (modifies bv).
// HAS_BIT: non-zero iff cell (x, y, z) is occupied in bv.
#define SET_BIT(bv, x, y, z)	((bv) |= BIT_NR(BIT_POS(x, y, z)))
#define HAS_BIT(bv, x, y, z)	((bv) & BIT_NR(BIT_POS(x, y, z)))


/**
 * TYPE DEFINITIONS:
 */

/*
 * Integer triple. Used both for the position of one unit cube of a piece
 * (klotz.cubes[]) and for the bounding-box extent returned by get_size().
 */
typedef struct {
    int x,y,z;
} coordinate;

/*
 * Growable list of placement masks; store_bv() appends to it and grows it
 * with realloc().
 */
typedef struct {
    // Heap array of placement masks (NULL while the list is empty).
    // NOTE(review): the aligned(16) attribute applies to this pointer member
    // itself, not to the memory it points to - confirm that was the intent.
    bitvector	*values __attribute__ ((aligned (16)));
    // Number of entries currently stored in values[].
    int		num;
    // Number of entries values[] has room for.
    int 	size;
} pos_array;

/* Description of one puzzle piece ("Klotz"). */
typedef struct {
    // Label printed in the statistics output of main().
    char	*name;
    // When non-zero, store_bv() skips a placement if one of its three
    // build_symmetrics() images is already stored for this piece.
    int		less_mirrored;
    // Number of valid entries in cubes[].
    int		numcubes;
    // Unit-cube coordinates of the piece; rotated in place by build_variants().
    coordinate	cubes[MAX_CUBES];
    // Total number of stored placements; incremented by store_bv().
    int		total_num;		// holds total number of positions
} klotz;


/**
 * GLOBAL VARS:
 */

/*
 * options[b][k]: all stored placements of piece k whose lowest occupied bit
 * index is b. Filled by store_bv(), read by solve().
 */
pos_array options[NUM_BITS][NUM_KLOTZ];

// One flag byte per piece: non-zero while the piece is placed on the current
// search path. (Author's note: an int bitfield is slower!)
unsigned char klotz_used[NUM_KLOTZ];
// Mark piece i as placed / available again, and test whether it is placed.
#define USE_KLOTZ(i)		(klotz_used[i] = 1)
#define FREE_KLOTZ(i)		(klotz_used[i] = 0)
#define UNAVAIL_KLOTZ(i)	(klotz_used[i])


// clock() value at the previous progress line printed by solution()
// (zero until the first one).
static clock_t timer;
// Number of complete fillings found so far; reset in main() before solving.
static int solutions;
// Number of non-colliding placements tried by solve().
static long long tcnt = 0;

/*
 * The twelve pieces. Each entry is
 *     { name, less_mirrored, numcubes, { cube coordinates {x, y, z} ... } }
 * total_num is not listed and therefore starts at zero.
 *
 * The cube counts add up to 60 (ten pieces of 5, one of 4, one of 6), which
 * equals NUM_BITS. Only piece "5" has less_mirrored set. The coordinates are
 * rewritten in place by build_variants().
 */
klotz kloetze[NUM_KLOTZ] = {
    { "1",  0, 5, {{1, 0, 0}, {0, 0, 0}, {0, 1, 0}, {0, 2, 0}, {1, 2, 0}} },
    { "2",  0, 5, {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {1, 2, 0}, {2, 2, 0}} },
    { "3",  0, 5, {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {1, 2, 0}, {1, 3, 0}} },
    { "4",  0, 5, {{0, 0, 0}, {0, 1, 0}, {0, 2, 0}, {0, 3, 0}, {1, 2, 0}} },
    { "5",  1, 5, {{0, 0, 0}, {0, 1, 0}, {1, 0, 0}, {1, 1, 0}, {0, 0, 1}} },
    
    { "6",  0, 5, {{1, 0, 0}, {1, 1, 0}, {1, 2, 0}, {0, 2, 0}, {2, 2, 0}} },
    { "7",  0, 5, {{0, 0, 0}, {0, 1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 2, 0}} },
    { "8",  0, 5, {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {2, 1, 0}, {2, 2, 0}} },
    { "9",  0, 4, {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 0, 1}} },
    { "10", 0, 5, {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {1, 2, 0}, {2, 1, 0}} },

    { "11", 0, 5, {{1, 1, 0}, {1, 0, 0}, {0, 1, 0}, {1, 2, 0}, {2, 1, 0}} },
    { "12", 0, 6, {{0, 0, 0}, {0, 1, 0}, {0, 2, 0}, {0, 3, 0}, {1, 0, 0}, {1, 2, 0}} }
};


/**
 * NON-CRITICAL FUNCTIONS:
 */

/*
 * Computes the extent of piece k along each axis.
 *
 * k  piece whose first numcubes cube coordinates are inspected
 * s  receives (largest x, largest y, largest z) + 1, where each maximum is
 *    taken over the cubes and is never below 0
 */
static void get_size(klotz *k, coordinate *s) {
    int max_x = 0;
    int max_y = 0;
    int max_z = 0;
    int idx = 0;

    while (idx < k->numcubes) {
        const coordinate *cube = &k->cubes[idx];

        if (cube->x > max_x)
            max_x = cube->x;
        if (cube->y > max_y)
            max_y = cube->y;
        if (cube->z > max_z)
            max_z = cube->z;
        idx++;
    }

    /* extent = highest coordinate + 1 */
    s->x = max_x + 1;
    s->y = max_y + 1;
    s->z = max_z + 1;
}

/* An ugly symmetry detection: */

/*
 * Computes the three symmetric images of occupancy mask bv:
 *   sym[0]  x and y mirrored, z unchanged
 *   sym[1]  y and z mirrored, x unchanged
 *   sym[2]  x and z mirrored, y unchanged
 * "Mirrored" means coordinate c becomes (TOTAL_c - 1 - c).
 */
static void build_symmetrics(const bitvector bv, bitvector sym[3])
{
    bitvector flip_xy = 0;
    bitvector flip_yz = 0;
    bitvector flip_xz = 0;
    int x, y, z;

    for (x = 0; x < TOTAL_X; x++) {
        const int mx = TOTAL_X - 1 - x;

        for (y = 0; y < TOTAL_Y; y++) {
            const int my = TOTAL_Y - 1 - y;

            for (z = 0; z < TOTAL_Z; z++) {
                const int mz = TOTAL_Z - 1 - z;

                if (!HAS_BIT(bv, x, y, z))
                    continue;

                SET_BIT(flip_xy, mx, my, z);
                SET_BIT(flip_yz, x, my, mz);
                SET_BIT(flip_xz, mx, y, mz);
            }
        }
    }

    sym[0] = flip_xy;
    sym[1] = flip_yz;
    sym[2] = flip_xz;
}
/*
 * Returns the index of the lowest set bit of bv.
 *
 * bv must have at least one bit set: the scan has no upper bound and would
 * otherwise run past the end of the bit table.
 */
static int start_bit(const bitvector bv) {
    int idx = 0;

    /* step over clear bits until the first set one is reached */
    while ((bv & BIT_NR(idx)) == 0)
        idx++;

    return idx;
}
/*
 * Linear search for an existing bitvector in a list.
 * Returns 1 if bv is among the first pos->num entries of pos->values,
 * otherwise 0.
 */
static int has_bv(const bitvector bv, pos_array *pos) {
    const bitvector *entry = pos->values;
    int remaining = pos->num;

    while (remaining-- > 0) {
        if (*entry++ == bv)
            return 1;
    }
    return 0;
}
/*
 * Records one placement (occupancy mask bv) of piece knum.
 *
 * The mask is appended to options[start_bit(bv)][knum], i.e. only to the list
 * belonging to its lowest occupied bit, and kloetze[knum].total_num is
 * incremented. Nothing is stored when
 *   - the piece is flagged less_mirrored and one of the three images from
 *     build_symmetrics() is already stored for it, or
 *   - the identical mask is already in the list.
 *
 * bv must have at least one bit set (start_bit() requires it).
 * If the list cannot be grown, a diagnostic is written to stderr and the
 * process exits with a failure status.
 */
static void store_bv(int knum, const bitvector bv) {
    pos_array *pos;

    if (kloetze[knum].less_mirrored) {
        bitvector sym[3];
        int s;

        build_symmetrics(bv, sym);
        /* each image lives in the list of its own lowest bit */
        for (s = 0; s < 3; s++) {
            if (has_bv(sym[s], &options[start_bit(sym[s])][knum]))
                return;
        }
    }

    /* only store in the list of the LOWEST bit */
    pos = &options[start_bit(bv)][knum];
    if (has_bv(bv, pos))                    /* already stored? */
        return;

    if (pos->num >= pos->size) {
        /* start with 4 entries, then double */
        int new_size = (pos->size > 0) ? (pos->size * 2) : 4;
        /* keep the old pointer until realloc is known to have succeeded */
        bitvector *grown = realloc(pos->values,
                                   (size_t) new_size * sizeof *grown);

        if (grown == NULL) {
            perror("store_bv: realloc");
            exit(EXIT_FAILURE);
        }
        pos->values = grown;
        pos->size = new_size;
    }

    pos->values[pos->num++] = bv;
    kloetze[knum].total_num++;
}

/* An ugly variant generator: */

/*
 * Stores every translation of the piece, in its current orientation, whose
 * bounding box (extent) fits inside the TOTAL_X x TOTAL_Y x TOTAL_Z box.
 */
static void store_translations(int knum, const klotz *piece,
                               const coordinate *extent) {
    int ox, oy, oz, c;

    for (ox = 0; ox <= TOTAL_X - extent->x; ox++) {
        for (oy = 0; oy <= TOTAL_Y - extent->y; oy++) {
            for (oz = 0; oz <= TOTAL_Z - extent->z; oz++) {
                bitvector mask = 0;

                for (c = 0; c < piece->numcubes; c++) {
                    const coordinate *cube = &piece->cubes[c];
                    SET_BIT(mask, ox + cube->x, oy + cube->y, oz + cube->z);
                }
                store_bv(knum, mask);
            }
        }
    }
}

/*
 * Generates all placements of piece knum and passes them to store_bv().
 *
 * Walks through 4 * 4 * 4 combinations of quarter turns (x outermost,
 * z innermost). For each orientation every fitting translation is stored;
 * store_bv() rejects masks it already has. The cube coordinates in
 * kloetze[knum] are rotated in place as the loops advance.
 */
static void build_variants(int knum) {
    klotz *piece = &kloetze[knum];
    coordinate extent;
    int turn_x, turn_y, turn_z;
    int c, saved;

    for (turn_x = 0; turn_x < 4; turn_x++) {
        for (turn_y = 0; turn_y < 4; turn_y++) {
            for (turn_z = 0; turn_z < 4; turn_z++) {
                /* slide the current orientation through the box */
                get_size(piece, &extent);
                store_translations(knum, piece, &extent);

                /* quarter turn about z: (x, y) -> (extent.y-1-y, x) */
                for (c = 0; c < piece->numcubes; c++) {
                    coordinate *cube = &piece->cubes[c];

                    saved = cube->x;
                    cube->x = (extent.y - 1) - cube->y;
                    cube->y = saved;
                }
            }

            /* quarter turn about y: (x, z) -> (extent.z-1-z, x) */
            get_size(piece, &extent);
            for (c = 0; c < piece->numcubes; c++) {
                coordinate *cube = &piece->cubes[c];

                saved = cube->x;
                cube->x = (extent.z - 1) - cube->z;
                cube->z = saved;
            }
        }

        /* quarter turn about x: (y, z) -> (z, extent.y-1-y) */
        get_size(piece, &extent);
        for (c = 0; c < piece->numcubes; c++) {
            coordinate *cube = &piece->cubes[c];

            saved = cube->z;
            cube->z = (extent.y - 1) - cube->y;
            cube->y = saved;
        }
    }
}

/*
 * Called once for every complete filling found by solve().
 *
 * Increments the solution counter, writes a '.' to stderr on every 20th
 * solution and, on every 1000th, prints a progress line to stdout with the
 * running count, the clock() time since the previous progress line, the
 * resulting rate and the number of placements tried so far.
 */
void solution(void) {
    ++solutions;

    if (solutions % 20 == 0)
        fputc('.', stderr);

    if (solutions % 1000 != 0)
        return;

    {
        const clock_t now = clock();
        const double elapsed = (now - timer) / (double) CLOCKS_PER_SEC;

        printf(": %i (%.2f s, %.0f l/s, n=%.0f)\n",
               solutions, elapsed, 1000.0 / elapsed, (double) tcnt);
        timer = now;    /* next interval starts here */
    }
}


/**
 * CRITICAL FUNCTION:
 */

/*
 * Recursive exhaustive search over piece placements.
 *
 * predef  mask of all cells occupied so far
 * bpos    bit index of the cell to fill next; on every call it is the lowest
 *         bit that is still clear in predef
 *
 * Every placement in options[bpos][piece] has bpos as its lowest bit, so any
 * of them that does not collide with predef covers the target cell. After a
 * placement the next clear bit is located; if that is NUM_BITS the box is
 * full and solution() is called, otherwise the search recurses with the
 * piece marked as used.
 */
static void solve(const bitvector predef, int bpos) {
    pos_array *candidates = options[bpos];
    int piece;

    for (piece = 0; piece < NUM_KLOTZ; piece++) {
        const bitvector *placements;
        int count, k;

        if (UNAVAIL_KLOTZ(piece))               /* already used? */
            continue;

        placements = candidates[piece].values;
        count = candidates[piece].num;

        for (k = 0; k < count; k++) {           /* each one covers cell bpos */
            const bitvector shape = placements[k];
            bitvector filled;
            int cell;

            if (predef & shape)                 /* collision? */
                continue;

            tcnt++;
            filled = predef | shape;

            /* skip set bits to find the next free cell */
            cell = bpos + 1;
            while (filled & BIT_NR(cell))
                cell++;

            if (cell != NUM_BITS) {
                USE_KLOTZ(piece);
                solve(filled, cell);            /* keep solving... */
                FREE_KLOTZ(piece);
            } else {
                solution();                     /* filled everything */
            }
        }
    }
}

/*
 * Program entry point.
 *
 * Fills the single-bit lookup table, generates all placements of every
 * piece, prints per-piece statistics, runs the search from the empty box and
 * finally prints the number of solutions, the wall-clock duration in seconds
 * and the number of placements tried.
 *
 * Always returns 0.
 */
int main(void) {
    int i;
    time_t start, end;

    /*
     * Line-buffer stdout so log lines reach a redirected file immediately.
     * setvbuf() is the ISO C way to request this; the previously used
     * setlinebuf() is a BSD/glibc extension that is not declared under
     * strict ISO C. Failure is ignored: buffering is only a convenience.
     */
    (void) setvbuf(stdout, NULL, _IOLBF, BUFSIZ);

    /* init predefined single bits; must happen before any BIT_NR use */
    for (i = 0; i < 64; i++) {
        bit_table[i] = ((bitvector) 1) << i;
    }

    for (i = 0; i < NUM_KLOTZ; i++)
        build_variants(i);

    {   /* OPTIONAL: print some statistics... */
        int sum = 0;            /* total number of stored placements */
        double total = 1.0;     /* product of the per-piece placement counts */

        printf("VARIANTEN:\n");
        for (i = 0; i < NUM_KLOTZ; i++) {
            printf("  Klotz %2s: %i%s\n", kloetze[i].name, kloetze[i].total_num,
                   kloetze[i].less_mirrored ? " (nicht gespiegelt)" : "");

            total *= kloetze[i].total_num;
            sum += kloetze[i].total_num;
        }
        printf("Loesungsraum: %.3e\n", total);
        printf("Anzahl Steinpositionen: %i\n\n", sum);
    }

    printf("LOESEN:\n");
    solutions = 0;

    time(&start);
    solve(0, 0);
    time(&end);

    printf("\nGesamte Loesungszahl: %i\n", solutions);
    printf("Dauer: %i s (est)\n", (int) (end-start));
    printf("Tests: %.0f\n", (double)tcnt);

    return 0;
}
