// $Id: MandelbrotSet.cpp 276 2007-04-30 09:30:44Z olau $

#include "stdafx.h"

// NOP(): expands to a single `nop` instruction in debug builds (_DEBUG defined)
// and to nothing otherwise.  Presumably meant as a place to hang a breakpoint;
// the only use visible in this file is the restart branch of
// MandelbrotSet::render().
#ifdef _DEBUG
#define NOP() __asm nop
#else
#define NOP()
#endif //_DEBUG


// Default constructor: sets up the rendering parameters.  No ColorScheme is
// created here; render() allocates one on demand when `scheme` is still NULL.
MandelbrotSet::MandelbrotSet()
    : threshold(4.0)                         // escape limit compared against x*x + y*y
    , maxIterations(MandelbrotDefaultDepth)  // iteration cap per pixel
    , algorithm(C)                           // NOTE(review): presumably selects the plain C renderer - confirm
    , scheme(NULL)                           // colour scheme is created lazily
{
}


// Render one horizontal line of the Mandelbrot set with plain C++ arithmetic.
//
//   re0, re1     real-axis range; samples start at re0 and advance by
//                scaleFactor while they stay below (re1 - scaleFactor)
//   im           imaginary coordinate shared by every pixel of the line
//   scanLine     destination pixels; one COLORREF is written per sample
//   scaleFactor  distance between two neighbouring samples
//
// Returns the sum of the iteration counts of all pixels written.
//
// NOTE(review): the number of pixels written is decided by the floating-point
// loop below, not by a pixel count, so the caller has to make sure scanLine
// is large enough.
__int64
MandelbrotSet::renderLine_C(double re0, double re1, double im, COLORREF *scanLine, double scaleFactor)
{
    __int64 totalIters = 0;
    __declspec(align(16)) double step = scaleFactor;
    __declspec(align(16)) double reEnd = re1 - step; /* XXX: upper bound pulled in by one step */
    __declspec(align(16)) double escapeLimit = threshold;
    __declspec(align(16)) double cRe = re0;
    while (cRe < reEnd)
    {
        // z = zRe + i*zIm starts at the origin for every pixel.
        __declspec(align(16)) double magSq = 0;
        __declspec(align(16)) double zRe = 0;
        __declspec(align(16)) double zIm = 0;
        int n = 0;
        __asm align 16;
        while (n < maxIterations)
        {
            // z <- z^2 + c, with c = cRe + i*im
            const double nextRe = zRe * zRe - zIm * zIm + cRe;
            const double nextIm = zRe * zIm + zRe * zIm + im;
            zRe = nextRe;
            zIm = nextIm;
            magSq = zRe * zRe + zIm * zIm;
            if (magSq > escapeLimit)
                break;  // escaped: n keeps the number of completed iterations
            ++n;
        }
        totalIters += n;
        // Points that never escaped are painted 0; all others get the
        // scheme's colour for their iteration count.
        if (n == maxIterations)
            *scanLine = 0;
        else
            *scanLine = scheme->color(n);
        ++scanLine;
        cRe += step;
    }
    return totalIters;
}


// Render one horizontal line of the Mandelbrot set using SSE2 packed-double
// instructions (MSVC x86 inline assembly).
//
//   re0, re1     real-axis range; samples start at re0 and advance by
//                scaleFactor while they stay below re1
//   im           imaginary coordinate shared by every pixel of the line
//   scanLine     destination pixels; one COLORREF is written per sample
//   scaleFactor  distance between two neighbouring samples
//
// Returns the sum of the iteration counts of all pixels written.
//
// Both lanes of every XMM register carry the same value, so each pass through
// the assembly block evaluates a single pixel.  The escape test looks at the
// magnitude of z *before* the update performed in the same loop pass.
//
// NOTE(review): the counter in ecx is decremented before it is tested, so a
// maxIterations value <= 0 makes it wrap around instead of ending the loop.
// NOTE(review): the number of pixels written is decided by the floating-point
// loop below, not by a pixel count, so the caller has to make sure scanLine
// is large enough.
__int64
MandelbrotSet::renderLine_SSE2(double re0, double re1, double im, COLORREF *scanLine, double scaleFactor)
{
    __declspec(align(16)) double _re0 = re0;
    __declspec(align(16)) double _re1 = re1;
    __declspec(align(16)) double re;
    __declspec(align(16)) double thres[] = {threshold, threshold};
    __declspec(align(16)) double _cy = (double) im;
    __declspec(align(16)) double scale = scaleFactor;
    __int64 cumulatedIters = 0;
    const int maxIter = maxIterations;
    int iters;  // remaining loop count when the assembly block exits; 0 = never escaped
    for (re = _re0; re < _re1; re += scale)
    {
        _asm
        {
            movsd    xmm6, re                 // xmm6 = ?,re
            shufpd   xmm6, xmm6, 0            // xmm6 = re,re
            movsd    xmm7, _cy                // xmm7 = ?,im  (scalar load: _cy is a single double)
            shufpd   xmm7, xmm7, 0            // xmm7 = im,im
            movapd   xmm5, mmword ptr [thres] // xmm5 = threshold,threshold
            xorpd    xmm0, xmm0               // zx = 0: every pixel must start at z = 0
            xorpd    xmm1, xmm1               // zy = 0
            mov      ecx, [maxIter]
            align 16
        iterloop:
            // xmm0 = zx             xmm1 = zy
            movapd   xmm2, xmm0
            mulpd    xmm0, xmm0
            movapd   xmm3, xmm1
            addpd    xmm1, xmm1
            // xmm0 = zx^2           xmm1 = 2 * zy     xmm2 = zx           xmm3 = zy
            mulpd    xmm1, xmm2
            movapd   xmm2, xmm0
            mulpd    xmm3, xmm3
            // xmm0 = zx^2           xmm1 = 2*zy*zx    xmm2 = zx^2         xmm3 = zy^2
            addpd    xmm1, xmm7
            subpd    xmm0, xmm3
            addpd    xmm2, xmm3
            // xmm0 = zx^2 - zy^2    xmm1=2*zy*zx+im   xmm2 = zx^2 + zy^2  xmm3 = zy^2
            cmplepd  xmm2, xmm5      // lane mask: all ones where zx^2 + zy^2 <= threshold
            addpd    xmm0, xmm6      // xmm0 = zx^2 - zy^2 + re
            movmskpd eax, xmm2
            test     eax, eax
            jz       done            // no lane is within the threshold any more: escaped
            dec      ecx
            jnz      iterloop
        done:
            mov      [iters], ecx
        }
        cumulatedIters += (maxIter - iters);
        // iters == 0 means the counter ran out without an escape: paint 0.
        *scanLine++ = (iters == 0)? 0 : scheme->color(maxIter - iters); 
    }
    return cumulatedIters;
}


// Render the whole image into `bitmap`, centred on (reC, imC).
//
//   reC, imC  complex coordinate mapped to the centre of the bitmap
//   scale     distance in the complex plane between neighbouring pixels
//   bitmap    destination; bmBits is addressed as rows of bmWidth COLORREF
//             values, one row after the other
//   restart   polled before every scan line; when set, the current pass is
//             abandoned and rendering starts again from the first line.
//             The flag is cleared at the end of every pass.
//             NOTE(review): presumably set from another thread - confirm.
//
// Returns the summed iteration count of the pass that ran to completion.
__int64
MandelbrotSet::render(double reC, double imC, double scale, const BITMAP *bitmap, bool *restart)
{
    // Create the colour scheme on first use.
    if (scheme == NULL)
        scheme = new ColorScheme;

    __int64 totalIters;
    bool finished;
    do
    {
        totalIters = 0;

        // Geometry is re-read from the bitmap at the start of every pass.
        const int width = bitmap->bmWidth;
        const int height = bitmap->bmHeight;
        const int halfWidth = width / 2;
        const int halfHeight = height / 2;
        const double reLeft = reC - scale * halfWidth;
        const double reRight = reC + scale * halfWidth;

        int row = 0;
        while (row < height)
        {
            if (*restart)
            {
                NOP();
                break;
            }
            COLORREF *rowPixels = (COLORREF *) bitmap->bmBits + row * width;
            const double rowIm = imC + scale * (row - halfHeight);
            totalIters += renderLine_C(reLeft, reRight, rowIm, rowPixels, scale);
            ++row;
        }

        // Cleared unconditionally, whether or not the pass was interrupted.
        *restart = false;
        finished = (row == height);
    } while (!finished);

    return totalIters;
}
