//########################################################################
// (C) Socionext Embedded Software Austria GmbH (SESA)
// All rights reserved.
// -----------------------------------------------------
// This document contains proprietary information belonging to
// Socionext Embedded Software Austria GmbH (SESA).
// Passing on and copying of this document, use and communication
// of its contents is not permitted without prior written authorization.
//########################################################################

#include <CanderaPlatform/Device/Common/BitmapConverter/ClutConverter.h>
#include <CanderaPlatform/Device/Common/BitmapConverter/NeuQuant.h>
#include <CanderaPlatform/Device/Common/BitmapConverter/MedianCut.h>

#include <CanderaPlatform/Device/Common/BitmapConverter/GenericBitmapBaseConvertor.h>
#include <CanderaPlatform/Device/Common/BitmapConverter/GenericBitmapFormat.h>

namespace Candera {
    namespace ClutBitmapConverter {

        using namespace FeatStd::Internal;

        // Number of bytes per pixel (and per palette entry) in the intermediate
        // 4-channel buffers handled in this file.
        const UInt32 c_bytesPerPixel = 4;
        //const UInt32 c_bitsPerByte = 8;

        // Positions of the UInt32 fields that form the header of an encoded CLUT buffer.
        // A position is multiplied by sizeof(UInt32) to obtain the byte offset of the field.
        const UInt32 c_bufferSizeLocation           = 0; // total size of the encoded buffer in bytes
        const UInt32 c_bufferClutOffsetLocation     = 1; // byte offset of the palette (written as c_bufferHeaderSize)
        const UInt32 c_bufferIdxOffsetLocation      = 2; // byte offset of the per-pixel index data
        const UInt32 c_bufferColorSizeLocation      = 3; // GenericBitmapFormat::GetSize() of the palette colour format
        const UInt32 c_bufferColorOffsetLocation    = 4; // GenericBitmapFormat::GetOffset() of the palette colour format
        const UInt32 c_bufferColorBppLocation       = 5; // GenericBitmapFormat::GetBitsPerPixel() of the palette colour format
        const UInt32 c_bufferClutDataLocation       = 6; // packed CLUT settings (clutData) used for encoding


        // Size of the header in bytes: the seven fields above, 4 bytes each.
        const UInt32 c_bufferHeaderSize = 28;

        // Encodes a source bitmap of arbitrary pixel format as a CLUT bitmap.
        //
        // The work is done in two stages:
        //   1. GenericBitmapConvertor::Convert turns the source into a temporary
        //      RgbaUnsignedBytePixelFormat / PackAlignment1 image of dstWidth x dstHeight
        //      (c_bytesPerPixel bytes per pixel).
        //   2. ClutConvert turns that temporary image into the CLUT representation
        //      stored in dstBuffer.
        //
        // Returns false if either stage fails. The temporary image is released on
        // every path.
        bool Convert(
            UInt32 dstWidth, UInt32 dstHeight,
            //Int dstPixelFormat,
            //Bitmap::PackAlignment dstColorPackAllignment,
            UInt8* dstBuffer, UInt32* dstBufferSize,
            const GenericBitmapFormat* dstExtProp,
            UInt32 clutData,
            UInt32 srcWidth, UInt32 srcHeight,
            Int srcPixelFormat,
            Bitmap::PackAlignment srcColorPackAllignment,
            const UInt8* srcBuffer, UInt32 srcBufferSize,
            const GenericBitmapFormat* srcExtProp,
            GenericBitmapConvertor::PixelConversionType conversionType)
        {
            // Stage 1: bring the source to the destination dimensions with 4 bytes per pixel.
            UInt32 rgbaSize = dstWidth*dstHeight * c_bytesPerPixel;
            UInt8* rgbaPixels = ALLOC_ARRAY(UInt8, rgbaSize);
            GenericBitmapFormat rgbaFormat;

            bool succeeded = GenericBitmapConvertor::Convert(
                dstWidth, dstHeight,
                Bitmap::RgbaUnsignedBytePixelFormat,
                Bitmap::PackAlignment1,
                rgbaPixels, &rgbaSize,
                &rgbaFormat,
                srcWidth, srcHeight,
                srcPixelFormat,
                srcColorPackAllignment,
                srcBuffer, srcBufferSize,
                srcExtProp,
                conversionType
                );

            // Stage 2: only attempted when the temporary image is valid. Source and
            // destination dimensions are identical here because stage 1 already scaled.
            if (succeeded) {
                succeeded = ClutConvert(
                    dstWidth, dstHeight,
                    dstBuffer, dstBufferSize,
                    dstExtProp,
                    clutData,
                    dstWidth, dstHeight,
                    rgbaPixels, rgbaSize
                    );
            }

            DELETE_ARRAY(rgbaPixels);
            return succeeded;
        }

        // Encodes a 4-bytes-per-pixel image into the CLUT bitmap layout.
        //
        // Layout written to dstBuffer:
        //   - a header of seven UInt32 values (total size, palette offset, index offset,
        //     colour size / offset / bits-per-pixel taken from dstExtProp, clutData),
        //   - the palette: m_clutColorsCount packed colours of m_clutColorBpp bits each,
        //   - one index of m_clutIdxBytes bytes per source pixel, low byte first.
        //
        // The four bytes of a source pixel are treated as alpha, blue, green, red
        // (byte 0 to byte 3).
        //
        // dstWidth, dstHeight  used to compute the encoded size and the capacity of the index buffer.
        // dstBuffer            receives the encoded data; must not be null.
        // dstBufferSize        out: total size in bytes of the encoded data; must not be null.
        // dstExtProp           colour layout of the palette entries; must not be null.
        // clutData             packed CLUT settings, interpreted by ClutParameters.
        // srcWidth, srcHeight  dimensions of the source image. No scaling is done here, so the
        //                      source must not contain more pixels than dstWidth x dstHeight.
        // srcBuffer            source pixels; must not be null.
        // srcBufferSize        size of srcBuffer in bytes; must cover srcWidth x srcHeight pixels.
        //
        // Returns false (without touching any output) when a required pointer is null,
        // when the source has more pixels than the destination, or when srcBuffer is too
        // small for the given source dimensions. Returns true otherwise.
        bool ClutConvert(
            UInt32 dstWidth, UInt32 dstHeight,
            UInt8* dstBuffer, UInt32* dstBufferSize,
            const GenericBitmapFormat* dstExtProp,
            UInt32 clutData,
            UInt32 srcWidth, UInt32 srcHeight,
            const UInt8* srcBuffer, UInt32 srcBufferSize

            )
        {
            // All of these are dereferenced unconditionally further down.
            if ((dstBuffer == 0) || (dstBufferSize == 0) || (dstExtProp == 0) || (srcBuffer == 0)) {
                return false;
            }

            const UInt32 dstPixelCount = dstWidth * dstHeight;
            const UInt32 srcPixelCount = srcWidth * srcHeight;

            // The index buffer and the reported output size are derived from the destination
            // dimensions, while the indices are produced per source pixel. Reject inputs that
            // would overrun the index buffer / dstBuffer or read beyond srcBuffer.
            if ((srcPixelCount > dstPixelCount) || ((srcBufferSize / c_bytesPerPixel) < srcPixelCount)) {
                return false;
            }

            ClutParameters clutParameters(clutData, dstExtProp);

            const UInt32 paletteBytes = static_cast<UInt32>(clutParameters.m_clutColorsCount*clutParameters.m_clutColorBpp / 8);
            const UInt32 indexOffset = c_bufferHeaderSize + paletteBytes;
            const UInt32 indexBytes = static_cast<UInt32>(clutParameters.m_clutIdxBytes);
            const UInt32 bufferSizeBytes = indexOffset + dstPixelCount*indexBytes;
            *dstBufferSize = bufferSizeBytes;

            // Number of low-order bits dropped from each 8-bit channel to reach the
            // channel width of the palette colour format.
            const UInt32 alphaDrop = static_cast<UInt32>(8U - clutParameters.m_clutColorSizeAlpha);
            const UInt32 blueDrop  = static_cast<UInt32>(8U - clutParameters.m_clutColorSizeBlue);
            const UInt32 greenDrop = static_cast<UInt32>(8U - clutParameters.m_clutColorSizeGreen);
            const UInt32 redDrop   = static_cast<UInt32>(8U - clutParameters.m_clutColorSizeRed);

            // First, reduce the 8-bit channels to the precision of the target colours
            // (still stored in 8 bits) so the palette is trained on representable colours.
            UInt8* intermediateBuffer = ALLOC_ARRAY(UInt8, srcBufferSize);
            for (UInt32 i = 0; i < srcBufferSize / c_bytesPerPixel; i++) {
                const UInt8* in = srcBuffer + c_bytesPerPixel * i;
                UInt8* out = intermediateBuffer + c_bytesPerPixel * i;
                out[0] = static_cast<UInt8>((in[0] >> alphaDrop) << alphaDrop);
                out[1] = static_cast<UInt8>((in[1] >> blueDrop) << blueDrop);
                out[2] = static_cast<UInt8>((in[2] >> greenDrop) << greenDrop);
                out[3] = static_cast<UInt8>((in[3] >> redDrop) << redDrop);
            }

            // The palette is zero-initialised because GetClut may leave entries untouched
            // (unknown algorithm, or fewer colours produced than requested); those entries
            // would otherwise be emitted with indeterminate content.
            const UInt32 clutBufferBytes = static_cast<UInt32>(clutParameters.m_clutColorsCount*c_bytesPerPixel);
            UInt8* clutBuffer = ALLOC_ARRAY(UInt8, static_cast<SizeType>(clutParameters.m_clutColorsCount*c_bytesPerPixel));
            for (UInt32 i = 0; i < clutBufferBytes; i++) {
                clutBuffer[i] = 0;
            }
            GetClut(clutBuffer, clutParameters.m_clutColorsCount, intermediateBuffer /*srcBuffer*/, srcBufferSize, clutParameters.m_clutAlgorithm);

            // Sized by the destination; srcPixelCount <= dstPixelCount is guaranteed above.
            UInt16* indexBuffer = ALLOC_ARRAY(UInt16, static_cast<SizeType>(dstPixelCount));

            // The indices are computed from the unreduced source pixels.
            if (clutParameters.m_clutDithering == 0) {
                GetIndexBuffer(clutBuffer, clutParameters.m_clutColorsCount, indexBuffer, srcWidth, srcHeight, srcBuffer, clutParameters.m_clutIdxAlphaBits, clutParameters.m_bpp);
            }
            else {
                GetDitheredIndexBuffer(clutBuffer, clutParameters.m_clutColorsCount, indexBuffer, srcWidth, srcHeight, srcBuffer, clutParameters.m_clutIdxAlphaBits, clutParameters.m_bpp);
            }

            // Header.
            const UInt32 colorSize = dstExtProp->GetSize();
            const UInt32 colorOffset = dstExtProp->GetOffset();
            const UInt32 colorBpp = dstExtProp->GetBitsPerPixel();

            Memory::Copy(dstBuffer + c_bufferSizeLocation *       sizeof(UInt32), &bufferSizeBytes,       sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferClutOffsetLocation * sizeof(UInt32), &c_bufferHeaderSize,    sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferIdxOffsetLocation *  sizeof(UInt32), &indexOffset,           sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferColorSizeLocation *  sizeof(UInt32), &colorSize,             sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferColorOffsetLocation* sizeof(UInt32), &colorOffset,           sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferColorBppLocation *   sizeof(UInt32), &colorBpp,              sizeof(UInt32));
            Memory::Copy(dstBuffer + c_bufferClutDataLocation *   sizeof(UInt32), &clutData,              sizeof(UInt32));

            // Palette: every entry is reduced to the target channel widths and packed.
            UInt8* p = dstBuffer + c_bufferHeaderSize;
            for (UInt32 i = 0; i < clutParameters.m_clutColorsCount; i++) {
                const UInt8* entry = clutBuffer + c_bytesPerPixel * i;
                const UInt8 a = static_cast<UInt8>(entry[0] >> alphaDrop);
                const UInt8 b = static_cast<UInt8>(entry[1] >> blueDrop);
                const UInt8 g = static_cast<UInt8>(entry[2] >> greenDrop);
                const UInt8 r = static_cast<UInt8>(entry[3] >> redDrop);

                if (clutParameters.m_clutColorBpp > 16) {
                    // Wide colours: packed as alpha, blue, green, red (red in the lowest bits)
                    // and stored most significant byte first.
                    UInt32 container = a;
                    container = (container << clutParameters.m_clutColorSizeBlue) + b;
                    container = (container << clutParameters.m_clutColorSizeGreen) + g;
                    container = (container << clutParameters.m_clutColorSizeRed) + r;

                    for (UInt8 bytes = 0; bytes < clutParameters.m_clutColorBpp / 8; bytes++) {
                        *p = static_cast<UInt8>((container >> (clutParameters.m_clutColorBpp - 8 * (bytes + 1))) & 0xFFU);
                        p++;
                    }
                }
                else {
                    // Narrow colours: packed as alpha, red, green, blue (blue in the lowest bits).
                    UInt16 container = a;
                    container = static_cast<UInt16>(static_cast<UInt16>(container << clutParameters.m_clutColorSizeRed) + r);
                    container = static_cast<UInt16>(static_cast<UInt16>(container << clutParameters.m_clutColorSizeGreen) + g);
                    container = static_cast<UInt16>(static_cast<UInt16>(container << clutParameters.m_clutColorSizeBlue) + b);

                    // The two bytes are swapped before the most-significant-byte-first write below.
                    container = static_cast<UInt16>(static_cast<UInt16>(container << 8) | static_cast<UInt16>(container >> 8U));

                    for (UInt8 bytes = 0; bytes < clutParameters.m_clutColorBpp / 8; bytes++) {
                        const UInt16 shifted = static_cast<UInt16>(container >> (clutParameters.m_clutColorBpp - 8 * (bytes + 1)));
                        *p = static_cast<UInt8>(shifted & 0xFFU);
                        p++;
                    }
                }
            }

            // Indices: m_clutIdxBytes bytes per pixel, least significant byte first.
            for (UInt32 i = 0; i < srcPixelCount; i++) {
                for (UInt32 byteNo = 0; byteNo < indexBytes; byteNo++) {
                    *p = static_cast<UInt8>((indexBuffer[i] >> (8U * byteNo)) & 0xFFU);
                    p++;
                }
            }

            DELETE_ARRAY(intermediateBuffer);
            DELETE_ARRAY(clutBuffer);
            DELETE_ARRAY(indexBuffer);

            return true;
        }

        // Decodes a CLUT bitmap (as produced by the encoding functions of this file)
        // into the requested destination pixel format.
        // This overload only forwards all of its arguments, unchanged and in the same
        // order, to DecodeFromClut and returns its result.
        bool Convert(
            UInt32 dstWidth, UInt32 dstHeight,
            Int dstPixelFormat,
            Bitmap::PackAlignment dstColorPackAllignment,
            UInt8* dstBuffer, UInt32* dstBufferSize,
            const GenericBitmapFormat* dstExtProp,
            UInt32 srcWidth, UInt32 srcHeight,
            const UInt8* srcBuffer)
        {
            const bool decoded = DecodeFromClut(
                dstWidth, dstHeight,
                dstPixelFormat, dstColorPackAllignment,
                dstBuffer, dstBufferSize, dstExtProp,
                srcWidth, srcHeight, srcBuffer);

            return decoded;
        }

        // Builds a palette of clutCount colours with the NeuQuant quantizer.
        //
        // The quantizer is fed a copy of srcBuffer from which the first byte of every
        // 4-byte pixel (the alpha byte) has been removed; the resulting colour map is
        // written to clutBuffer by NeuQuant::WriteColorMap.
        void GetNeuQuantClut(UInt8* clutBuffer, UInt32 clutCount, const UInt8* srcBuffer, UInt32 srcBufferSize)
        {
            // Three of every four source bytes are kept.
            UInt8* colorBytes = ALLOC_ARRAY(UInt8, static_cast<SizeType>(3 * srcBufferSize / c_bytesPerPixel));

            UInt32 written = 0;
            for (UInt32 srcIdx = 0; srcIdx < srcBufferSize; ++srcIdx) {
                const bool isAlphaByte = ((srcIdx % 4) == 0);
                if (!isAlphaByte) {
                    colorBytes[written] = srcBuffer[srcIdx];
                    ++written;
                }
            }

            NeuQuant quantizer(clutCount);

            quantizer.InitNet(colorBytes, static_cast<Int32>(3 * srcBufferSize / c_bytesPerPixel), 1);
            quantizer.Learn();
            //quantizer.unbiasnet();

            quantizer.WriteColorMap(clutBuffer);
            DELETE_ARRAY(colorBytes);
        }

        // Builds a palette of at most clutCount colours with the median cut algorithm.
        //
        // Every 4-byte pixel of srcBuffer becomes one Point (all four bytes are used as
        // coordinates). The colours returned by MedianCut are copied to clutBuffer,
        // c_bytesPerPixel bytes per entry.
        //
        // clutBuffer must provide room for clutCount entries. Never more than clutCount
        // entries are written, and entries for which MedianCut delivered no colour are
        // set to zero so that the palette never contains indeterminate data.
        void GetMedianCutClut(UInt8* clutBuffer, UInt32 clutCount, const UInt8* srcBuffer, UInt32 srcBufferSize)
        {
            const UInt32 numPoints = srcBufferSize / c_bytesPerPixel;
            Point* points = ALLOC_ARRAY(Point, numPoints);

            for (UInt32 i = 0; i < numPoints; i++) {
                for (UInt32 j = 0; j < c_bytesPerPixel; j++) {
                    points[i].x[j] = srcBuffer[c_bytesPerPixel * i + j];
                }
            }

            VECTOR(Point) palette =
                MedianCut(points, numPoints, clutCount);

            VECTOR_ITERATOR(Point) iter;
            UInt32 clutIdx = 0;
            // Bounded by clutCount: the capacity of clutBuffer is the only thing known here,
            // the number of colours MedianCut returns is not.
            for (iter = VECTOR_BEGIN(palette); (iter != VECTOR_END(palette)) && (clutIdx < clutCount); ++iter) {
                for (UInt32 j = 0; j < c_bytesPerPixel; j++) {
                    clutBuffer[clutIdx * c_bytesPerPixel + j] = (*iter).x[j];
                }
                clutIdx++;
            }

            // Clear the entries that received no colour.
            for (UInt32 k = clutIdx * c_bytesPerPixel; k < clutCount * c_bytesPerPixel; k++) {
                clutBuffer[k] = 0;
            }

            DELETE_ARRAY(points);
        }

        // Fills clutBuffer with a palette of clutCount colours computed from srcBuffer,
        // using the quantization algorithm selected by clutAlgorithm
        // (c_NeuQuantAlgorithm or c_MedianCutAlgorithm).
        // For any other value nothing is done and clutBuffer is left untouched.
        void GetClut(UInt8* clutBuffer, UInt32 clutCount, const UInt8* srcBuffer, UInt32 srcBufferSize, UInt32 clutAlgorithm)
        {
            if (clutAlgorithm == c_NeuQuantAlgorithm) {
                GetNeuQuantClut(clutBuffer, clutCount, srcBuffer, srcBufferSize);
            }
            else if (clutAlgorithm == c_MedianCutAlgorithm) {
                GetMedianCutClut(clutBuffer, clutCount, srcBuffer, srcBufferSize);
            }
            else {
                // Unknown algorithm: no palette is generated.
            }
        }

        // Finds the palette entry closest to the colour (r, g, b, a).
        //
        // Closeness is the sum of the squared channel differences. Palette entries are
        // stored as alpha, blue, green, red. The first entry with the smallest distance
        // wins; index 0 is returned when no entry is closer than the initial bound of
        // 4 * 255 * 255 (in particular when clutCount is 0).
        //
        // When indexAlphaSize is greater than 0, alpha is not part of the comparison.
        // Instead the upper indexAlphaSize bits of a are added to the result, shifted
        // left by (bpp - indexAlphaSize).
        UInt16 GetIndexForColor(UInt8 r, UInt8 g, UInt8 b, UInt8 a, const UInt8* clutBuffer, UInt32 clutCount, UInt8 indexAlphaSize, UInt8 bpp)
        {
            const bool alphaInIndex = (indexAlphaSize > 0);

            UInt32 bestDistance = 4 * 255 * 255;
            UInt16 bestEntry = 0;

            const UInt8* entry = clutBuffer;
            for (UInt32 entryNo = 0; entryNo < clutCount; ++entryNo) {
                // With alpha carried by the index the alpha difference is defined as zero.
                const Int32 deltaA = alphaInIndex ? 0 : (a - entry[0]);
                const Int32 deltaB = b - entry[1];
                const Int32 deltaG = g - entry[2];
                const Int32 deltaR = r - entry[3];

                const UInt32 distance = static_cast<UInt32>(deltaA*deltaA + deltaB*deltaB + deltaG*deltaG + deltaR*deltaR);
                if (distance < bestDistance) {
                    bestDistance = distance;
                    bestEntry = static_cast<UInt16>(entryNo);
                }

                entry += c_bytesPerPixel;
            }

            if (alphaInIndex) {
                const UInt8 alphaBits = a >> (8U - indexAlphaSize);
                bestEntry = static_cast<UInt16>(static_cast<UInt16>(alphaBits) << (bpp - indexAlphaSize)) + bestEntry;
            }

            return bestEntry;
        }

        void GetIndexBuffer(const UInt8* clutBuffer, UInt32 clutCount, UInt16* indexBuffer,
            UInt32 width, UInt32 height, const UInt8* srcBuffer, UInt8 idxAlphaSize, UInt8 bpp)
        {

            for (UInt32 y = 0; y < height; y++) {
                for (UInt32 x = 0; x < width; x++) {
                    UInt8 a = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 0];
                    UInt8 b = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 1];
                    UInt8 g = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 2];
                    UInt8 r = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 3];

                    indexBuffer[x + y*width] = GetIndexForColor(r, g, b, a, clutBuffer, clutCount, idxAlphaSize, bpp);
                }
            }
        }

        // Returns the quantization error that the already processed neighbours pass on
        // to the pixel (x, y) for one channel (componentIdx in 0..3).
        //
        // Weights: upper-left 1/16, upper 5/16, upper-right 3/16, left 7/16.
        // The caller must make sure that x >= 1, y >= 1 and x + 1 < width, because the
        // neighbours at x - 1, x + 1 and y - 1 are read without any range check.
        Float GetErrorComponent(const Int16* errorBuffer, UInt32 x, UInt32 y, UInt32 width, UInt8 componentIdx)
        {
            const UInt32 rowStride = width * c_bytesPerPixel;
            const UInt32 upperRow = (y - 1) * rowStride + componentIdx;
            const UInt32 currentRow = y * rowStride + componentIdx;

            const UInt32 leftColumn = (x - 1) * c_bytesPerPixel;
            const UInt32 thisColumn = x * c_bytesPerPixel;
            const UInt32 rightColumn = (x + 1) * c_bytesPerPixel;

            const Float upperLeft  = static_cast<Float>(errorBuffer[leftColumn + upperRow]);
            const Float upper      = static_cast<Float>(errorBuffer[thisColumn + upperRow]);
            const Float upperRight = static_cast<Float>(errorBuffer[rightColumn + upperRow]);
            const Float left       = static_cast<Float>(errorBuffer[leftColumn + currentRow]);

            return (1.0F / 16.0F)*upperLeft +
                   (5.0F / 16.0F)*upper +
                   (3.0F / 16.0F)*upperRight +
                   (7.0F / 16.0F)*left;
        }

        // Clamps x to the range of an 8-bit channel: values below 0 yield 0,
        // values above 0xff yield 0xff, everything else is returned unchanged.
        UInt8 capValue(Int16 x)
        {
            UInt8 capped;

            if (x < 0) {
                capped = 0;
            }
            else if (x > 0xff) {
                capped = 0xff;
            }
            else {
                capped = static_cast<UInt8>(x);
            }

            return capped;
        }

        void GetDitheredIndexBuffer(const UInt8* clutBuffer, UInt32 clutCount, UInt16* indexBuffer,
            UInt32 width, UInt32 height, const UInt8* srcBuffer, UInt8 idxAlphaSize, UInt8 bpp)
        {

            Int16* error = ALLOC_ARRAY(Int16, static_cast<SizeType>(width*height * 4));

            for (UInt32 y = 0; y < height; y++) {
                for (UInt32 x = 0; x < width; x++) {
                    UInt8 a = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 0];
                    UInt8 b = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 1];
                    UInt8 g = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 2];
                    UInt8 r = srcBuffer[x * c_bytesPerPixel + y*width * c_bytesPerPixel + 3];

                    UInt32 idx = 0;

                    if ((x == 0) || (y == 0) || (x == width - 1)) {
                        idx = GetIndexForColor(r, g, b, a, clutBuffer, clutCount, idxAlphaSize, bpp);
                    }
                    else {
                        //Float alphaError = GetErrorComponent(error, x, y, width, 0);
                        Float blueError = GetErrorComponent(error, x, y, width, 1);
                        Float greenError = GetErrorComponent(error, x, y, width, 2);
                        Float redError = GetErrorComponent(error, x, y, width, 3);

                        //Int16 ca = a + alphaError;
                        Int16 cb = b + static_cast<Int16>(blueError);
                        Int16 cg = g + static_cast<Int16>(greenError);
                        Int16 cr = r + static_cast<Int16>(redError);

                        // a = capValue(ca);
                        b = capValue(cb);
                        g = capValue(cg);
                        r = capValue(cr);

                        idx = GetIndexForColor(r, g, b, a, clutBuffer, clutCount, idxAlphaSize, bpp);
                    }

                    indexBuffer[x + y*width] = static_cast<UInt16>(idx);
                    //idx >>= idxAlphaSize;
                    idx = idx % static_cast<UInt32>(static_cast<UInt32>(1) << (bpp - idxAlphaSize));
                    error[x*c_bytesPerPixel + y*width * c_bytesPerPixel + 0] = static_cast<Int16>(a - clutBuffer[c_bytesPerPixel * idx + 0]);
                    error[x*c_bytesPerPixel + y*width * c_bytesPerPixel + 1] = static_cast<Int16>(b - clutBuffer[c_bytesPerPixel * idx + 1]);
                    error[x*c_bytesPerPixel + y*width * c_bytesPerPixel + 2] = static_cast<Int16>(g - clutBuffer[c_bytesPerPixel * idx + 2]);
                    error[x*c_bytesPerPixel + y*width * c_bytesPerPixel + 3] = static_cast<Int16>(r - clutBuffer[c_bytesPerPixel * idx + 3]);

                }
            }

            DELETE_ARRAY(error);

        }

        // Scales a channel value that is stored with bpp bits to the 8-bit range.
        //
        // The result is actualValue * 0xff / maxVal (integer division, truncated to 8 bits),
        // where maxVal is the largest value representable with bpp bits, limited to 0xff
        // for bpp >= 8. A bpp of 0 yields 0.
        UInt8 GetNormalisedValue(UInt8 actualValue, UInt8 bpp)
        {
            UInt8 normalised = 0;

            if (bpp != 0) {
                // bpp one-bits; at most eight of them fit into the 8-bit maximum.
                const UInt8 maxVal = (bpp >= 8) ? static_cast<UInt8>(0xff)
                                                : static_cast<UInt8>((1U << bpp) - 1U);

                CANDERA_SUPPRESS_LINT_FOR_NEXT_EXPRESSION(795, "Variable maxVal is only 0 when this line is reached if bpp is smaller than zero.");
                const UInt32 res = static_cast<UInt32>(actualValue * 0xff / maxVal);
                normalised = static_cast<UInt8>(res);
            }

            return normalised;
        }

        bool DecodeFromClut(
            UInt32 dstWidth, UInt32 dstHeight,
            Int dstPixelFormat,
            Bitmap::PackAlignment dstColorPackAllignment,
            UInt8* dstBuffer, UInt32* dstBufferSize,
            const GenericBitmapFormat* dstExtProp,
            UInt32 srcWidth, UInt32 srcHeight,
            const UInt8* srcBuffer
            )
        {

            UInt32 clutOffset = 0;
            Memory::Copy(&clutOffset, srcBuffer + c_bufferClutOffsetLocation * sizeof(UInt32), sizeof(UInt32));
            UInt32 idxOffset = 0;
            Memory::Copy(&idxOffset, srcBuffer + c_bufferIdxOffsetLocation * sizeof(UInt32), sizeof(UInt32));
            UInt32 colorSize = 0;
            Memory::Copy(&colorSize, srcBuffer + c_bufferColorSizeLocation * sizeof(UInt32), sizeof(UInt32));
            UInt32 colorOffsets = 0;
            Memory::Copy(&colorOffsets, srcBuffer + c_bufferColorOffsetLocation * sizeof(UInt32), sizeof(UInt32));
            UInt32 colorBpp = 0;
            Memory::Copy(&colorBpp, srcBuffer + c_bufferColorBppLocation * sizeof(UInt32), sizeof(UInt32));
            UInt32 clutData = 0;
            Memory::Copy(&clutData, srcBuffer + c_bufferClutDataLocation * sizeof(UInt32), sizeof(UInt32));

            GenericBitmapFormat tableFormat(static_cast<UInt8>(colorBpp), colorOffsets, colorSize);
            ClutParameters clutParams(clutData, &tableFormat);


            UInt32 intermediateBufferSize = srcWidth*srcHeight * 4;
            UInt8* intermediateBuffer = ALLOC_ARRAY(UInt8, intermediateBufferSize);
            GenericBitmapFormat intermediateProperties;

            const UInt8* clutStart = srcBuffer + clutOffset;
            const UInt8* idxStart = srcBuffer + idxOffset;

            for (UInt32 y = 0; y < srcHeight; y++) {
                for (UInt32 x = 0; x < srcWidth; x++) {

                    UInt16 colorIdx = 0;

                    //for (int i = 0; i < clutParams.m_clutIdxBytes; i++)
                    for (Int8 i = static_cast<Int8>(clutParams.m_clutIdxBytes - 1); i >= 0; i--) {
                        colorIdx = static_cast<UInt16>(colorIdx << 8) + idxStart[clutParams.m_clutIdxBytes*(x + y*srcWidth) + i];
                    }
                    //idxAlpha = colorIdx % (1 << clutParams.m_clutIdxAlphaBits);
                    //colorIdx = colorIdx >> clutParams.m_clutIdxAlphaBits;
                    UInt16 idxAlpha = colorIdx >> (clutParams.m_channelSize);
                    colorIdx = static_cast<UInt16>(static_cast<UInt32>(colorIdx) % (static_cast<UInt32>(1) << clutParams.m_channelSize));


                    UInt32 color = 0;
                    for (UInt8 i = 0; i < static_cast<Int>(clutParams.m_clutColorBpp / 8); i++) {
                        UInt16 index = colorIdx * clutParams.m_clutColorBpp / 8 + i;
                        color = (color << 8U) + clutStart[index];
                    }

                    color = color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorBpp);
                    //breaking
                    UInt8 r = 0;
                    UInt8 g = 0;
                    UInt8 b = 0;
                    UInt8 a = 0;
                    if (clutParams.m_clutColorBpp == 16) {

                        UInt16 cl16 = static_cast<UInt16>(color);
                        cl16 = static_cast<UInt16>(cl16 << 8U) | static_cast<UInt16>(cl16 >> 8U);
                        color = cl16;

                        UInt8 sr = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeRed));
                        color >>= clutParams.m_clutColorSizeRed;
                        UInt8 sg = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeGreen));
                        color >>= clutParams.m_clutColorSizeGreen;
                        UInt8 sb = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeBlue));
                        color >>= clutParams.m_clutColorSizeBlue;
                        UInt8 sa = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeAlpha));

                        r = sb;
                        g = sg;
                        b = sr;
                        a = sa;
                    }
                    else {
                        r = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeRed));
                        color >>= clutParams.m_clutColorSizeRed;
                        g = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeGreen));
                        color >>= clutParams.m_clutColorSizeGreen;
                        b = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeBlue));
                        color >>= clutParams.m_clutColorSizeBlue;
                        a = static_cast<UInt8>(color % static_cast<UInt32>(static_cast<UInt32>(1) << clutParams.m_clutColorSizeAlpha));
                    }

                    /*
                    UInt8 a = clutStart[4 * colorIdx + 0];
                    UInt8 b = clutStart[4 * colorIdx + 1];
                    UInt8 g = clutStart[4 * colorIdx + 2];
                    UInt8 r = clutStart[4 * colorIdx + 3];
                    */
                    if (clutParams.m_clutColorSizeAlpha == 0) {
                        if (clutParams.m_clutIdxAlphaBits == 0) {
                            intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 0] = 0xff;
                        }
                        else {
                            intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 0] = GetNormalisedValue(static_cast<UInt8>(idxAlpha), clutParams.m_clutIdxAlphaBits);
                        }
                    }
                    else {
                        intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 0] = GetNormalisedValue(a, clutParams.m_clutColorSizeAlpha);
                    }

                    intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 1] = GetNormalisedValue(b, clutParams.m_clutColorSizeBlue);
                    intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 2] = GetNormalisedValue(g, clutParams.m_clutColorSizeGreen);
                    intermediateBuffer[x * c_bytesPerPixel + y*srcWidth * c_bytesPerPixel + 3] = GetNormalisedValue(r, clutParams.m_clutColorSizeRed);

                }
            }

            bool ret = GenericBitmapConvertor::Convert(
                dstWidth, dstHeight,
                dstPixelFormat,
                dstColorPackAllignment,
                dstBuffer, dstBufferSize,
                dstExtProp,
                srcWidth, srcHeight,
                Bitmap::RgbaUnsignedBytePixelFormat,
                Bitmap::PackAlignment1,
                intermediateBuffer, intermediateBufferSize,
                &intermediateProperties
                );

            DELETE_ARRAY(intermediateBuffer);

            return ret;
        }

    } //ClutBitmapConverter
} //Candera

