/**
 * code_converter.c
 *
 * Converts one and two byte encoded characters to Unicode and UTF8
 *
 * Component: midw_common\CodeConversion
 *
 * Author: Ian Lisney
 *
 ***********************************************************************/

/***********************************************************************
 * Include Files
 ***********************************************************************/

#include "code_converter.h"
#include "CharMap.h"
#include "CharMapVar.h"
#include "language_converter.h"

/***********************************************************************
 * Local Definitions
 ***********************************************************************/
/* Size in bytes of the scratch buffer that ConvertUnicodetoUTF8() hands to
   ConvertUnicodeChartoUTF8() for one encoded character plus terminator */
#define MAX_LENGTH_PER_CHARACTER      8
/* Stored by ConvertCodestoUnicode() when the map lookup returns 0 */
#define UNKNOWN_CHAR                  0x3F /* ? */
/* Subtracted from 0x80 to form 0x7E, the top of the lower range of valid
   second bytes of a two byte code */
#define VAL_TWO                       0x02
/* Lowest valid second byte of a two byte code */
#define HEX_VAL_FOURTY                0x40
/* Lowest and highest valid second byte of a four byte code */
#define HEX_VAL_THIRTY                0x30
#define HEX_VAL_THIRTY_NINE           0x39
/* Shift counts used when packing the bytes of a multi byte code into one
   32 bit value */
#define SHIFT_8_BITS                  0x08
#define SHIFT_16_BITS                 0x10
/* Highest valid first byte of a two or four byte code.
   NOTE: the identifier ends in the digit 0, not the letter O */
#define UTF8_CHAR_LIMIT_MINUS_TW0     0xFE
/* Shift counts used to split a character code into 6 bit groups for UTF8
   (SHIFT_24_BITS is also used when packing four byte codes) */
#define SHIFT_6_BITS                  0x06
#define SHIFT_12_BITS                 0x0C
#define SHIFT_18_BITS                 0x12
#define SHIFT_24_BITS                 0x18
/* Codes below this value are written as a single UTF8 byte. The same value
   is OR-ed into every UTF8 continuation byte and is used as a range bound
   when parsing multi byte input */
#define UTF8_UNICODE_LIMIT1           (tU32)0x80
/* Codes below this value (and not below UTF8_UNICODE_LIMIT1) are written
   as two UTF8 bytes */
#define UTF8_UNICODE_LIMIT2           (tU32)0x800
/* Codes up to and including this value are written as three UTF8 bytes */
#define UTF16_MAX_BMP                 (tU32)0x0000FFFF /* Largest valid code point for a BMP value */
/* Largest code ConvertUnicodeChartoUTF8() will encode (six UTF8 bytes);
   anything above is rejected */
#define UTF16_MAX_UTF32               (tU32)0x7FFFFFFF

/***********************************************************************
 * Function Prototypes
 ***********************************************************************/

/* Encodes one character code as a NUL terminated UTF8 byte sequence and
   returns the number of bytes written; defined further down in this file */
tU32 ConvertUnicodeChartoUTF8(tU8* sConverted,
                              tU32 uni_code);

/* Supplies the map id that ConvertCodestoUnicode() passes on to
   CharMap_vSelectMap(); the definition is not part of this file section */
MapIdEnum CharMap_getLanguageMapping (tenCodeConvMapping language);



/***********************************************************************
 * Translates an array of language specific character codes into an
 * array of Unicode character codes, using the character map selected
 * for the given language.
 *
 * Nothing is converted unless the input holds at least one code and
 * the output array offers room for every input code. A code for which
 * the map lookup returns 0 is stored as UNKNOWN_CHAR.
 *
 * in  tU32* const*        startCodes    address of the pointer to the first input code
 * in  const tU32*         endCodes      pointer one past the last input code
 * in  tU32* const*        startUnicode  address of the pointer to the first output entry
 * in  const tU32*         endUnicode    pointer one past the last output entry
 * in  tenCodeConvMapping  language      current language
 * out tU32                              number of Unicode codes written
 ***********************************************************************/
tU32 ConvertCodestoUnicode(tU32* const*       startCodes,
                           const tU32*        endCodes,
                           tU32* const*       startUnicode,
                           const tU32*        endUnicode,
                           tenCodeConvMapping language)
{
   const tU32* in_codes    = *startCodes;
   tU32*       out_codes   = *startUnicode;
   const tU32  in_count    = (tU32)(endCodes - in_codes);
   const tU32  out_entries = (tU32)(endUnicode - out_codes);
   MapIdEnum   selected_map;
   tU32        unicode;
   tU32        written = 0;

   /* Reject empty input and output arrays that are too small */
   if ((in_count == 0) || (in_count > out_entries))
   {
      return 0;
   }

   /* Activate the map that belongs to the requested language */
   selected_map = CharMap_getLanguageMapping (language);
   CharMap_vSelectMap(selected_map);

   while (written < in_count)
   {
      unicode = CharMap_ulwTranspose(in_codes[written]);

      /* A lookup result of 0 marks a character the map does not know */
      out_codes[written] = (unicode != 0) ? unicode : UNKNOWN_CHAR;
      ++written;
   }

   return written;
}

/***********************************************************************
 * Converts an array of Unicode character codes to a NUL terminated
 * UTF8 coded string.
 *
 * The output range [*start_utf8_array, end_utf8_array) must hold the
 * encoded text AND the terminating NUL. When the next character would
 * not fit, conversion stops in front of it, so the string is only ever
 * cut at a character boundary.
 *
 * Codes that yield no text are skipped and not counted: the value 0
 * (it would terminate the string) and values that
 * ConvertUnicodeChartoUTF8() rejects.
 *
 * The output is left untouched when there are no input codes, when the
 * output range is empty, or when a pointer is NULL.
 *
 * in  tU32* const*  start_unicode_array   address of the pointer to the first unicode code
 * in  const tU32*   end_unicode_array     pointer one past the last unicode code
 * in  tU8* const*   start_utf8_array      address of the pointer to the first UTF8 output byte
 * in  const tU8*    end_utf8_array        pointer one past the last UTF8 output byte
 * out tU32                                number of UTF8 bytes written,
 *                                         not counting the terminating NUL
 ***********************************************************************/
tU32 ConvertUnicodetoUTF8(tU32* const*  start_unicode_array,
                          const tU32*   end_unicode_array,
                          tU8* const*   start_utf8_array,
                          const tU8*    end_utf8_array)
{
   tU8 encoded[MAX_LENGTH_PER_CHARACTER];
   const tU32* source;
   tU8* target;
   tU32 number_of_codes;
   tU32 capacity;
   tU32 written = 0;
   tU32 length;
   tU32 i;
   tU32 k;

   if ((start_unicode_array == NULL) || (start_utf8_array == NULL) ||
       (end_unicode_array == NULL) || (end_utf8_array == NULL))
   {
      return 0;
   }

   source = *start_unicode_array;
   target = *start_utf8_array;

   if ((source == NULL) || (target == NULL))
   {
      return 0;
   }

   /* Empty or inverted ranges: nothing to read or nowhere to write.
      Checked before subtracting so the sizes cannot wrap around. */
   if ((end_unicode_array <= source) || (end_utf8_array <= target))
   {
      return 0;
   }

   number_of_codes = (tU32)(end_unicode_array - source);

   /* One output byte is reserved for the terminating NUL */
   capacity = (tU32)(end_utf8_array - target) - 1;

   for (i = 0; i < number_of_codes; ++i)
   {
      /* convert character to UTF8 bytes for this single character */
      length = ConvertUnicodeChartoUTF8(encoded, source[i]);

      if ((length > 0) && (source[i] != 0))
      {
         if (length > (capacity - written))
         {
            /* output is full - stop in front of this character */
            break;
         }

         /* append at the current end instead of rescanning the string */
         for (k = 0; k < length; ++k)
         {
            target[written + k] = encoded[k];
         }
         written += length;
      }
   }

   target[written] = '\0';
   return written;
}

/***********************************************************************
 * Splits a byte string into an array of character codes, one array
 * entry per character.
 *
 * For the multi byte mappings tested below the bytes are grouped as:
 *   b1 0x00..0x7F                              single byte code: b1
 *   b1 0x81..0xFE, b2 0x40..0x7E or 0x80..0xFE two byte code:
 *                                              (b1 << 8) + b2
 *   b1 0x81..0xFE, b2 0x30..0x39, b3 != 0, b4  four byte code:
 *                      (b4 << 24) + (b3 << 16) + (b1 << 8) + b2
 * Invalid sequences, and sequences cut off by the end of the input,
 * are dropped without producing a code. No byte beyond string_length
 * is ever read.
 *
 * For every other mapping each input byte becomes one code.
 *
 * Conversion stops when the input is used up or max_size_output codes
 * have been stored.
 *
 * in  tU32*               codes            output array for the codes
 * in  tU32                max_size_output  number of entries available in codes
 * in  const tU8*          input_string     bytes to convert
 * in  tU32                string_length    number of bytes in input_string
 * in  tenCodeConvMapping  language         mapping the input is encoded in
 * out tS32                                 number of codes written
 *                                          (0 if codes or input_string is NULL)
 ***********************************************************************/
tS32 ConvertStringtoLanguageCodes(tU32*               codes,
                                  tU32               max_size_output,
                                  const tU8*         input_string,
                                  tU32               string_length,
                                  tenCodeConvMapping language)
{
   tU32 count = 0;   /* codes stored so far */
   tU32 pos = 0;     /* index of the next unread input byte */
   tU8 b1 = 0;
   tU8 b2 = 0;
   tU8 b3 = 0;
   tU32 b4 = 0;      /* 32 bit wide so the shift by 24 stays unsigned */

   if ((codes == NULL) || (input_string == NULL))
   {
      return (tS32)count;
   }

   /* Handle languages with 2 or 4 byte codes */
   if ( ( language == CODECONV_CHINESE_UNICODE_1_1 )  ||  /* Chinese Unicode 1.1 */
        ( language == CODECONV_WINDOWS_CODEPAGE_936 ) ||  /* Simplified Chinese GBK */
        ( language == CODECONV_WINDOWS_CODEPAGE_950 ) ||  /* Traditional Chinese Big5 */
        ( language == CODECONV_WINDOWS_CODEPAGE_951 ) ||  /* Traditional Chinese Big5 Extensions */
        ( language == CODECONV_WINDOWS_CODEPAGE_932 ) ||  /* Japanese */
        ( language == CODECONV_WINDOWS_CODEPAGE_949 ) )   /* Korean */
   {
      while ((pos < string_length) && (count < max_size_output))
      {
         b1 = input_string[pos++];

         /* 0x00 to 0x7F is a valid single byte */
         if (b1 < UTF8_UNICODE_LIMIT1)
         {
            codes[count++] = b1;
         }
         /* 0x81 to 0xFE is valid first byte of two or four byte code;
            a second byte must still be available */
         else if ((b1 > UTF8_UNICODE_LIMIT1) && (b1 <= UTF8_CHAR_LIMIT_MINUS_TW0) &&
                  (pos < string_length))
         {
            b2 = input_string[pos++];

            /* 0x40 to 0x7E or 0x80 to 0xFE is valid second byte of two byte code */
            if (((HEX_VAL_FOURTY <= b2) && (b2 <= (UTF8_UNICODE_LIMIT1 - VAL_TWO))) ||
                ((UTF8_UNICODE_LIMIT1 <= b2) && (b2 <= UTF8_CHAR_LIMIT_MINUS_TW0)))
            {
               codes[count++] = ((tU32)b1 << SHIFT_8_BITS) + b2; /* b1 & b2 ok */
            }
            /* 0x30 to 0x39 is valid second byte of a four byte code;
               a third byte must still be available */
            else if ((HEX_VAL_THIRTY <= b2) && (b2 <= HEX_VAL_THIRTY_NINE) &&
                     (pos < string_length))
            {
               b3 = input_string[pos++];

               /* Only invalid value for third byte is 0x00 */
               if (b3 == 0)
               {
                  /* b3 is invalid - drop b1..b3 and step past b4 (if any).
                     Codes already stored and count stay untouched. */
                  if (pos < string_length)
                  {
                     ++pos;
                  }
               }
               else if (pos < string_length)
               {
                  /* b1, b2, b3 & b4 ok */
                  b4 = input_string[pos++];
                  codes[count++] = ((b4 << SHIFT_24_BITS) +
                                    ((tU32)b3 << SHIFT_16_BITS) +
                                    ((tU32)b1 << SHIFT_8_BITS) +
                                    b2);
               }
               else
               {
                  /* input ends before b4 - throw the sequence away */
               }
            }
            else
            {
               /* b2 is invalid, or input ends before b3 - throw it away */
            }
         }
         else
         {
            /* b1 is invalid, or input ends before b2 - throw it away */
         }
      }
   }
   else
   {
      /* Languages with single byte codes - just convert from 8 bit array to 32 bit */
      while ((pos < string_length) && (count < max_size_output))
      {
         codes[count++] = (tU32)input_string[pos++];
      }
   }

   return (tS32)count;
}

/***********************************************************************
 * Encodes one character code as a NUL terminated UTF8 byte sequence.
 *
 * Codes up to UTF16_MAX_UTF32 are encoded in one to six bytes. For a
 * larger code only an empty string is written.
 * The output array must offer room for at least 7 bytes
 * (six encoded bytes plus the terminator).
 *
 * in  tU8*       sConverted    pointer to output array
 * in  tU32       c             character code to convert
 * out tU32                     number of UTF8 bytes written, not counting
 *                              the terminator; 0 if sConverted is NULL
 *                              or the code is out of range
 ***********************************************************************/
tU32 ConvertUnicodeChartoUTF8(tU8* sConverted,
                              tU32 c)
{
   tU32 length;       /* number of bytes in the encoded sequence */
   tU32 lead_marker;  /* length prefix bits of the first byte */
   tU32 remaining;    /* code bits not yet written */
   tU32 idx;

   if (sConverted == NULL)
   {
      return 0;
   }

   /* Single byte codes are stored unchanged */
   if (c < UTF8_UNICODE_LIMIT1)
   {
      sConverted[0] = (tU8)c;
      sConverted[1] = '\0';
      return 1;
   }

   /* Pick sequence length and lead byte prefix from the size of the code */
   if (c < UTF8_UNICODE_LIMIT2)
   {
      length = 2;
      lead_marker = 0xC0;
   }
   else if (c <= UTF16_MAX_BMP)
   {
      length = 3;
      lead_marker = 0xE0;
   }
   else if (c <= 0x1FFFFF)
   {
      length = 4;
      lead_marker = 0xF0;
   }
   else if (c <= 0x3FFFFFF)
   {
      length = 5;
      lead_marker = 0xF8;
   }
   else if (c <= UTF16_MAX_UTF32)
   {
      length = 6;
      lead_marker = 0xFC;
   }
   else
   {
      /* character out of range and can't be converted */
      sConverted[0] = '\0';
      return 0;
   }

   /* Fill the continuation bytes from the last one backwards; each takes
      the lowest six bits that are still left */
   remaining = c;
   for (idx = length - 1; idx > 0; --idx)
   {
      sConverted[idx] = (tU8)((remaining & 0x3F) | UTF8_UNICODE_LIMIT1);
      remaining >>= SHIFT_6_BITS;
   }

   /* The bits left over go into the lead byte together with its prefix */
   sConverted[0] = (tU8)(remaining | lead_marker);
   sConverted[length] = '\0';

   return length;
}
