/**
 * language_converter.c
 *
 * Converts language strings to UTF8
 *
 * Component: midw_common\CodeConversion
 *
 * Author: Ian Lisney
 *

 *----------------------------------------------------------------------------
 * Description       : see header file.
 * 17/09/2014| Tapeswar Puhan| fix for NIKAI2-6013	, '' at end of string
 *----------------------------------------------------------------------------**/
 
/***********************************************************************
 * Include Files
 ***********************************************************************/

#include "language_converter.h"
#include "code_converter.h"
#include <stdio.h> /*lint !e451 */ /* Warning 451: Repeated include file '...' */
#include <string.h> /* strcat, strlen, strncat */

#include "CodeConvTrace.h"
#ifdef VARIANT_S_FTR_ENABLE_TRC_GEN
#define ETG_DEFAULT_TRACE_CLASS CODECONV_TRACE_CLASS_LANGUAGE_CONVERTER
#include "trcGenProj/Header/language_converter.c.trc.h"
#endif

/* Number of fragments appended to strcat_string since the counter was last
 * reset (by trace_start, by trace_body when it emits a line, or by trace_end). */
tU8 strcat_total = 0;
/* Pending trace line assembled by trace_body and emitted by trace_body or
 * trace_end. Capacity is 80 bytes including the terminating NUL. */
char strcat_string[80];

/* Debug/trace print functions */

/*
 * Emit one trace string.
 *
 * The text is written to stderr when CODECONV_DEBUG is defined and is always
 * handed to ETG_TRACE_COMP with a "language_converter: " prefix.
 * A NULL string is ignored.
 *
 * in  const char*  op_string  text to emit (may be NULL)
 */
void trace_output (const char* op_string)
{
   if (NULL == op_string)
   {
      return;   /* nothing to report */
   }

#ifdef CODECONV_DEBUG
   (void) fprintf (stderr, "%s", op_string);
#endif /* CODECONV_DEBUG */

   ETG_TRACE_COMP(("language_converter: %s", op_string));
}

/*
 * Emit a heading line for a trace dump and restart the fragment counter
 * used by trace_body. The pending line text (strcat_string) is not touched.
 *
 * in  const char*  o_string  heading text, passed on to trace_output
 */
void trace_start (const char* o_string)
{
   /* The two steps are independent of each other. */
   strcat_total = 0;
   trace_output (o_string);
}

/*
 * Emit the pending trace line (followed by a newline) and reset both the
 * line buffer and the fragment counter.
 */
static void trace_flush_line (void)
{
   const size_t used = strlen (strcat_string);

   /* Only add the newline when it and the terminator still fit. */
   if ((used + 2) <= sizeof (strcat_string))
   {
      strcat_string[used] = '\n';
      strcat_string[used + 1] = '\0';
   }
   trace_output (strcat_string);
   strcat_string[0] = 0;
   strcat_total = 0;
}

/*
 * Append one fragment to the pending trace line.
 *
 * The line is emitted automatically after 10 fragments. Unlike a plain
 * strcat, the append is bounded by the size of strcat_string: two bytes are
 * always kept free for the trailing "\n" and the terminator. If the fragment
 * does not fit behind the text already collected, that text is emitted first;
 * a fragment that is longer than a whole line is truncated.
 * (Ten " %lx" fragments of a 32-bit value can need 90 characters, which is
 * more than the 80-byte buffer holds.)
 *
 * in  const char*  o_string  fragment to append; NULL is ignored
 */
void trace_body (const char* o_string)
{
   const size_t capacity = sizeof (strcat_string);
   const size_t reserved = 2;   /* "\n" + terminating NUL */
   size_t used;

   if (o_string == NULL)
   {
      return;
   }

   used = strlen (strcat_string);
   if ((used > 0) && ((used + strlen (o_string) + reserved) > capacity))
   {
      /* No room left on this line: emit it and start a fresh one. */
      trace_flush_line ();
      used = 0;
   }

   strcat_total++;
   /* used <= capacity - reserved holds here, so the subtraction cannot wrap. */
   (void) strncat (strcat_string, o_string, capacity - reserved - used);

   if (strcat_total == 10)
   {
      trace_flush_line ();
   }
}

/*
 * Finish a trace dump: terminate the pending line with a newline, emit it
 * through trace_output and reset the line buffer and fragment counter.
 */
void trace_end ()
{
   (void) strcat (strcat_string, "\n");
   trace_output (strcat_string);

   /* Start the next dump from an empty line. */
   strcat_total = 0;
   strcat_string[0] = '\0';
}

void tU8_to_hex (const tU8* start_s, const tU8* end_s)
{
   tU16 i = 0;
   char o_buffer[4];
   while ((i < (tU16)(end_s - start_s)) && i<(CODECONV_MAX_INPUTLENGTH * 4))
   {
      (void) sprintf (o_buffer," %x",start_s[i++]);
      trace_body (o_buffer);
   }
   trace_end ();
}

void tU16_to_hex (const tU16* start_s, const tU16* end_s)
{
   tU16 i = 0;
   char o_buffer[8];
   while ((i < (tU16)(end_s - start_s)) && i<CODECONV_MAX_INPUTLENGTH)
   {
      (void) sprintf (o_buffer," %x",start_s[i++]);
      trace_body (o_buffer);
   }
   trace_end ();
}

void tU32_to_hex (const tU32* start_s, const tU32* end_s)
{
   int i = 0;
   char o_buffer[16];
   while ((i < (tU16)(end_s - start_s)) && i<CODECONV_MAX_INPUTLENGTH)
   {
      (void) sprintf (o_buffer," %lx",start_s[i++]);
      trace_body (o_buffer);
   }
   trace_end ();
}

/***********************************************************************
 * Function Prototypes
 ***********************************************************************/

/***********************************************************************
 * Check that the 'number_of_items' items of the string starting at
 * str[start_index] are in the range 'lower' to 'upper'.
 * Range includes endpoints.
 *
 * Checking stops early, without reporting a failure, when a '\0' item is
 * reached before 'number_of_items' items have been examined.
 *
 * in  const tU8*     str              pointer to the string to be checked
 * in  tU32           start_index      position in the string to start comparison
 * in  tU32           number_of_items  number of items to check
 * in  unsigned char  lower            smallest accepted value
 * in  unsigned char  upper            largest accepted value
 * out tBool                           1 if no examined item is out of range,
 *                                     0 otherwise
 ***********************************************************************/
tBool checkNextN( const tU8* str,
                  tU32 start_index,
                  tU32 number_of_items,
                  unsigned char lower,
                  unsigned char upper )
{
   const tU32 stop_index = number_of_items + start_index;
   tU32 pos;

   for( pos = start_index; pos < stop_index; ++pos )
   {
      const tU8 item = str[pos];

      if( item == '\0' )
      {
         break;   /* string ended before all items were seen */
      }

      if( (item < lower) || (item > upper) )
      {
         return 0;
      }
   }
   return 1;
}

/***********************************************************************
 * Check whether input string is valid UTF8.
 *
 * Handles 1, 2, 3 and 4 byte sequences: the lead byte selects the sequence
 * length and every following byte of the sequence must be a continuation
 * byte (0x80..0xbf). A sequence that does not fit completely inside
 * 'string_length' makes the string invalid.
 * Overlong encodings and surrogate code points are not rejected.
 * No recovery from errors in the input string.
 *
 * in  const tU8*          input_string   pointer to string to check
 * in  tU32                string_length  count of bytes in string
 * out tBool               1 if string is UTF8, 0 if not
 ***********************************************************************/
tBool CheckStringIsUTF8( const tU8* input_string, tU32 string_length )
{
   tU32 i = 0;

   while( i < string_length )
   {
      const tU8 lead = input_string[i];
      tU32 trailing;   /* continuation bytes expected after 'lead' */

      if( lead <= 0x7f )
      {
         ++i;
         continue; // ASCII or ASCII compat utf8
      }

      if( lead >= 0xc2 && lead <= 0xdf )
      {
         trailing = 1;   // Start of 2 byte seq
      }
      else if( lead >= 0xe0 && lead <= 0xef )
      {
         trailing = 2;   // Start of 3 byte seq
      }
      else if( lead >= 0xf0 && lead <= 0xf4 )
      {
         trailing = 3;   // Start of 4 byte seq
      }
      else
      {
         return 0;   // 0x80..0xc1 and 0xf5..0xff cannot start a sequence
      }

      // Sequence must lie completely inside the string (i < string_length
      // here, so the subtraction cannot wrap).
      if( trailing > (string_length - 1 - i) )
      {
         return 0;
      }

      // Check the bytes AFTER the lead byte. Passing 'i' itself would test
      // the lead byte against the continuation range and always fail.
      // Note: checkNextN stops at a '\0' byte without reporting a failure.
      if( !checkNextN( input_string, i + 1, trailing, 0x80, 0xbf ))
      {
         return 0;
      }

      i += trailing + 1;
   }

   return 1;
}


/***********************************************************************
 * Converts a string of native language codes to UTF8.
 *
 * The input is taken up to sourceEnd or up to the first zero byte,
 * whichever comes first. Input that CheckStringIsUTF8 accepts is copied
 * unchanged; anything else goes through ConvertStringtoLanguageCodes,
 * ConvertCodestoUnicode and ConvertUnicodetoUTF8. The output is always
 * terminated with a zero byte.
 *
 * The output array must have the same or greater size than the input array.
 *
 * NOTE(review): the terminator may be written at index
 * (targetEnd - *targetStart), i.e. this function treats the byte at
 * targetEnd as writable. Confirm that callers reserve it.
 *
 * in  tU8* const*         sourceStart  pointer to language codes to convert
 * in  const tU8*          sourceEnd    pointer to end of language codes
 * in  tU8* const*         targetStart  pointer to output array
 * in  const tU8*          targetEnd    pointer to end of output array
 * in  tenCodeConvMapping  language     language coding identifier
 * out tU32                             value returned by ConvertUnicodetoUTF8
 *                                      when that stage runs; the result of
 *                                      ConvertCodestoUnicode if it is 0;
 *                                      otherwise 0 (empty input, input already
 *                                      UTF8, no language codes found)
 ***********************************************************************/
tU32 ConvertLanguagetoUTF8 (tU8* const*        sourceStart,
                            const tU8*         sourceEnd,
                            tU8* const*        targetStart,
                            const tU8*         targetEnd,
                            tenCodeConvMapping language)
{
   tU32 codes [CODECONV_MAX_INPUTLENGTH] = {0};
   tU32* p_codes = codes;
   tU32 unicode_array [CODECONV_MAX_INPUTLENGTH] = {0};
   tU32* p_unicode_array = unicode_array;
   tU32 return_value = 0;
   tS32 number_codes = 0;
   tU32 number_of_chars;
   tU32 copy_count;
   tU32 i = 0;
   const tU8* source = *sourceStart;
   tU8* target = *targetStart;
   char trace_string [128];
   tU32 target_length;

   target[0] = 0x00; /* terminate string with null */
   target_length = (tU32)(targetEnd - target);

   /* calculate input and output sizes */
   number_of_chars = (tU32)(sourceEnd - source);
   while ((i < number_of_chars) && (source[i] > 0))
   {
      i++;
   }
   number_of_chars = i;  /* Adjust for the string termination being earlier than the given length */

   if(number_of_chars > 0)
   {
      (void) sprintf (trace_string,"Language is %i\n", (int) language);
      trace_start (trace_string);
      tU8_to_hex (source, sourceEnd);

      /* No need to convert if already utf8 */
      if(!CheckStringIsUTF8(source, number_of_chars))
      {
         /* Convert input string to code array */
         number_codes = ConvertStringtoLanguageCodes(codes,
                        CODECONV_MAX_INPUTLENGTH - 1,
                        source,
                        number_of_chars,
                        language);

         if(number_codes == 0)
         {
            (void) sprintf (trace_string,"No valid language codes\n");
            trace_start (trace_string);
         }

         /* if conversion is ok */
         if(number_codes > 0)
         {
            /* String contains at least some valid language codes */
            (void) sprintf (trace_string,"%i Language Codes\n", (int) number_codes);
            trace_start (trace_string);
            tU32_to_hex (codes, codes + number_codes);

            return_value = ConvertCodestoUnicode(&p_codes,
                                                 (codes + number_codes),
                                                 &p_unicode_array,
                                                 (unicode_array + CODECONV_MAX_INPUTLENGTH),
                                                 language);

            (void) sprintf (trace_string,"%lu Unicode codes\n", (unsigned long) return_value);
            trace_start (trace_string);
            tU32_to_hex (unicode_array, unicode_array + return_value);

            /* NOTE(review): return_value is unsigned, so an error value such
             * as (tU32)-1 also passes this test - confirm what
             * ConvertCodestoUnicode returns on failure. */
            if(return_value > 0)
            {
               /* NOTE(review): the end of the unicode input is derived from
                * number_codes, not from the count just returned by
                * ConvertCodestoUnicode - confirm the two always agree. */
               return_value = ConvertUnicodetoUTF8((tU32* const*)&p_unicode_array,
                                                   (const tU32*)(unicode_array + number_codes),
                                                   (tU8* const*)&target,
                                                   (const tU8*)targetEnd);

               if(return_value <= target_length)
               {
                  target[return_value] = 0x00; /* terminate string with null */
               }
               else
               {
                  /* The reported count cannot be a position inside the
                   * output array (for example an error value); writing a
                   * terminator there would corrupt memory. Hand back an
                   * empty string instead. */
                  target[0] = 0x00;
               }

               (void) sprintf (trace_string,"%lu UTF8 Codes\n", (unsigned long) return_value);
               trace_start (trace_string);
               if(return_value <= target_length)
               {
                  tU8_to_hex ((const tU8*)target, (const tU8*)(target + return_value));
               }
            }
         }
      }
      else
      {
         /* Already utf8 - just copy input to output */
         copy_count = number_of_chars;
         if(copy_count > target_length)
         {
            copy_count = target_length;
            /* Do not cut a multi-byte sequence in half: step back while the
             * first byte that would be dropped is a continuation byte. */
            while ((copy_count > 0) && ((source[copy_count] & 0xC0) == 0x80))
            {
               copy_count--;
            }
         }

         /* Only bytes inside the input are read; the terminator is written
          * explicitly rather than copied from source[number_of_chars], which
          * lies outside the input when no zero byte was found. */
         for (i = 0; i < copy_count; i++)
         {
            target[i] = source [i];
         }
         target[copy_count] = 0x00; /* terminate string with null */

         /* NOTE(review): return_value stays 0 on this path although bytes
          * were produced - confirm callers expect that. */
         (void) sprintf (trace_string,"No conversion (already UTF8) - %lu Codes\n", (unsigned long) number_of_chars);
         trace_start (trace_string);
         tU8_to_hex ((const tU8*)target, (const tU8*)(target + copy_count));
      }
   }
   else
   {
      (void) sprintf (trace_string,"Language is %i - zero length - ", (int) language);
      trace_start (trace_string);
   }
   return return_value;
}
