
#include "Utf8StringBase.h"
#include "Utf8String.h"
#include "Utf8StringComparator.h"

#ifndef TESTGUI
#define ETRACE_S_IMPORT_INTERFACE_GENERIC
#define ET_TRACE_INFO_ON
#include "etrace_if.h"

#ifdef VARIANT_S_FTR_ENABLE_TRC_GEN
#define ETG_DEFAULT_TRACE_CLASS TR_CLASS_MIDW_COMMON_UTF8STRINGBASE
#include "trcGenProj/Header/Utf8StringBase.cpp.trc.h"
#endif

#include "../midw_common_trace.h"    // my trace channel
#include "../midw_common_trace_macros.h"
#else
#define UTF8SORT_DEBUG
#endif //TESTGUI

#define MAX_FIRST_LETTERS 5
#define LATIN_LOWER_A 0x61
#define LATIN_LOWER_Z 0x7A
//#define LATIN_UPPER_A 0x41
//#define LATIN_UPPER_Z 0x5A
#define LATIN_SHIFT   0x20

// Language currently in effect for sorting. Starts as MIDW_COMMON_LANG_NONE and is
// overwritten by set_Current_Language() when that method is given an in-range language.
tenMidwCommonLanguage tclUtf8StringBase::currentLanguage = tenMidwCommonLanguage (MIDW_COMMON_LANG_NONE);

// Sort-table variant that goes with currentLanguage. Starts as UTF8SORT_DEFAULT and is
// selected by set_Current_Language() at the same time as currentLanguage.
tenUtf8SortLanguage tclUtf8StringBase::currentSortLanguage = tenUtf8SortLanguage (UTF8SORT_DEFAULT);

//Note - loading of pre-created data moved to end of file to avoid confusing Doxygen

//******************************************************************************

//FUNCTION:    tclUtf8StringBase::tclUtf8StringBase(tPCU8 pu8String,
//                                                  tU32 u32StringSize,
//                                                  tBool bAlloc)

//!            Constructor

//!            if the string pointer is NULL\n
//!            ___treat the string as empty\n
//!            else if string length parameter is zero\n
//!            ___calculate length of string\n
//!            if the current language is Chinese and the string is not empty\n
//!            ___convert the string to Pinyin in a buffer owned by this object\n
//!            else if Alloc parameter is TRUE\n
//!            ___make a copy of the string\n
//!            Initialise number values to invalid\n
//!            Traverse string and extract values of all embedded strings of numbers

//PARAMETER:   tPCU8 pu8String - the string (may be NULL)
//             tU32 u32StringSize - the length of the string in bytes (not UTF8 characters)
//                                - if length is zero, the constructor calculates the length
//             tBool bAlloc - flag indicating whether to make a copy of the string (TRUE) or not (FALSE)
//                          - the Pinyin buffer is always owned by the object, whatever this flag says

//RETURNVALUE: None

//******************************************************************************
tclUtf8StringBase::tclUtf8StringBase(tPCU8 pu8String,
                                     tU32 u32StringSize,
                                     tBool bAlloc)
   :
   _pu8String(pu8String)
   ,_pu8Data(OSAL_NULL)
   ,_u32StringSize(u32StringSize)
   ,_u32Offset(0)
   ,_bSZOnce(FALSE)
   ,_bAlloc(bAlloc)
{
   po_string = this;

   if(pu8String == OSAL_NULL)
   {
      // Nothing to read: never carry a non-zero size together with a NULL source
      _u32StringSize = 0;
   }
   else if(u32StringSize == 0)
   {
      _u32StringSize = (tU32)strlen((const char*)pu8String);
   }

   if((currentLanguage == MIDW_COMMON_LANG_CHN) && (_u32StringSize != 0))
   {
      // Allow an average of 5 Pinyin characters per input byte, plus one byte for the terminator
      const tU32 u32Capacity = _u32StringSize * 5;
      _pu8Data = new tU8[u32Capacity + 1];
      if(_pu8Data)
      {
         tU32 converted_length = convertToPinyin (pu8String, _u32StringSize, _pu8Data, u32Capacity);
         if(converted_length > u32Capacity)
         {
            // Defensive: never index past the buffer, whatever the converter reports
            converted_length = u32Capacity;
         }
         _pu8Data[converted_length]=0;
         _u32StringSize = converted_length;

         if(converted_length != 0)
         {
            _pu8String = _pu8Data;
            // The buffer was created here, so the destructor must release it even if the
            // caller asked for no copy
            _bAlloc = TRUE;

            if(_pu8Data[0] != 0)
            {
#ifdef UTF8SORT_DEBUG
               fprintf (stderr,"Converted Pinyin %s\n",_pu8Data);
#else
               ETG_TRACE_USR4(("tclUtf8StringBase() Converted Pinyin %s\n",_pu8Data));
#endif //UTF8SORT_DEBUG
            }
         }
         else
         {
            // Nothing was produced: do not keep an owned buffer for an empty string
            static const tU8 cu8EmptyString[1] = { 0 };
            delete [] _pu8Data;
            _pu8Data = OSAL_NULL;
            _pu8String = cu8EmptyString;
            _bAlloc = FALSE;
         }
      }
   }
   else
   {
      if(_bAlloc && _u32StringSize && pu8String)
      {
         _pu8Data = new tU8[_u32StringSize+1];
         if(_pu8Data)
         {
            (tVoid)OSAL_pvMemoryCopy(_pu8Data,pu8String,_u32StringSize);
            _pu8Data[_u32StringSize]=0;
            _pu8String = _pu8Data;
         }
      }
      else
      {
         // Nothing to copy, so nothing to release later
         _bAlloc = FALSE;
      }
   }

   // Initialise number values to invalid
   tU8 no_values;
   for (no_values = 0; no_values < MAX_NUMBERS; no_values++)
   {
      _u32NumberValue[no_values] = cu32InvalidNumberValue;
   }
   // Traverse string and extract values of all embedded strings of numbers
   u32CalculateNumberValues();
}

//******************************************************************************

//FUNCTION:    tclUtf8StringBase::tclUtf8StringBase( const tclUtf8StringBase& roTheOther)

//!            Copy constructor

//!            Put the members into a defined "empty, owns nothing" state\n
//!            Delegate the actual copy to the assignment operator

//PARAMETER:   const tclUtf8StringBase& roTheOther - the string to copy

//RETURNVALUE: None

//******************************************************************************
tclUtf8StringBase::tclUtf8StringBase( const tclUtf8StringBase& roTheOther)
   :
   _pu8String(OSAL_NULL)
   ,_pu8Data(OSAL_NULL)
   ,_u32StringSize(0)
   ,_u32Offset(0)
   ,_bSZOnce(FALSE)
   ,_bAlloc(FALSE)
{
   // The assignment operator does not write _pu8Data on every path, and the destructor
   // inspects _pu8Data whenever _bAlloc is set, so the pointer must not start indeterminate.
   // NOTE(review): po_string is assigned in the primary constructor but not for copies -
   // confirm whether a copy needs it.
   *this = roTheOther;
}

//******************************************************************************

//FUNCTION:    tclUtf8StringBase::~tclUtf8StringBase()

//!            Destructor

//!            Releases the internal buffer when this object is flagged as owning one,
//!            then clears the string pointer

//PARAMETER:   None

//RETURNVALUE: None

//******************************************************************************
tclUtf8StringBase::~tclUtf8StringBase()
{
   if(_bAlloc)
   {
      // delete [] on a null pointer does nothing, so no separate pointer test is needed
      delete [] _pu8Data;
   }

   _pu8String = OSAL_NULL;
}


//FUNCTION:    tBool set_Current_Language (tenMidwCommonLanguage new_language)

//!            Sets the language to be used for sorting.

//!            if the language lies strictly between MIDW_COMMON_LANG_NONE and MIDW_COMMON_LANG_LAST\n
//!            ___currentLanguage is set to the parameter passed\n
//!            ___currentSortLanguage is set to the variant enum of that language, or to
//!            ___UTF8SORT_DEFAULT when the language has no variant sort table\n
//!            else\n
//!            ___nothing is changed

//PARAMETER:   tenMidwCommonLanguage new_language - the language to be used by UTF8Sort

//RETURNVALUE: tBool - TRUE if language in range else FALSE

//*****************************************************************************
tBool tclUtf8StringBase::set_Current_Language (tenMidwCommonLanguage new_language)
{
#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"set_Current_Language called with new_language [%i]\n",(int)new_language);
#else
   ETG_TRACE_USR4(("set_Current_Language called with new_language [%i]\n",(int)new_language));
#endif //UTF8SORT_DEBUG

   // Reject anything outside the open interval (NONE, LAST) without touching the current settings
   if (!(new_language > MIDW_COMMON_LANG_NONE && new_language < MIDW_COMMON_LANG_LAST))
   {
      return FALSE;
   }

   currentLanguage = tenMidwCommonLanguage (new_language);

   // Only the languages listed below have their own variant; all others use the Default Sort Table entry
   tenUtf8SortLanguage selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_DEFAULT);

   switch (currentLanguage)
   {
   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_CZE:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_CZECH);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_DAN:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_DANISH);
      break;

   // Finnish shares the Swedish variant
   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_SWE:
   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_FIN:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_SWEDISH);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_NOR:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_NORWEGIAN);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_POL:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_POLISH);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_SLO:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_SLOVAK);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_HUN:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_HUNGARIAN);
      break;

   case (tenMidwCommonLanguage)MIDW_COMMON_LANG_GRE:
      selectedSortLanguage = tenUtf8SortLanguage (UTF8SORT_GREEK);
      break;

   default:
      // No variance - keep the Default Sort Table entry
      break;
   }

   currentSortLanguage = selectedSortLanguage;
   return TRUE;
}


//******************************************************************************

//FUNCTION:    tU32 tclUtf8StringBase::u32GetSortValue(tU32 u32Utf8Char) const

//!            Returns the sort value corresponding to the given character

//!            Split the UTF8 value into its bytes and strip the leading zero bytes\n
//!            if no byte is left\n
//!            ___return 0\n
//!            Convert the UTF8 bytes to one UTF16 unit\n
//!            if the conversion fails\n
//!            ___return 0\n
//!            if UTF16 value >= cu32MaxUTF8Value\n
//!            ___return the result of u32GetBigSortValue\n
//!            Get the sort value from the Korean table if the current language is Korean,
//!            otherwise from the Chinese (Unicode 1.1) table\n
//!            (this will be used unless there is a non-zero entry in the Latin or Diacritics tables)\n
//!            Get the current sort language\n
//!            if the sort language is a variant language\n
//!            ___if the UTF8 value is in the Latin range\n
//!            ______Get sort value offset from Latin table\n
//!            ______if offset > 0\n
//!            _________sort value = offset + BASE_SORT + cu32ValidOffset\n
//!            ___else if the UTF8 value is in the Diacritic range\n
//!            ______Get sort value offset from Diacritic table\n
//!            ______if offset > 0\n
//!            _________sort value = offset + BASE_SORT + cu32ValidOffset\n

//PARAMETER:   tU32 u32Utf8Char - the bytes of one UTF8 character packed into a tU32,
//                                first byte in the most significant used position

//RETURNVALUE: tU32 - the sort value, or 0 if the value is zero or cannot be converted

//******************************************************************************
tU32 tclUtf8StringBase::u32GetSortValue(tU32 u32Utf8Char) const
{
#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"Lang = %i, UTF8 = 0x%x ", currentLanguage, u32Utf8Char);
#else
   ETG_TRACE_USR4(("Lang = %i, UTF8 = 0x%x ", currentLanguage, u32Utf8Char));
#endif //UTF8SORT_DEBUG

   // Unpack the value into bytes, most significant first, so it can be converted to UTF16
   tU8 utf8_string [5];
   utf8_string [0] = (tU8)((u32Utf8Char & 0xFF000000) >> 24);
   utf8_string [1] = (tU8)((u32Utf8Char & 0x00FF0000) >> 16);
   utf8_string [2] = (tU8)((u32Utf8Char & 0x0000FF00) >> 8);
   utf8_string [3] = (tU8)((u32Utf8Char & 0x000000FF));
   utf8_string [4] = 0;

   // Strip off leading zero bytes. The loop is bounded by len so that a value of zero
   // cannot walk past the end of utf8_string.
   const tU8* src_start = utf8_string;
   tU8 len = 4;
   while ((len > 0) && (*src_start == 0))
   {
      len--;
      src_start++;
   }

   if (len == 0)
   {
      // All bytes were zero: there is no character to look up
      return 0;
   }

   // Room for exactly one UTF16 unit
   tU16 output_string [1] = {'\0'};
   tU16* p_output_string = output_string;

   const ConversionResult conv_result = ConvertUTF8toUTF16 (&src_start,
                                                            (src_start + len),
                                                            &p_output_string,
                                                            (p_output_string + 1),
                                                            (ConversionFlags) lenientConversion);

   if (conv_result != conversionOK)
   {
#ifdef UTF8SORT_DEBUG
      fprintf(stderr,"Conversion failure = %i\n", conv_result);
#else
      ETG_TRACE_USR4(("Conversion failure = %i\n", conv_result));
#endif //UTF8SORT_DEBUG
      return 0;
   }

   if (output_string[0] >= tclUtf8StringBase::cu32MaxUTF8Value)
   {
      // Outside the range covered by the tables used below
      return tclUtf8StringBase::u32GetBigSortValue(u32Utf8Char);
   }

   // Base sort value: Korean has its own table, every other language uses the Chinese table.
   // This value is used unless there is a non-zero entry in the Latin or Diacritics tables.
   tU32 sort_value = 0;
   if(currentLanguage == MIDW_COMMON_LANG_KOR)
   {
      sort_value = *(tclUtf8StringBase::aru32SortValues_Korean + output_string[0]);
   }
   else
   {
      sort_value = *(tclUtf8StringBase::aru32SortValues_Chinese_Unicode_1_1 + output_string[0]);
   }

   // Check whether the current language has a variance from the Default Sort Order
   const tU8 variant_language = (tU8)tclUtf8StringBase::get_sort_language();

#ifdef UTF8SORT_DEBUG
   fprintf (stderr," sort = %i\n", sort_value - cu32ValidOffset);
#else
   ETG_TRACE_USR4((" sort = %i\n", sort_value - cu32ValidOffset));
#endif //UTF8SORT_DEBUG

   if (variant_language > 0)
   {
      // Check whether utf8value is in Latin range
      if (u32Utf8Char >= BASE_LATIN && u32Utf8Char <= TOP_LATIN)
      {
         // Get sort value from the Latin table (column index is variant - 1)
         const tU32 new_sort_value = tclUtf8StringBase::aru8SortValues_Variant_Latin [u32Utf8Char - BASE_LATIN][variant_language - 1];

         // The variant sort value will be more than zero if there is one
         if (new_sort_value > 0)
         {
            // Add offset, previously subtracted so that sort value would fit into a byte, and add flag to indicate special entry
            sort_value = new_sort_value + BASE_SORT + tclUtf8StringBase::cu32ValidOffset;
         }

#ifdef UTF8SORT_DEBUG
         fprintf(stderr,"Latin new sort = %i\n", sort_value - cu32ValidOffset);
#else
         ETG_TRACE_USR4(("Latin new sort = %i\n", sort_value - cu32ValidOffset));
#endif //UTF8SORT_DEBUG

         return sort_value;
      }

      // Check whether utf8value is in Diacritic range
      if (u32Utf8Char >= BASE_DIACRITICS && u32Utf8Char <= TOP_DIACRITICS)
      {
         // Get sort value from the Diacritics table (column index is variant - 1)
         const tU32 new_sort_value = tclUtf8StringBase::aru8SortValues_Variant_Diacritics [u32Utf8Char - BASE_DIACRITICS][variant_language - 1];

         // The variant sort value will be more than zero if there is one
         if (new_sort_value > 0)
         {
            sort_value = new_sort_value + BASE_SORT + cu32ValidOffset;
         }

#ifdef UTF8SORT_DEBUG
         fprintf(stderr,"Diacritics new sort = %i\n", sort_value - cu32ValidOffset);
#else
         ETG_TRACE_USR4(("Diacritics new sort = %i\n", sort_value - cu32ValidOffset));
#endif //UTF8SORT_DEBUG

         return sort_value;
      }
   }

   return sort_value;
}


//******************************************************************************

//FUNCTION:    tVoid tclUtf8StringBase::u32CalculateNumberValues()

//!            Extracts and saves the values of all numeric sequences in the string

//!            Called by the Constructor\n
//!            Does nothing if the first number value is already set or there is no string\n
//!            Navigate string and for any sequence of embedded ASCII digits, calculate and store the numeric value\n
//!            Only the first cu32NumOfDigits digits of a sequence contribute to its value\n
//!            Only the first MAX_NUMBERS sequences are stored; scanning stops after that\n
//!            If the string contains no digits, the first value is set to cu32StringNumberValue

//PARAMETER:   None

//RETURNVALUE: tVoid

//*******************************************************************************
tVoid tclUtf8StringBase::u32CalculateNumberValues()
{
   if( (_u32NumberValue[0] != cu32InvalidNumberValue) || (_pu8String == OSAL_NULL) )
   {
      return;
   }

   tBool bInDigitRun = FALSE;   // TRUE while the scan is inside a sequence of digits
   tU32 u32DigitCount = 0;      // digits accumulated for the current sequence
   tU32 u32NumberCount = 0;     // completed sequences, also the index of the current one

   //calculate as long as we have characters to check
   for(tU32 u32Index = 0 ; u32Index < _u32StringSize ; u32Index++)
   {
      const tU8 u8Char = *(_pu8String+u32Index);

      if((u8Char < '0') || (u8Char > '9') ) //not a number
      {
         if (bInDigitRun != FALSE)
         {
            // We have a non-digit ending a digit string
            u32NumberCount++;
            bInDigitRun = FALSE;
         }
         continue;
      }

      if (bInDigitRun == FALSE)
      {
         if (u32NumberCount >= (tU32)MAX_NUMBERS)
         {
            // Every slot of _u32NumberValue is in use: storing more would write past the array
            break;
         }

         // Starting a digit string
         bInDigitRun = TRUE;
         u32DigitCount = 1;
         _u32NumberValue[u32NumberCount] = (tU32)(u8Char-'0');
      }
      else if (u32DigitCount < cu32NumOfDigits)
      {
         _u32NumberValue[u32NumberCount] = 10*_u32NumberValue[u32NumberCount] + (tU32)(u8Char-'0');
         u32DigitCount++;
      }
      // else: ignore digits after cu32NumOfDigits. The counter is deliberately not advanced
      // any further, so that it cannot wrap and re-enable accumulation.
   }

   if (bInDigitRun != FALSE)
   {
      // We were processing a digit string when we reached end of string
      u32NumberCount++;
   }

   // if no number found, set number value to default for a pure string
   if(u32NumberCount == 0)
   {
      _u32NumberValue[0] = cu32StringNumberValue;
   }
}

//******************************************************************************

//FUNCTION:    tU32 tclUtf8StringBase::u32GetNextUtf8Value()

//!            Returns the next Utf8 value in the string

//!            Forwards to u32GetUtf8Value using this object's own position (_u32Offset)
//!            and sharp-s flag (_bSZOnce); both are passed by reference, so they are
//!            updated by the call

//PARAMETER:   None

//RETURNVALUE: tU32 - the value of the next UTF8 character

//*******************************************************************************
tU32 tclUtf8StringBase::u32GetNextUtf8Value()
{
   return u32GetUtf8Value(_u32Offset, _bSZOnce);
}

//******************************************************************************

//FUNCTION:    tU32 tclUtf8StringBase::u32GetUtf8Value(tU32& ru32OffSet,
//                                                     tBool& rbSZOnce) const

//!            Returns the value of the next UTF8 character in the string, which
//!            may be a multi-byte character (2, 3 or 4 bytes)\n
//!            Characters whose sort value (after subtracting cu32ValidOffset) is zero are skipped\n
//!            The German sharp s (bytes 0xC3 0x9F) is returned as 's' twice, on two consecutive calls\n
//!            If the character is a digit, then returns its character value but
//!            skips to end of sequence of digits

//PARAMETER:   tU32& ru32OffSet - in/out: byte offset from the start of the string;
//                                advanced past everything consumed by this call
//             tBool& rbSZOnce  - in/out: TRUE when the first 's' of a sharp s has been
//                                returned and the second 's' is still pending

//RETURNVALUE: tU32 - the bytes of the UTF8 character packed into one value (first byte most
//                    significant), or cu32EndOfString when there is no string, the end of
//                    the string is reached or a zero byte is found

//*******************************************************************************
tU32 tclUtf8StringBase::u32GetUtf8Value(tU32& ru32OffSet,
                                        tBool& rbSZOnce) const
{
   if(_pu8String == OSAL_NULL)
   {
      return cu32EndOfString;   //invalid pointer
   }

   if(rbSZOnce)
   {
      rbSZOnce = FALSE;    // sharp s should be handled as 'ss', this is the second 's'
      return (tU32)'s';
   }

   tU32 sort_value = 0;
   tU32 u32Utf8Value = 0;

   while (sort_value == 0)
   {
      // Checked on every pass, not only the first: after an ignored character the next
      // read must not go beyond the string (the buffer need not be zero terminated)
      if( ru32OffSet >= _u32StringSize )
      {
         return cu32EndOfString;   //found end of string
      }

      // Get next character
      u32Utf8Value = *(_pu8String + ru32OffSet++);//get next byte from string

      if( u32Utf8Value == 0)
      {
         return cu32EndOfString;   //end of string
      }

      if(u32Utf8Value > cu32BiggestOneByteChar)//this utf8 char consists of more than one byte
      {
         if( ru32OffSet >= _u32StringSize )
         {
            return cu32EndOfString;   //found end of string
         }

         const tU32 u32SecondByte = *(_pu8String + ru32OffSet++); //get 2nd byte
         if( u32SecondByte == 0)
         {
            return cu32EndOfString;   //end of string
         }
         u32Utf8Value = (u32Utf8Value << 8) + u32SecondByte;

         // Append the 3rd and 4th byte while the next byte is a continuation byte (10xxxxxx)
         for (tU8 u8Extra = 0; u8Extra < 2; u8Extra++)
         {
            if( (ru32OffSet >= _u32StringSize) || (((*(_pu8String + ru32OffSet)) & 0xC0) != 0x80) )
            {
               break;
            }
            u32Utf8Value = (u32Utf8Value << 8) + *(_pu8String + ru32OffSet++);
         }
      }

      // Get sort value to see whether to ignore or not
      sort_value = u32GetSortValue (u32Utf8Value) - cu32ValidOffset;
   }

   if(u32Utf8Value == 0xc39f )
   {
      rbSZOnce = TRUE;    //the character is a sharp s; the second 's' follows on the next call
      return (tU32)'s';
   }

   // If character is a digit, then skip to the first character that is not a digit
   if (bIsANumber(u32Utf8Value))
   {
      while ( (ru32OffSet < _u32StringSize) && bIsANumber((tU32)(*(_pu8String + ru32OffSet))) )
      {
         ru32OffSet++;
      }
   }

   return u32Utf8Value;
}

//******************************************************************************

//FUNCTION:    tBool tclUtf8StringBase::bThisIsLess ( const tclUtf8StringBase& oOtherString) const

//!            Compares "this" string with "other" string

//!            if "this" has no string then "this" < "other" only if "other" has one\n
//!            if "other" has no string then "this" is not less\n
//!            for ever do\n
//!            ___get next utf8 character from each string\n
//!            ___if both characters are digits\n
//!            ______if the current number values differ\n
//!            _________return the value comparison\n
//!            ______else move on to the next number\n
//!            ___else\n
//!            ______if end of only "this" string reached then "this" < "other"\n
//!            ______if end of "other" string reached then "this" is not less\n
//!            ______get sort value for current utf8 character for "this" and "other"\n
//!            ______if the sort values are different\n
//!            _________return the sort value comparison\n
//!            done

//PARAMETER:   const tclUtf8StringBase& oOtherString - reference to the other string

//RETURNVALUE: tBool - returns TRUE if this string is < the other, else FALSE

//*******************************************************************************
tBool tclUtf8StringBase::bThisIsLess ( const tclUtf8StringBase& oOtherString) const
{
#ifdef UTF8SORT_DEBUG
   printf("tclUtf8StringBase::bThisIsLess entered - current language [%i]\n", currentLanguage);
#else
   ETG_TRACE_USR4(("tclUtf8StringBase::bThisIsLess entered - current language [%i]\n", currentLanguage));
#endif //UTF8SORT_DEBUG

   if(_pu8String == OSAL_NULL)
   {
      // A missing string sorts before any present one, but two missing strings are equal:
      // "less" must never hold in both directions
      return (oOtherString._pu8String != OSAL_NULL);
   }
   if(oOtherString._pu8String == OSAL_NULL)
   {
      return FALSE;
   }

   tU32 u32OffSet1 = 0;
   tU32 u32OffSet2 = 0;

   tBool bSZOnce1  = FALSE;
   tBool bSZOnce2  = FALSE;

   tU8 CurrentNumber = 0;

   for(;/*ever*/;)
   {
      const tU32 u32Utf8Value1 = u32GetUtf8Value(u32OffSet1, bSZOnce1);
      const tU32 u32Utf8Value2 = oOtherString.u32GetUtf8Value(u32OffSet2, bSZOnce2);

      if( bIsANumber(u32Utf8Value1) && bIsANumber(u32Utf8Value2) ) //current position in both strings starts with a digit
      {
         const tU32 u32Number1 = u32GetNumberValue(CurrentNumber);
         const tU32 u32Number2 = oOtherString.u32GetNumberValue(CurrentNumber);

         if( u32Number1 != u32Number2 )
         {
            //the number values differ
            return u32Number1 < u32Number2;
         }

         //the number values are the same
         CurrentNumber++;
         continue;
      }

      const bool bEnd1 = (u32Utf8Value1 == cu32EndOfString);
      const bool bEnd2 = (u32Utf8Value2 == cu32EndOfString);

      if( bEnd1 || bEnd2 )
      {
         //only "this" ended: it is shorter, so this string < the other
         //the other ended (alone or together with "this"): this is not less than the other
         return (bEnd1 && !bEnd2);
      }

      // Sort values are looked up only for real characters, never for the end marker
      const tU32 u32SortValue1 = u32GetSortValue(u32Utf8Value1);
      const tU32 u32SortValue2 = u32GetSortValue(u32Utf8Value2);

      if(u32SortValue1 != u32SortValue2)
      {
         return (u32SortValue1 < u32SortValue2);
      }
   }
}


//******************************************************************************

//FUNCTION:    tS32 tclUtf8StringBase::compareUtf8Strings(tVoid* pvArg,
//                                                        tS32 len0,
//                                                        tVoid const* str0,
//                                                        tS32 len1,
//                                                        tVoid const* str1)

//!            Compare two strings

//!            A negative length makes the corresponding string be treated as missing\n
//!            Create instances of both strings (no copy requested)\n
//!            for ever do\n
//!            ___get next utf8 character from each string\n
//!            ___if both characters are digits\n
//!            ______if the current number values differ\n
//!            _________return the value comparison\n
//!            ______else move on to the next number\n
//!            ___else\n
//!            ______if end of only the first string reached then first < second\n
//!            ______if end of only the second string reached then first > second\n
//!            ______if end of both strings reached then first == second\n
//!            ______get sort value for current utf8 character for first and second\n
//!            ______if the sort values are different\n
//!            _________return the sort value comparison\n
//!            done

//PARAMETER:   tVoid* pvArg      - ignored
//             tS32 len0         - length of first string in utf8 bytes
//                               - Note - Diacritic characters are two bytes
//                               - If length is zero, the string length in bytes
//                                 is calculated by the string constructor
//             tVoid const* str0 - first string
//             tS32 len1         - length of second string in utf8 bytes
//                               - Note - Diacritic characters are two bytes
//                               - If length is zero, the string length in bytes
//                                 is calculated by the string constructor
//             tVoid const* str1 - second string

//RETURNVALUE: tS32 - +1 if first > second, 0 if first == second and -1 if first < second

//*******************************************************************************
tS32 tclUtf8StringBase::compareUtf8Strings(tVoid* pvArg,
      tS32 len0,
      tVoid const* str0,
      tS32 len1,
      tVoid const* str1)
{
#ifdef UTF8SORT_DEBUG
   printf("tclUtf8StringBase::compareUtf8Strings  - current language [%i]\n", currentLanguage);
#else
   ETG_TRACE_USR4(("tclUtf8StringBase::compareUtf8Strings  - current language [%i]\n", currentLanguage));
#endif //UTF8SORT_DEBUG

   //ignore pvArg
   (tVoid)(pvArg);

   tVoid const* null_string = 0;
   tVoid const* string0 = str0;
   tVoid const* string1 = str1;

   //check length values: a negative length cannot describe a string
   if(len0 < 0)
   {
      len0 = 0;
      string0 = null_string;
   }

   if(len1 < 0)
   {
      len1 = 0;
      string1 = null_string;
   }

   //create UTF8 string objects
   tclUtf8StringBase oString0((tPCU8)string0, (tU32)len0, FALSE);
   tclUtf8StringBase oString1((tPCU8)string1, (tU32)len1, FALSE);

   tU32 u32OffSet1 = 0;
   tU32 u32OffSet2 = 0;

   tBool bSZOnce1  = FALSE;
   tBool bSZOnce2  = FALSE;

   tU8 CurrentNumber = 0;

   //compare
   for(;/*ever*/;)
   {
      const tU32 u32Utf8Value1 = oString0.u32GetUtf8Value(u32OffSet1, bSZOnce1);
      const tU32 u32Utf8Value2 = oString1.u32GetUtf8Value(u32OffSet2, bSZOnce2);

      if( oString0.bIsANumber(u32Utf8Value1) && oString1.bIsANumber(u32Utf8Value2) ) //both strings start with a digit
      {
         const tU32 u32Number0 = oString0.u32GetNumberValue(CurrentNumber);
         const tU32 u32Number1 = oString1.u32GetNumberValue(CurrentNumber);

         if( u32Number0 != u32Number1 )
         {
            //the number values differ
            return (u32Number0 < u32Number1) ? (tS32)(-1) : (tS32)(1);
         }

         //the number values are the same
         CurrentNumber++;
         continue;
      }

      const bool bEnd1 = (u32Utf8Value1 == cu32EndOfString);
      const bool bEnd2 = (u32Utf8Value2 == cu32EndOfString);

      if( bEnd1 && bEnd2 )
      {
         //end of both strings reached, so they are equal
         return (tS32)(0);
      }
      if( bEnd1 )
      {
         //end of first string reached, so it is shorter, so first < second
         return (tS32)(-1);
      }
      if( bEnd2 )
      {
         //end of second string reached, so it is shorter, so first > second
         return (tS32)(1);
      }

      // Sort values are looked up only for real characters, never for the end marker
      const tU32 u32SortValue1 = u32GetSortValue(u32Utf8Value1);
      const tU32 u32SortValue2 = u32GetSortValue(u32Utf8Value2);

      if(u32SortValue1 != u32SortValue2)
      {
         return (u32SortValue1 < u32SortValue2) ? (tS32)(-1) : (tS32)(1);
      }
   }
} //lint !e1762: Member function '...' could be made const
// Cannot be made const as needs to align with fc_phone_ugzzc_SQL_Clint.h


//******************************************************************************

//FUNCTION:    tclUtf8StringBase& tclUtf8StringBase::operator=( const tclUtf8StringBase& roTheOther)

//!            Assignment Operator for utf8 string

//PARAMETER:   const tclUtf8StringBase& roTheOther

//RETURNVALUE: Value of this

//*******************************************************************************
tclUtf8StringBase& tclUtf8StringBase::operator=( const tclUtf8StringBase& roTheOther)
{
   // Self-assignment: there is nothing to copy.
   if(this == &roTheOther)
   {
      return *this;
   }

   // Reset the per-object traversal state and take over the source's
   // size and allocation flag.
   _u32Offset     = 0;
   _bSZOnce       = FALSE;
   _bAlloc        = roTheOther._bAlloc;
   _u32StringSize = roTheOther._u32StringSize;

   // Copy the values of the numbers embedded in the string.
   for (tU8 u8Index = 0; u8Index < MAX_NUMBERS; u8Index++)
   {
      _u32NumberValue[u8Index] = roTheOther._u32NumberValue[u8Index];
   }

   // Default: reference the very same bytes as the source object.
   // The pointer goes through a temporary because lint complains about a
   // direct pointer copy.
   tPCU8 pu8SourceString = roTheOther._pu8String;
   _pu8String = pu8SourceString;

   // Only an allocating, non-empty string gets a private copy.
   if(!_bAlloc || (0 == _u32StringSize))
   {
      return *this;
   }

   // NOTE(review): a buffer this object may already hold in _pu8Data is not
   // released before the pointer is overwritten - looks like a leak on
   // re-assignment; confirm against the constructor/destructor ownership rules.
   _pu8Data = new tU8[_u32StringSize+1];
   if(_pu8Data)
   {
      // Private, zero terminated copy of the source bytes.
      (tVoid)OSAL_pvMemoryCopy(_pu8Data,roTheOther._pu8String,_u32StringSize);
      _pu8Data[_u32StringSize]=0;
      _pu8String = _pu8Data;
   }

   return *this;
}


//******************************************************************************

//FUNCTION:    void tU8_to_hex (const tU8* start_s, tU32 len)

//!            Debug only function that prints a string in hex

//PARAMETER:   start_s - pointer to start of string
//             len     - number of characters in string

//RETURNVALUE: None

//*******************************************************************************
#ifdef UTF8SORT_DEBUG
void tU8_to_hex (const tU8* start_s, tU32 len)
{
   tU16 i = 0;
   while ((i <= len) && (start_s[i] != 0))
   {
      fprintf (stderr," %x",start_s[i]);
      i++;
   }
   fprintf (stderr,"\n");
}
#else
void tU8_to_hex (const tU8* start_s, tU32 len)
{
   tU16 i = 0;
   while ((i <= len) && (start_s[i] != 0))
   {
      ETG_TRACE_USR4((" %x",start_s[i]));
      i++;
   }
      ETG_TRACE_USR4(("\n"));
}
#endif //UTF8SORT_DEBUG


//******************************************************************************

//FUNCTION:    tU8 tclUtf8StringBase::process_chinese (const tU8* input_ptr,
//                                                     tU32* no_utf8)

//!            Converts a Chinese character in utf8 to the first letter of its Phonetic representation

//PARAMETER:   input_ptr - pointer to start of utf8 for character
//             no_utf8 - returns number of utf8 bytes consumed for the symbol
//             (the current implementation always reports 3)

//RETURNVALUE: First letter of the Phonetic representation

//*****************************************************************************
tU8 tclUtf8StringBase::process_chinese (const tU8* input_ptr,
                                        tU32* no_utf8)
{
   // Room for exactly one UTF16 code unit.
   tU16 utf16_unit [1] = {'\0'};
   tU16* utf16_cursor = utf16_unit;
   tU16* const utf16_end = utf16_cursor + 1;

   // The decoder is given a fixed window of three input bytes.
   const tU8* const utf8_end = input_ptr + 3;

   // Convert UTF8 value to UTF16 before accessing table
   const ConversionResult status = ConvertUTF8toUTF16 ((const UTF8**)&input_ptr,
                                                       (const UTF8*)utf8_end,
                                                       &utf16_cursor,
                                                       utf16_end,
                                                       (ConversionFlags) lenientConversion);

   // Stays zero when the bytes could not be converted.
   tU8 letter = 0;

   if (conversionOK == status)
   {
      // Bits 24..30 of the sort value are mapped onto the latin lower case
      // range: a field value of 1 yields 'a'.
      const tU32 sort_value = aru32SortValues_Chinese_Unicode_1_1 [utf16_unit[0]];
      const tU32 letter_field = (sort_value & 0x7F000000) >> 24;

      letter = (tU8)(letter_field + LATIN_LOWER_A - 1);
   }

   // The reported length is always three bytes, whatever the conversion result.
   *no_utf8 = 3;
   return letter;
}


//******************************************************************************

//FUNCTION:    tU32 tclUtf8StringBase::getFirstLetters (const tU8* input,
//                                                      tU32 len_input,
//                                                      tU8* dest,
//                                                      tU32 max_len_dest,
//                                                      const tU8* seperators

//!            Produce a list of the first characters of tokens found in input string

//!            Parses an input string and returns a list of the first
//!            character of each token in the string. Tokens are
//!            delimited by seperator characters (supplied as a parameter). In the case of
//!            Chinese tokens, the first letter is that of the
//!            Pinyin phonetic equivalent.

//PARAMETER:   input        - utf8 string to analyze. May have a
//                            combination of latin and Chinese
//                            characters.
//             len_input    - Length of the input string in bytes
//             dest         - Buffer to write the string of first
//                            letters. Lower case latin letters are
//                            converted to upper case before they are
//                            written; other characters are written as is.
//             max_len_dest - Maximum number of bytes that can be
//                            written to dest.
//             seperators   - A zero terminated list of characters
//                            that delimit latin words.

//RETURNVALUE: Number of bytes written to dest

//*****************************************************************************
tU32 tclUtf8StringBase::getFirstLetters (const tU8* input,
      tU32 len_input,
      tU8* dest,
      tU32 max_len_dest,
      const tU8* seperators)
{
   // Walks the input byte by byte and writes the first character of every
   // token to dest (lower case latin letters are converted to upper case).
   // A token is either a run of 7-bit characters between seperators, or a
   // single 3-byte symbol accepted by process_chinese. At most
   // MAX_FIRST_LETTERS letters are written, and never more than fit into
   // max_len_dest bytes including the terminating zero. The return value is
   // the number of letters written, not counting the terminating zero.

   tU32 first_letters_written = 0;
   tU32 input_pointer = 0;
   tBool ignore_rest_of_latin_token = 0;

   tU8 first_char = '\0';
   tU32 no_utf8 = 0;

   tBool seperator_found = 0;
   tU32 letter_index = 0;
   tU8 current_char = 0;

   // Upper limit for the number of letters: the file-wide cap, reduced
   // further when the caller's buffer is smaller. One byte is always kept
   // free for the terminating zero.
   tU32 max_letters = 0;
   if (max_len_dest > 0)
   {
      max_letters = max_len_dest - 1;
      if (max_letters > MAX_FIRST_LETTERS)
      {
         max_letters = MAX_FIRST_LETTERS;
      }
   }

#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"tclUtf8StringBase::getFirstLetters\nInput (%i) - ",len_input);
   tU8_to_hex (input, len_input);
   fprintf (stderr,"max_len_dest = %i\n", max_len_dest);
   fprintf (stderr,"seperators = %s\n", seperators);
#else
   ETG_TRACE_USR4(("tclUtf8StringBase::getFirstLetters\nInput (%i) - ",len_input));
   tU8_to_hex (input, len_input);
   ETG_TRACE_USR4(("max_len_dest = %i\n", max_len_dest));
   ETG_TRACE_USR4(("seperators = %s\n", seperators));
#endif //UTF8SORT_DEBUG

   // NOTE(review): this asserts on buffers LARGER than the maximum output
   // size; a too small buffer is not rejected here (it is handled by
   // max_letters above). Kept unchanged - confirm the intended direction.
   if (max_len_dest > MAX_FIRST_LETTERS + 1)
   {
      FATAL_M_ASSERT_ALWAYS();
   }

   while (input_pointer < len_input)
   {
      current_char = input [input_pointer];

      // A seperator character is found if the next character in the input
      // buffer matches one of the entries in the seperator list
      letter_index = 0;
      seperator_found = 0;
      while (seperators [letter_index] != '\0')
      {
         if (current_char == seperators [letter_index])
         {
            seperator_found = 1; //Found one
            ignore_rest_of_latin_token = 0; //Switch off, so that can see next token
            break;
         }
         letter_index++;
      }

      // If a seperator is found, just ignore it
      if (seperator_found)
      {
#ifdef UTF8SORT_DEBUG
         fprintf (stderr,"Ignore Seperator (%c)\n",current_char);
#else
         ETG_TRACE_USR4(("Ignore Seperator (%c)\n",current_char));
#endif //UTF8SORT_DEBUG

         input_pointer++;
      }
      else
      {
         // Check for a 7-bit character (any non-zero value up to 0x7F)
         if ( (current_char > 0) && (current_char <= 0x7F) )
         {
            if (ignore_rest_of_latin_token)
            {
#ifdef UTF8SORT_DEBUG
               fprintf (stderr,"Ignore latin chr (%c)\n",current_char);
#else
               ETG_TRACE_USR4(("Ignore latin chr (%c)\n",current_char));
#endif //UTF8SORT_DEBUG

               input_pointer++;
            }
            else
            {
               // First character of a new token: skip the rest of the
               // token from now on, whether or not the letter is stored.
               ignore_rest_of_latin_token = 1;
               if (first_letters_written < max_letters)
               {
                  // Lower case latin letters are stored as upper case
                  if ( (current_char >= LATIN_LOWER_A) && (current_char <= LATIN_LOWER_Z) )
                  {
                     current_char = current_char - LATIN_SHIFT;
                  }
                  dest [first_letters_written] = current_char; // Write first char to dest
                  first_letters_written++;

#ifdef UTF8SORT_DEBUG
                  fprintf(stderr,"First char written (%c)\n", current_char);
#else
                  ETG_TRACE_USR4(("First char written (%c)\n", current_char));
#endif //UTF8SORT_DEBUG

               }
               input_pointer++;
            }
         }
         else
         {
            // Check for Chinese symbol. process_chinese decodes from a fixed
            // window of three bytes, so it is only called when three bytes
            // are still available; otherwise the byte is treated as not
            // recognised.
            first_char = 0;
            if ((len_input - input_pointer) >= 3)
            {
               first_char = process_chinese ((const tU8*)(input + input_pointer), &no_utf8);
            }

            if ( (first_char >= LATIN_LOWER_A) && (first_char <= LATIN_LOWER_Z) )
            {
#ifdef UTF8SORT_DEBUG
               fprintf (stderr,"Chinese utf8 (%i), first_char = %c\n", no_utf8, first_char);
#else
               ETG_TRACE_USR4(("Chinese utf8 (%i), first_char = %c\n", no_utf8, first_char));
#endif //UTF8SORT_DEBUG

               // Every Chinese symbol is a token of its own
               ignore_rest_of_latin_token = 0;
               input_pointer = input_pointer + no_utf8;
               if (first_letters_written < max_letters)
               {
                  dest [first_letters_written] = first_char - LATIN_SHIFT; // Write first char to dest
                  first_letters_written++;
               }
            }
            else
            {
               // Not recognised - ignore
               input_pointer++;

#ifdef UTF8SORT_DEBUG
               fprintf (stderr,"%x invalid - ignored\n", current_char);
#else
               ETG_TRACE_USR4(("%x invalid - ignored\n", current_char));
#endif //UTF8SORT_DEBUG

            }
         }
      }
   }

   // Terminate the result; a zero sized buffer is left untouched.
   if (max_len_dest > 0)
   {
      dest [first_letters_written] = '\0';
   }

#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"first letters (%i) - ",first_letters_written);
   if (first_letters_written > 0)
   {
      letter_index = 0;
      while (letter_index < first_letters_written)
      {
         fprintf (stderr,"%c",dest [letter_index]);
         letter_index++;
      }
      fprintf (stderr, "\n");
   }
#else
   ETG_TRACE_USR4(("first letters (%i) - ",first_letters_written));
   if (first_letters_written > 0)
   {
      letter_index = 0;
      while (letter_index < first_letters_written)
      {
         ETG_TRACE_USR4(("%c",dest [letter_index]));
         letter_index++;
      }
      ETG_TRACE_USR4(("\n"));
   }
#endif //UTF8SORT_DEBUG


   return first_letters_written;
}


//******************************************************************************

//FUNCTION:    tBool tclUtf8StringBase::getChinesePinyin (const tU8* input_ptr,
//                                                        tU32* no_utf8,
//                                                        tU8* pinyin_str,
//                                                        tU32* pinyin_len);

//!            Checks for a valid Chinese character in utf8 and if found returns
//!            its full Pinyin string

//PARAMETER:   const tU8* input_ptr - pointer to start of utf8 for character
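//             tU32* no_utf8 - returns number of utf8 bytes consumed for the symbol
//                             (always 3 in the current implementation; only set on success)
//             tU8* pinyin_str - returns Pinyin phonetic representation (at most 8 bytes,
//                               zero terminated only when shorter than 8 bytes)
//             tU32* pinyin_len - returns length of Pinyin string (only set on success)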

//RETURNVALUE: tBool - TRUE if valid chinese character found

//*****************************************************************************
tBool tclUtf8StringBase::getChinesePinyin (const tU8* input_ptr,
      tU32* no_utf8,
      tU8* pinyin_str,
      tU32* pinyin_len)
{
   // Decodes one symbol from a fixed window of three utf8 bytes and, when the
   // conversion succeeds, copies its entry of the Pinyin table to pinyin_str.
   // pinyin_str must provide room for 8 bytes. On failure none of the output
   // parameters is written.

   ConversionResult conv_result;
   tU16 output_string [1] = {'\0'};
   tU16* p_output_string = output_string;

   //Convert UTF8 value to UTF16 before accessing table
   conv_result = ConvertUTF8toUTF16 ((const UTF8**)&input_ptr,
                                     (const UTF8*)(input_ptr+3),
                                     &p_output_string,
                                     (p_output_string + 1),
                                     (ConversionFlags) lenientConversion);

   if (conv_result != conversionOK)
   {
      return false;
   }

   // Table row holding the Pinyin bytes of this symbol (8 bytes per row)
   const tU8* const pinyin_entry = aru8PinyinStrings [(output_string[0])];

#ifdef UTF8SORT_DEBUG
   fprintf(stderr,"getChinesePinyin Chinese index [%d] Pinyin 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x \n\n",
           output_string[0],
           pinyin_entry [0],
           pinyin_entry [1],
           pinyin_entry [2],
           pinyin_entry [3],
           pinyin_entry [4],
           pinyin_entry [5],
           pinyin_entry [6],
           pinyin_entry [7]);
#else
   ETG_TRACE_USR4(("getChinesePinyin Chinese index [%d] Pinyin 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x \n\n",
           output_string[0],
           pinyin_entry [0],
           pinyin_entry [1],
           pinyin_entry [2],
           pinyin_entry [3],
           pinyin_entry [4],
           pinyin_entry [5],
           pinyin_entry [6],
           pinyin_entry [7]));
#endif //UTF8SORT_DEBUG

   // Copy up to and including the first zero byte. A row without a zero
   // byte is copied completely and then has length 8 and no terminator.
   tU8 i = 0;
   for (i = 0; i < 8; i++)
   {
      pinyin_str[i] = pinyin_entry [i];
      if(pinyin_str[i] == 0)
      {
         break;
      }
   }
   *pinyin_len = i;   // number of bytes before the terminator
   *no_utf8 = 3;      // the decoded window is always three bytes
   return true;
}


/********************************************************************
*FUNCTION:     convertToPinyin
*DESCRIPTION:  Parses an input string and returns an output string
*              where all Chinese characters are converted to their
*              full Pinyin equivalent, i.e. "shang".
*              Non-Chinese characters are copied unchanged.
*              Only valid when current sort language is chinese.
*
*PARAMETER:   const tU8* input      - utf8 string to analyze. May have a
*                                     combination of Latin and Chinese
*                                     characters.
*             tU32 len_input        - Length of the input string in bytes
*             tU8* dest             - Output Buffers for the processed string
*             tU32 len_dest         - Number of entries in dest

*RETURNVALUE: tU32                  -Number of chars written to dest
*******************************************************************/

tU32 tclUtf8StringBase::convertToPinyin (const tU8* input,
      tU32 len_input,
      tU8* dest,
      tU32 len_dest)
{
   // Copies input to dest, replacing every symbol accepted by
   // getChinesePinyin with its Pinyin bytes. Bytes that are not accepted are
   // copied unchanged. Conversion stops when the input is consumed or when
   // the next item no longer fits into the len_dest bytes of dest. Returns
   // the number of bytes written to dest.

   tU32 input_pointer = 0;
   tU32 dest_pointer = 0;
   tU32 pinyin_len = 0;
   tU8 pinyin_str [8];
   const tU32 pinyin_capacity = (tU32)sizeof (pinyin_str);
   tU32 no_utf8 = 0;

#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"convertToPinyin - Input (%i) - ",len_input);
   tU8_to_hex (input, len_input);
#else
   ETG_TRACE_USR4(("convertToPinyin - Input (%i) - ",len_input));
#endif //UTF8SORT_DEBUG

   while ( (input_pointer < len_input) && (dest_pointer < len_dest) )
   {
      // Check for Chinese symbol. getChinesePinyin decodes from a fixed
      // window of three bytes, so it is only called when three bytes are
      // still available; otherwise the byte is copied unchanged.
      tBool bValid = false;
      if ((len_input - input_pointer) >= 3)
      {
         bValid = getChinesePinyin ((const tU8*)(input + input_pointer), &no_utf8, pinyin_str, &pinyin_len);
      }

      if (bValid)
      {
         // The Pinyin letters are copied together with their terminating
         // zero, as before. An entry that fills the whole buffer has no
         // terminator, so the count is limited to the buffer size.
         // NOTE(review): the zero byte ends up between the syllables of the
         // output - confirm that callers expect this.
         tU32 copy_len = pinyin_len + 1;
         if (copy_len > pinyin_capacity)
         {
            copy_len = pinyin_capacity;
         }

         // Never write a partial entry: stop when it does not fit any more
         if (copy_len > (len_dest - dest_pointer))
         {
            break;
         }

         for (tU32 i = 0; i < copy_len; i++)
         {
            dest[dest_pointer++] = pinyin_str[i];
         }
         input_pointer = input_pointer + no_utf8;
      }
      else
      {
         // Not recognised - no conversion
         dest[dest_pointer++] = input [input_pointer++];
      }
   }

#ifdef UTF8SORT_DEBUG
   fprintf (stderr,"tclUtf8StringBase::convertToPinyin - Output - ");
   tU8_to_hex (dest, dest_pointer);
   fprintf (stderr,"\n");
#else
   ETG_TRACE_USR4(("tclUtf8StringBase::convertToPinyin - Output - "));
   tU8_to_hex (dest, dest_pointer);
   ETG_TRACE_USR4(("\n"));
#endif //UTF8SORT_DEBUG

   return dest_pointer;
}


//******************************************************************************
//
// Load pre-created data from header files


// Load sort value tables from pre-prepared data. Saves code and improves start up time

// Definitions of the static lookup tables of tclUtf8StringBase. Each table
// is initialised from a pre-generated header that is included inside the
// initialiser. Only the opening brace is written in this file - presumably
// each header ends with the closing "};" of its table (not visible here;
// verify against the generated headers). The growing indentation of the
// definitions below carries no meaning.

// Sort values used by process_chinese (indexed by UTF16 code unit)
tU32  tclUtf8StringBase::aru32SortValues_Chinese_Unicode_1_1 [cu32MaxUTF8Value] =
{
#include "Utf8SortTable_Chinese_Unicode_1_1.h" // Includes both default and Chinese Unicode 1.1 (Simplified) and Traditional (same as Big5)

// Sort values for Korean
   tU32  tclUtf8StringBase::aru32SortValues_Korean [cu32MaxUTF8Value] = {
#include "Utf8SortTable_Korean.h" // Includes both default and Korean data

// Load Pinyin strings
// One row of 8 bytes per symbol; read by getChinesePinyin
      tU8  tclUtf8StringBase::aru8PinyinStrings [cu32MaxUTF8Value] [8] = {
#include "Utf8SortTable_Chinese_Unicode_1_1_pinyin.h" // Includes multiple Pinyin pronunciation characters

// Load Latin language variants data
         tU16  tclUtf8StringBase::aru8SortValues_Variant_Latin [RANGE_LATIN] [NO_VAR_LANGS] = {
#include "Utf8SortTable_latin_var.h"

// Load Diacritics language variants data
            tU16  tclUtf8StringBase::aru8SortValues_Variant_Diacritics [RANGE_DIACRITICS] [NO_VAR_LANGS] = {
#include "Utf8SortTable_diacritics_var.h"
