/* ***************************************************************************************
* FILE:          StrUtf8.cpp
* SW-COMPONENT:  HMI-BASE
*  DESCRIPTION:  StrUtf8.cpp is part of HMI-Base framework Library
*    COPYRIGHT:  (c) 2015-2016 Robert Bosch Car Multimedia GmbH
*
* The reproduction, distribution and utilization of this file as well as the
* communication of its contents to others without express authorization is
* prohibited. Offenders will be held liable for the payment of damages.
* All rights reserved in the event of the grant of a patent, utility model or design.
*
*************************************************************************************** */


#include "hmibase/util/StrUtf8.h"
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include "hmibase/util/Macros.h"


namespace hmibase {
namespace util {

// Forward declarations of the file-local helpers that are defined near the end of this file.
// Returns true for a UTF-8 continuation byte (0x80..0xBF), i.e. a byte that is not a start byte.
static bool bIsPartOfMultiByteChar(tU8 u8AnyByte);
// Returns the byte length of the UTF-8 sequence introduced by the given start byte.
static size_t GetCharacterByteLength(tU8 u8StartByte);

// Private, file-local NIL object: the storage every empty string refers to.
// Init() stores EmtyString.data in _pUtf8String, and the code in this file compares
// pMgt() against &EmtyString.d to recognise that shared empty state.
// NOTE(review): presumably pMgt() locates the cDataHelper directly in front of the
// character data, which would be why 'd' precedes 'data' - confirm against the header.
static struct cInitData
{
   cStrUtf8::cDataHelper d;        // management header belonging to the empty string
   tU8                   data[4];  // character storage, zero-filled by the constructor
   cInitData()
   {
      memset(data, 0, sizeof(data));
   }
} EmtyString;

//= {{0}};


// Definitions of the static class members.
// GetWeightedValueFromUniCode() adds fact_LATIN to the weight of Latin characters.
// NOTE(review): the other fact_* values and _lang are presumably used the same way
// for the remaining scripts - confirm where they are assigned.
cStrUtf8::tenLanguage cStrUtf8::_lang = DEFAULT;
unsigned cStrUtf8::fact_LATIN, cStrUtf8::fact_RUS, cStrUtf8::fact_GRE, cStrUtf8::fact_THA, cStrUtf8::fact_ARA;

// Default constructor: the new object refers to the shared empty-string storage,
// which is exactly the state Init() establishes.
cStrUtf8::cStrUtf8()
   : _pUtf8String(EmtyString.data)
{
}


// Attaches this object to an existing character buffer and counts one more
// reference on the management data returned by pMgt() for that buffer.
// Both steps are executed between UTF8_LOCK() and UTF8_UNLOCK().
// Any buffer referenced before the call is NOT released here.
void cStrUtf8::AssignStringPtr(tU8* s)
{
   UTF8_LOCK();
   _pUtf8String = s;
   ++(pMgt()->_nRefCnt);
   UTF8_UNLOCK();
}


// Copy constructor.
// - Source reference count negative: the bytes are copied into a buffer of our own
//   (operator= calls such strings "locked").
// - Otherwise: the source buffer is shared and its reference count incremented.
// A reference count of exactly 0, or a shared source that is the NIL object,
// triggers HMI_APP_ASSERT_ALWAYS().
cStrUtf8::cStrUtf8(const cStrUtf8& stringSrc)
{
   if (stringSrc.getRefCnt() == 0)
   {
      HMI_APP_ASSERT_ALWAYS();
   }

   if (stringSrc.getRefCnt() < 0)
   {
      // deep copy through the byte-string assignment
      Init();
      *this = stringSrc._pUtf8String;
      return;
   }

   if (&EmtyString.d == stringSrc.pMgt())
   {
      HMI_APP_ASSERT_ALWAYS();
   }
   AssignStringPtr(stringSrc._pUtf8String);
}


// Constructs the string from a zero-terminated byte string.
// A NULL pointer or an empty input (u8StrLen() == 0) yields the empty string.
cStrUtf8::cStrUtf8(const tU8* pszUtf8)
{
   Init();

   const size_t nBytes = u8StrLen(pszUtf8);
   if (nBytes == 0)
   {
      _pUtf8String = EmtyString.data;
      return;
   }

   _pUtf8String = AllocMem(nBytes)->data();
   if (_pUtf8String != 0)
   {
      memcpy(_pUtf8String, pszUtf8, nBytes * sizeof(tU8));
      _pUtf8String[nBytes] = '\0';
   }
}


// Constructs the string from the first nCharacters characters of stringSrc.
// A request larger than the number of characters in stringSrc is limited to that number.
cStrUtf8::cStrUtf8(const cStrUtf8& stringSrc, int nCharacters)
{
   Init();

   const int nAvailable = stringSrc.Characters();
   const int nTake = (nCharacters > nAvailable) ? nAvailable : nCharacters;
   *this = stringSrc.substr(0, nTake);
}


// Constructs a string holding the UTF-8 encoding of one code point.
// Code points below 0x110000 are encoded with 1 to 4 bytes; anything above
// (UTF-8 ends at U+10FFFF since RFC 3629) produces the empty string.
cStrUtf8::cStrUtf8(tUniCode32 code)
{
   Init();

   tU8    encoded[8];
   size_t nBytes = 0;

   if (code < 0x80)
   {
      // 1 byte: 0xxxxxxx
      encoded[nBytes++] = static_cast<tU8>(code);
   }
   else if (code < 0x800)
   {
      // 2 bytes: 110xxxxx 10xxxxxx
      encoded[nBytes++] = static_cast<tU8>(0xC0 | (code >> 6));
      encoded[nBytes++] = static_cast<tU8>(0x80 | (code & 0x3F));
   }
   else if (code < 0x10000)
   {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      encoded[nBytes++] = static_cast<tU8>(0xE0 | (code >> 12));
      encoded[nBytes++] = static_cast<tU8>(0x80 | ((code >> 6) & 0x3F));
      encoded[nBytes++] = static_cast<tU8>(0x80 | (code & 0x3F));
   }
   else if (code < 0x110000)
   {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      encoded[nBytes++] = static_cast<tU8>(0xF0 | (code >> 18));
      encoded[nBytes++] = static_cast<tU8>(0x80 | ((code >> 12) & 0x3F));
      encoded[nBytes++] = static_cast<tU8>(0x80 | ((code >> 6) & 0x3F));
      encoded[nBytes++] = static_cast<tU8>(0x80 | (code & 0x3F));
   }
   // else: out of range, nBytes stays 0 and the result is empty

   encoded[nBytes] = '\0';
   *this = encoded;
}


// Constructs the string from the zero-terminated content of a std::string.
// The bytes are taken over unchanged via IMP_ASCII_STR and the byte-string assignment.
cStrUtf8::cStrUtf8(const std::string& str)
{
   Init();

   const char* pszSource = str.c_str();
   *this = IMP_ASCII_STR(pszSource);
}


// Points this object at the shared empty-string storage.
// Nothing is released here; callers that own a buffer must release it first.
void cStrUtf8::Init()
{
   _pUtf8String = &EmtyString.data[0];
}


// Destructor: drops this object's reference to its buffer and frees the buffer
// when the reference count has fallen to zero or below.
// The shared empty-string storage is never touched.
cStrUtf8::~cStrUtf8()
{
   if (pMgt() != &EmtyString.d)
   {
      UTF8_LOCK();
      pMgt()->_nRefCnt--;
      if (getRefCnt() <= 0)
      {
         // AllocMem() obtains the block with 'new tU8[...]', so it has to be given
         // back with 'delete[]' on a tU8*, exactly as Release(cDataHelper*) does.
         // A scalar 'delete' on the cDataHelper* is undefined behaviour.
         delete[] reinterpret_cast<tU8*>(pMgt());
      }
      UTF8_UNLOCK();
   }
}


// Assignment from another cStrUtf8.
// Nothing happens for self-assignment or when both objects already use the same buffer.
// If either side has a negative reference count ("locked") the bytes are copied,
// otherwise the current buffer is released and the source buffer is shared.
cStrUtf8& cStrUtf8::operator=(const cStrUtf8& stringSrc)
{
   const bool bNothingToDo = (&stringSrc == this) || (_pUtf8String == stringSrc._pUtf8String);
   if (bNothingToDo)
   {
      return *this;
   }

   const bool bSelfLocked = (getRefCnt() < 0) && (pMgt() != &EmtyString.d);
   if (bSelfLocked || stringSrc.getRefCnt() < 0)
   {
      // a real copy is required because one of the strings is locked
      AssignCopy(stringSrc.pMgt()->_nStrLen, stringSrc._pUtf8String);
      return *this;
   }

   // sharing is possible: give up our buffer and reference the source buffer
   UTF8_LOCK();
   Release();
   UTF8_UNLOCK();
   if (&EmtyString.d == stringSrc.pMgt())
   {
      HMI_APP_ASSERT_ALWAYS();
   }
   AssignStringPtr(stringSrc._pUtf8String);
   return *this;
}


// Assignment from a std::string: copies stringSrc.size() bytes of its content.
const cStrUtf8& cStrUtf8::operator=(const std::string& stringSrc)
{
   const size_t nBytes = stringSrc.size();
   AssignCopy(nBytes, IMP_ASCII_STR(stringSrc.c_str()));
   return *this;
}


// Assignment from a zero-terminated byte string; the length is taken from u8StrLen(),
// so a NULL pointer is handled like an empty string.
const cStrUtf8& cStrUtf8::operator=(const tU8* pszUtf8)
{
   const size_t nBytes = u8StrLen(pszUtf8);
   AssignCopy(nBytes, pszUtf8);
   return *this;
}


// Concatenation: returns a new string consisting of string1 followed by string2.
cStrUtf8  operator+(const cStrUtf8& string1, const cStrUtf8& string2)
{
   const size_t nLeftLen  = string1.pMgt()->_nStrLen;
   const size_t nRightLen = string2.pMgt()->_nStrLen;

   cStrUtf8 joined;
   joined.ConcatCopy(nLeftLen, string1._pUtf8String, nRightLen, string2._pUtf8String);
   return joined;
}


// Concatenation: returns a new string consisting of 'string' followed by the
// zero-terminated byte string pszUtf8.
cStrUtf8  operator+(const cStrUtf8& string, const tU8* pszUtf8)
{
   const size_t nLeftLen  = string.pMgt()->_nStrLen;
   const size_t nRightLen = cStrUtf8::u8StrLen(pszUtf8);

   cStrUtf8 joined;
   joined.ConcatCopy(nLeftLen, string._pUtf8String, nRightLen, pszUtf8);
   return joined;
}


// Concatenation: returns a new string consisting of the zero-terminated byte
// string pszUtf8 followed by 'string'.
cStrUtf8 operator+(const tU8* pszUtf8, const cStrUtf8& string)
{
   const size_t nLeftLen  = cStrUtf8::u8StrLen(pszUtf8);
   const size_t nRightLen = string.pMgt()->_nStrLen;

   cStrUtf8 joined;
   joined.ConcatCopy(nLeftLen, pszUtf8, nRightLen, string._pUtf8String);
   return joined;
}


// Concatenation: returns a new string consisting of 'string' followed by the
// UTF-8 encoding of the code point 'code'.
cStrUtf8 operator+(const cStrUtf8& string, tUniCode32 code)
{
   cStrUtf8 joined;
   cStrUtf8 encodedChar(code);

   const size_t nLeftLen  = string.pMgt()->_nStrLen;
   const size_t nRightLen = encodedChar.pMgt()->_nStrLen;
   joined.ConcatCopy(nLeftLen, string._pUtf8String, nRightLen, encodedChar._pUtf8String);
   return joined;
}


// Concatenation: returns a new string consisting of the UTF-8 encoding of the
// code point 'code' followed by 'string'.
cStrUtf8 operator+(tUniCode32 code, const cStrUtf8& string)
{
   cStrUtf8 encodedChar(code);
   cStrUtf8 joined;

   const size_t nLeftLen  = encodedChar.pMgt()->_nStrLen;
   const size_t nRightLen = string.pMgt()->_nStrLen;
   joined.ConcatCopy(nLeftLen, encodedChar._pUtf8String, nRightLen, string._pUtf8String);
   return joined;
}


// Appends a zero-terminated byte string to this string.
const cStrUtf8& cStrUtf8::operator+=(const tU8* pszUtf8)
{
   const size_t nAppendLen = u8StrLen(pszUtf8);
   ConcatInPlace(nAppendLen, pszUtf8);
   return *this;
}


// Appends another cStrUtf8 to this string.
const cStrUtf8& cStrUtf8::operator+=(const cStrUtf8& string)
{
   const size_t nAppendLen = string.pMgt()->_nStrLen;
   ConcatInPlace(nAppendLen, string._pUtf8String);
   return *this;
}


// Appends the UTF-8 encoding of one code point to this string.
const cStrUtf8& cStrUtf8::operator+=(tUniCode32 code)
{
   cStrUtf8 encodedChar(code);
   const size_t nAppendLen = encodedChar.pMgt()->_nStrLen;
   ConcatInPlace(nAppendLen, encodedChar._pUtf8String);
   return *this;
}


// returns the number of characters in the string
int cStrUtf8::Characters() const
{
   int nCount = 0;
   cUtf8StringIter itU8 = itBegin();

   while (itU8 != itEnd())
   {
      itU8 = itNext(itU8);
      nCount++;
   }
   return nCount;
}


// grow up the buffer for the string and split if multiple references
void cStrUtf8::MakeCopyBeforeModify(size_t nNewLen)
{
   // if multiple references or more mem needed ...
   if (getRefCnt() > 1 || nNewLen > pMgt()->_nAllocLen)
   {
      UTF8_LOCK();
      cDataHelper* pOldData = pMgt();
      size_t nOrgLen = pMgt()->_nStrLen;
      if (nNewLen < nOrgLen)
      {
         nNewLen = nOrgLen;
      }
      AllocMem(nNewLen);
      memcpy(_pUtf8String, pOldData->data(), (nOrgLen + 1)*sizeof(tU8));
      pMgt()->_nStrLen = nOrgLen;
      cStrUtf8::Release(pOldData);
      UTF8_UNLOCK();
   }
   if (getRefCnt() > 1)
   {
      HMI_APP_ASSERT_ALWAYS();
   }
   HMI_APP_ASSERT((_pUtf8String != NULL));
}


// Gives this object a private copy of the buffer when the buffer has more
// than one reference; otherwise nothing happens.
void cStrUtf8::MakeCopyBeforeModify()
{
   if (getRefCnt() <= 1)
   {
      return;
   }
   MakeCopyBeforeModify(pMgt()->_nStrLen);
}


// free unused memory of self
void cStrUtf8::FreeUnusedSpace()
{
   if (pMgt()->_nStrLen > pMgt()->_nAllocLen)
   {
      HMI_APP_ASSERT_ALWAYS();
   }
   if (pMgt()->_nStrLen < pMgt()->_nAllocLen)
   {
      cDataHelper* pOldData = pMgt();
      AllocMem(pOldData->_nStrLen);
      UTF8_LOCK();
      memcpy(_pUtf8String, pOldData->data(), (pOldData->_nStrLen + 1)*sizeof(tU8));
      if (_pUtf8String[pMgt()->_nStrLen] != '\0')
      {
         HMI_APP_ASSERT_ALWAYS();
      }
      cStrUtf8::Release(pOldData);
      UTF8_UNLOCK();
   }
   if (pMgt() == NULL)
   {
      HMI_APP_ASSERT_ALWAYS();
   }
}


// Decodes the character that starts at byte position 'iter' and returns its
// 32 bit code point.
// Returns 0 for an empty string and for positions at or behind the end (behind
// the end additionally asserts). Returns '!' after asserting when the byte at
// 'iter' is not a valid start byte or the sequence would exceed the string.
tUniCode32 cStrUtf8::at(cUtf8StringIter iter) const
{
   if (_pUtf8String == 0 || IsEmpty())
   {
      return 0;
   }

   // reject positions outside the string
   if (iter >= itEnd())
   {
      if (iter > itEnd())
      {
         HMI_APP_ASSERT_ALWAYS();   // access beyond the end of the string
      }
      return 0;
   }

   const tU8 lead = _pUtf8String[iter];
   if (lead < 0x80)
   {
      // most common case: plain ASCII, 0..127
      return lead;
   }

   // The number of leading one-bits of the first byte is the sequence length.
   unsigned short nSeqLen = 1;
   while (nSeqLen < 8 && (lead & (0x80 >> nSeqLen)) != 0)
   {
      ++nSeqLen;
   }

   if (nSeqLen <= 1 || nSeqLen >= 8)
   {
      // a continuation byte (or 0xFF) where a start byte was expected
      HMI_APP_ASSERT_ALWAYS();
      return '!';
   }
   if (nSeqLen > (Length() - iter))
   {
      // the sequence would run past the remaining bytes of the string
      HMI_APP_ASSERT_ALWAYS();
      return '!';
   }

   // payload bits of the start byte, then 6 bits from every following byte
   tUniCode32 decoded = lead & (0xFFU >> nSeqLen);
   for (unsigned short nByte = 1; nByte < nSeqLen; ++nByte)
   {
      decoded = (decoded << 6) | (_pUtf8String[iter + nByte] & 0x3F);
   }
   return decoded;
}


// Index operator for a byte iterator; same result as at(iter).
tUniCode32 cStrUtf8::operator[](cUtf8StringIter iter) const
{
   const tUniCode32 code = at(iter);
   return code;
}


// Returns the 32 bit code point of the character with the given character
// (not byte) position; the position is converted with itSeek().
tUniCode32 cStrUtf8::at(int nCharacterPos) const
{
   const cUtf8StringIter itTarget = itSeek(nCharacterPos);
   return at(itTarget);
}


// Index operator for a character (not byte) position; the position is
// converted with itSeek() and decoded with at().
tUniCode32 cStrUtf8::operator[](int nCharacterPos) const
{
   const cUtf8StringIter itTarget = itSeek(nCharacterPos);
   return at(itTarget);
}


// Empties the string.
// A string of length 0 is left untouched. With a non-negative reference count
// the buffer is simply released; otherwise an empty byte string is assigned and
// the outcome is checked with an assertion.
void cStrUtf8::Clear()
{
   static tU8 datNil[] = "\0";

   if (pMgt()->_nStrLen == 0)
   {
      return;
   }

   if (getRefCnt() >= 0)
   {
      UTF8_LOCK();
      Release();
      UTF8_UNLOCK();
      return;
   }

   *this = datNil;
   const bool bAsExpected = (getRefCnt() < 0) || (pMgt()->_nAllocLen == 0);
   if (!bAsExpected)
   {
      HMI_APP_ASSERT_ALWAYS();
   }
}


// printf-style setter: replaces the content with the formatted text.
//
// The text is produced with vsnprintf() into a temporary buffer of 200 bytes;
// while the output does not fit the buffer is doubled (up to 6400 bytes, the
// last size not exceeding the limit of 10000). If even the largest buffer is too
// small the truncated text is stored. The string stays empty when vsnprintf()
// reports an error for every attempt.
//
// lpszFormat  printf format string describing the variable arguments
void cStrUtf8::Format(const char* lpszFormat, ...)
{
   Clear();

   va_list argp;
   va_start(argp, lpszFormat);

   const size_t nMaxBufLen = 10000;
   for (size_t nBufLen = 200; nBufLen <= nMaxBufLen; nBufLen *= 2)
   {
      char* pBuf = new char [nBufLen];
      if (pBuf == NULL)
      {
         continue;
      }

      // vsnprintf() leaves its va_list in an indeterminate state, so every
      // attempt has to work on a fresh copy of the argument list.
      va_list argTry;
      va_copy(argTry, argp);
      const int nWritten = vsnprintf(pBuf, nBufLen, lpszFormat, argTry);
      va_end(argTry);

      // C99: the return value is the length the complete output needs, so it
      // only fits when that length is smaller than the buffer. A negative value
      // signals an error (or truncation on pre-C99 libraries).
      const bool bComplete    = (nWritten >= 0) && (static_cast<size_t>(nWritten) < nBufLen);
      const bool bLastAttempt = (nBufLen * 2 > nMaxBufLen);
      const bool bDone        = bComplete || (bLastAttempt && nWritten >= 0);

      if (bDone)
      {
         *this = IMP_ASCII_STR(pBuf);
      }
      delete [] pBuf;
      if (bDone)
      {
         break;
      }
   }
   va_end(argp);
}


// generate hex string with blanks "0A 0B 01 05"
void cStrUtf8::SetHex(const tU8* pszUtf8, bool bWithBlanks)
{
   Clear();
   size_t len = (u8StrLen(pszUtf8) + 1) * (bWithBlanks ? 3 : 2); // "0A 0B 01 05"
   AllocMem(len);
   char buf[100];
   SNPRINTF(buf, 4, "%02x ", pszUtf8[0]);
   for (size_t i = 0; pszUtf8[i] != '\0' && len >= 3; i++)
   {
      len -= static_cast<size_t>(SNPRINTF((char*)&_pUtf8String[i * 3], len, bWithBlanks ? "%02x " : "%02x", pszUtf8[i]));
   }
   pMgt()->_nStrLen = u8StrLen(_pUtf8String);
   if (pMgt()->_nStrLen > 0)
   {
      _pUtf8String[pMgt()->_nStrLen - 1] = '\0';
   }
}


// Sets the string content to the zero terminated string
void cStrUtf8::SetUtf8(const tU8* pubCStr)
{
   Clear();
   Append(pubCStr);
}


// Encode a string of 16 bit symbols to UTF-8.
void cStrUtf8::SetUtf16(const unsigned short* pUTF16_string)
{
   // Conversion done in here. Will cut string if buffer is too short.
   Clear();
   MakeCopyBeforeModify(strlen((const char*)pUTF16_string) * 2);

   char* pDestBuffer = (char*)(_pUtf8String);

   char*   wr, * pEnd;
   if (pMgt()->_nAllocLen == 0)
   {
      HMI_APP_ASSERT_ALWAYS();   // make sure a useful value is passed int
   }
   wr = pDestBuffer;
   pEnd = pDestBuffer + pMgt()->_nAllocLen;
   // loop 16 bit symbols.
   while (wr + 1 < pEnd)
   {
      // Get symbol.
      unsigned short ch = *(pUTF16_string++);
      // null-terminator?
      if (!ch)
      {
         break;
      }
      // check type.
      if (ch < 0x80u)
      {
         // Encode as one byte.
         *(wr++) = (tU8) ch;
      }
      else if (ch < 0x0800u)
      {
         // Encode as two bytes.
         if (wr + 2 >= pEnd)
         {
            break;
         } // would not fit.
         *(wr++) = (tU8)((ch >> 6) | 0xC0u);
         *(wr++) = (tU8)((ch & 0x3Fu) | 0x80u);
      }
      else
      {
         // Encode as three bytes.
         if (wr + 3 >= pEnd)
         {
            break;
         } // would not fit.
         *(wr++) = (tU8)((ch >> 12) | 0xE0u);
         *(wr++) = (tU8)(((ch >> 6) & 0x3Fu) | 0x80u);
         *(wr++) = (tU8)((ch & 0x3Fu) | 0x80u);
      }
      // 16 bit input chars can never encode to 4-byte UTF-8.
   }
   // null-terminate output string.
   *wr = 0;
   size_t ll = static_cast<size_t>(wr - pDestBuffer);
   pMgt()->_nStrLen = ll;
}


// Returns the iterator of the first character, which is always byte position 0.
cUtf8StringIter cStrUtf8::itBegin() const
{
   return static_cast<cUtf8StringIter>(0);
}


// Returns the iterator behind the last character, i.e. the stored string
// length (_nStrLen) of the management data.
cUtf8StringIter cStrUtf8::itEnd() const
{
   const cUtf8StringIter itPastLast = pMgt()->_nStrLen;
   return itPastLast;
}


// Returns the iterator of the character following the one at 'iter'.
// itEnd() is returned when
//  - the string is empty or 'iter' is at or behind the end,
//  - 'iter' points at a continuation byte instead of a start byte (asserts),
//  - the start byte announces a sequence of 0 or more than 4 bytes,
//  - the announced sequence would reach beyond the end of the string (asserts).
// The result is therefore never greater than itEnd() and, for a valid 'iter',
// always greater than 'iter', so loops of the form
// "while (it != itEnd()) it = itNext(it);" terminate on malformed content too.
cUtf8StringIter cStrUtf8::itNext(const cUtf8StringIter iter) const
{
   if (_pUtf8String == 0 || IsEmpty() || iter >= pMgt()->_nStrLen)
   {
      return itEnd();
   }

   const tU8 lead = _pUtf8String[iter];
   if ((lead & 0xC0) == 0x80)
   {
      // not a start byte
      HMI_APP_ASSERT_ALWAYS();
      return itEnd();
   }

   const size_t nCharByteLength = GetCharacterByteLength(lead);
   if (nCharByteLength == 0 || nCharByteLength > 4)
   {
      // embedded zero byte or unsupported sequence length
      return itEnd();
   }

   const cUtf8StringIter itFollowing = iter + nCharByteLength;
   if (itFollowing > itEnd())
   {
      // the last character is truncated: do not step outside the string
      HMI_APP_ASSERT_ALWAYS();
      return itEnd();
   }
   return itFollowing;
}


// Returns the iterator of the character in front of the one at 'iter2'.
// Steps back over continuation bytes until a start byte is found. itBegin() is
// returned (after asserting) when 'iter2' is already the beginning, when the
// start byte is 0xF8 or above, or when the sequence found does not end exactly
// at 'iter2'. For an empty string itBegin() is returned without asserting.
cUtf8StringIter cStrUtf8::itPrev(const cUtf8StringIter iter2) const
{
   if (_pUtf8String == 0 || IsEmpty())
   {
      return itBegin();
   }

   cUtf8StringIter itPos = iter2;
   for (;;)
   {
      if (itPos == 0)
      {
         // the iterator cannot be moved in front of the string
         HMI_APP_ASSERT_ALWAYS();
         return itBegin();
      }
      --itPos;
      if ((_pUtf8String[itPos] & 0xC0) != 0x80)
      {
         break;   // not a continuation byte: start byte reached
      }
   }

   const tU8 lead = _pUtf8String[itPos];
   if ((lead >= 0xF8) || ((itPos + GetCharacterByteLength(lead)) != iter2))
   {
      // unsupported start byte or inconsistent string content
      HMI_APP_ASSERT_ALWAYS();
      return itBegin();
   }
   return itPos;
}


// Converts a character position (counted from the beginning) into a byte
// iterator. Positions behind the last character give itEnd(); zero or negative
// positions give itBegin().
cUtf8StringIter cStrUtf8::itSeek(int nCharacters) const
{
   cUtf8StringIter itPos = itBegin();
   for (int nRemaining = nCharacters; itPos != itEnd() && nRemaining > 0; --nRemaining)
   {
      itPos = itNext(itPos);
   }
   return itPos;
}


// Returns a new string holding the first nCharacters characters.
cStrUtf8 cStrUtf8::Left(int nCharacters) const
{
   const int nFirstCharacter = 0;
   return substr(nFirstCharacter, nCharacters);
}


// Returns a new string holding nCharacters characters starting at the byte
// iterator 'iter'; forwards to substr(iter, nCharacters).
cStrUtf8 cStrUtf8::Mid(cUtf8StringIter iter, int nCharacters) const
{
   return this->substr(iter, nCharacters);
}


// Returns a new string holding nCharacters characters starting at the
// character (not byte) position CharacterPos.
cStrUtf8 cStrUtf8::Mid(int CharacterPos, int nCharacters) const
{
   const cUtf8StringIter itStart = itSeek(CharacterPos);
   return substr(itStart, nCharacters);
}


// Returns a new string holding the last nCharacters characters: the start
// position is found by stepping back from the end with itPrev().
cStrUtf8 cStrUtf8::Right(int nCharacters) const
{
   cUtf8StringIter itStart = itEnd();
   for (int nStepsLeft = nCharacters; nStepsLeft > 0 && itStart != itBegin(); --nStepsLeft)
   {
      itStart = itPrev(itStart);
   }
   return substr(itStart, nCharacters);
}


// Returns nCharacters characters starting at the character (not byte) position
// CharacterPos; the position is converted with itSeek().
cStrUtf8 cStrUtf8::substr(int CharacterPos, int nCharacters) const
{
   return substr(itSeek(CharacterPos), nCharacters);
}


// Returns a new string holding up to nCharacters characters starting at the
// byte iterator 'iter'.
//  - A negative nCharacters is treated as 0.
//  - Starting exactly at itEnd() asserts (and returns an empty string).
//  - When the range covers the complete string, *this is returned, so the
//    buffer is shared instead of copied.
//  - When the last character is truncated (its start byte announces more bytes
//    than the string holds) the function asserts and the range is limited to
//    the end of the string instead of reading beyond it.
cStrUtf8 cStrUtf8::substr(cUtf8StringIter iter, int nCharacters) const
{
   if (_pUtf8String == 0)
   {
      cStrUtf8 dest;
      dest.Init();
      return dest;
   }
   const cUtf8StringIter nFirst = iter;

   if (iter == itEnd())
   {
      HMI_APP_ASSERT_ALWAYS();
   }

   if (nCharacters < 0)
   {
      nCharacters = 0;
   }

   // sum up the byte lengths of the requested characters
   size_t copyLen = 0;
   for (unsigned int nChars = static_cast<unsigned int>(nCharacters); nChars > 0 && iter < itEnd(); nChars--)
   {
      const size_t len = GetCharacterByteLength(_pUtf8String[iter]);
      copyLen += len;
      iter    += len;
   }

   const size_t nStrLen = pMgt()->_nStrLen;
   if ((nFirst + copyLen) > nStrLen)
   {
      HMI_APP_ASSERT_ALWAYS();
      // never copy bytes that are not part of the string
      copyLen = (nFirst < nStrLen) ? static_cast<size_t>(nStrLen - nFirst) : 0;
   }

   if (nFirst == 0 && nFirst + copyLen == nStrLen)
   {
      return *this;
   }
   cStrUtf8 dest;
   CreateCopyFromThis(dest, nFirst, copyLen);
   return dest;
}


void cStrUtf8::MakeUpper()
{
   if (_pUtf8String == 0)
   {
      return;
   }
   // todo: bisher nur f�r Standard latin chars
   cUtf8StringIter iter = itBegin();
   while (iter != itEnd())
   {
      tUniCode32 code = at(iter);
      if (code >= 'a' && code <= 'z')
      {
         tUniCode32 upper = code - 0x20;
         _pUtf8String[iter] = static_cast<tU8>(upper);
      }
      iter = itNext(iter);
   }
}


void cStrUtf8::MakeLower()
{
   if (_pUtf8String == 0)
   {
      return;
   }
   // todo: bisher nur f�r Standard latin chars
   cUtf8StringIter iter = itBegin();
   while (iter != itEnd())
   {
      tUniCode32 code = at(iter);
      if (code >= 'A' && code <= 'Z')
      {
         tUniCode32 lower = code + 0x20;
         _pUtf8String[iter] = static_cast<tU8>(lower);
      }
      iter = itNext(iter);
   }
}


// Searches the string for the character 'code'.
// Returns the iterator of the character, or itEnd() when it is not found.
// 'iter' has to be a valid character position inside the string: a position
// behind the end or on a continuation byte asserts and gives itEnd().
// NOTE(review): 'iter' is only validated; the search itself is delegated to
// itFind(const cStrUtf8&, int) and does not start at 'iter' - confirm whether a
// search from 'iter' onwards was intended.
cUtf8StringIter cStrUtf8::itFind(tUniCode32 code, cUtf8StringIter iter) const
{
   if (_pUtf8String == 0 || IsEmpty())
   {
      return itEnd();
   }
   if (iter > itEnd())
   {
      // do not read the byte at a position outside the string
      HMI_APP_ASSERT_ALWAYS();
      return itEnd();
   }
   if (bIsPartOfMultiByteChar(_pUtf8String[iter]) == true)
   {
      // 'iter' does not point at the start of a character
      HMI_APP_ASSERT_ALWAYS();
      return itEnd();
   }
   const cStrUtf8 oSearch(code);
   return itFind(oSearch, 1);
}


// Searches this string for the first nCharacters characters of oSearch.
// Returns the iterator of the first match, or itEnd() when there is none.
// itEnd() is also returned for a negative nCharacters (asserts), when either
// string is empty, and when itSeek(nCharacters) already reaches the end of this
// string.
// NOTE(review): the last condition rejects a string of exactly nCharacters
// characters - confirm that this is intended.
cUtf8StringIter cStrUtf8::itFind(const cStrUtf8& oSearch, int nCharacters) const
{
   if (nCharacters < 0)
   {
      HMI_APP_ASSERT_ALWAYS();
      return itEnd();
   }

   const bool bNothingToSearch = oSearch.IsEmpty() || IsEmpty();
   if (bNothingToSearch)
   {
      return itEnd();
   }

   if (itSeek(nCharacters) == itEnd())
   {
      return itEnd();
   }
   return StrStrIter(oSearch, static_cast<size_t>(nCharacters));
}


// Removes the last character: the string is cut at the position itPrev()
// reports for the end of the string. Nothing happens for an empty string.
void cStrUtf8::DeleteLastCharacter()
{
   if (IsEmpty() || pMgt()->_nStrLen < 1)
   {
      return;
   }

   MakeCopyBeforeModify();
   const size_t nNewLen = (size_t) itPrev(itEnd());
   pMgt()->_nStrLen = nNewLen;
   _pUtf8String[nNewLen] = '\0';
}


// Removes the character that starts at byte position itPosition.
// Nothing happens for an empty string or a position at or behind the end.
// The bytes behind the removed character, including the terminating zero, are
// moved forward and the stored length is reduced accordingly.
void cStrUtf8::DeleteCharacter(cUtf8StringIter itPosition)
{
   if (!IsEmpty() && (itPosition < Length()))
   {
      MakeCopyBeforeModify();

      cUtf8StringIter itNextPosition = itNext(itPosition);
      if (itNextPosition > itEnd())
      {
         // truncated last character: without this limit the byte count below
         // would wrap around to a huge value
         itNextPosition = itEnd();
      }
      memmove(
         &_pUtf8String[itPosition],
         &_pUtf8String[itNextPosition],
         (itEnd() - itNextPosition) + 1);
      // new length is the old length minus the length of the deleted character
      pMgt()->_nStrLen -= (size_t)(itNextPosition - itPosition);
   }
}


// Insert string at position in self
void cStrUtf8::Insert(const cUtf8StringIter itPosition, const cStrUtf8& strIns)
{
   cUtf8StringIter iter = itPosition;
   if (iter < itBegin())
   {
      iter = itBegin();
   }
   if (iter > Length())
   {
      iter = itEnd();
   }

   size_t nNewLen = Length() + strIns.Length() + 1;
   MakeCopyBeforeModify(nNewLen);

   // keep last part of the string
   cStrUtf8 str2(&_pUtf8String[iter]);
   memcpy(&_pUtf8String[iter], strIns._pUtf8String, strIns.Length());
   memcpy(&_pUtf8String[iter + strIns.Length()], str2._pUtf8String, str2.Length() + 1);
}


// Replaces the character at byte position 'iter' with the code point 'code':
// the old character is deleted and the encoded new one is inserted at the same
// position. A position at or behind the end asserts.
void cStrUtf8::SetAt(cUtf8StringIter iter, tUniCode32 code)
{
   const cUtf8StringIter itLimit = (cUtf8StringIter)pMgt()->_nStrLen;
   if (iter >= itLimit)
   {
      HMI_APP_ASSERT_ALWAYS();
   }

   MakeCopyBeforeModify();
   DeleteCharacter(iter);

   const cStrUtf8 replacement(code);
   Insert(iter, replacement);
}


// replace character at position
void cStrUtf8::SetAt(int CharacterPos, tUniCode32 code)
{
   cUtf8StringIter iter = itSeek(CharacterPos);
   SetAt(iter, code);
}


// Removes the blanks (' ') at the beginning of the string.
// Nothing happens when the string is empty or does not start with a blank.
// The leading blanks are counted first and the remaining text is moved to the
// front in one step, instead of moving the whole string once per blank.
void cStrUtf8::TrimLeft()
{
   if (_pUtf8String == 0 || IsEmpty() || _pUtf8String[0] != ' ')
   {
      return;
   }
   MakeCopyBeforeModify();

   const size_t nLen = pMgt()->_nStrLen;
   size_t nBlanks = 0;
   while (nBlanks < nLen && _pUtf8String[nBlanks] == ' ')
   {
      ++nBlanks;
   }

   // move the rest including the terminating zero
   memmove(_pUtf8String, _pUtf8String + nBlanks, (nLen - nBlanks) + 1);
   pMgt()->_nStrLen = nLen - nBlanks;
}


// cut of blanks at the end of the string
void cStrUtf8::TrimRight()
{
   if (_pUtf8String == 0)
   {
      return;
   }

   cUtf8StringIter pos = itPrev(itEnd());
   if (_pUtf8String[pos] != ' ')
   {
      return;
   }
   MakeCopyBeforeModify();
   while (_pUtf8String[pos] == ' ')
   {
      _pUtf8String[pos] = '\0';
      pMgt()->_nStrLen = pos;
      pos = itPrev(itEnd());
   }
}


// Check for a correct UTF-8 string and returns true if the string is valid
bool cStrUtf8::CheckValid() const
{
   if ((_pUtf8String == 0) || (IsEmpty()))
   {
      return false;
   }
   tU8* p = _pUtf8String;

   unsigned int cnt, t; //   Warning 661: Possible access of out-of-bounds pointer (1 beyond end of data) by operator 'unary *' lines 69, 135, 894, 932]
   while ((p != 0) && *p)
   {
      if (*p >= 0xC0)
      {
         // Do not allow a 0xFF.
         if (*p == 0xFFu)
         {
            return false;
         }
         // Is a multi-byte symbol. Count bytes needed for this symbol
         for (cnt = 2; cnt < 7; cnt++)
         {
            if (!(*p & (0x80 >> cnt)))
            {
               break;
            }
         }
         // now we need this number of corret continue-bytes.
         for (t = 1; t < cnt; t++)
         {
            if (((p[t]) & 0xC0u) != 0x80)
            {
               return false;
            }
         }
         // skip to next.
         for (unsigned int ii = 0; ii < cnt && *p; ii++)
         {
            p++;   // lint : p += cnt;
         }
         continue;
      }
      if (*p >= 0x80)
      {
         return false;
      }   // unexpected continue-byte
      // Otherwise, its a normal character.
      p++;
   }
   return true;
}


// Check for a correct UTF-8 string. If any errors are detected,
// overwrite bad bytes with '?'. Returns true if any byte was changed.
bool cStrUtf8::FixString()
{
   if (_pUtf8String == 0)
   {
      return false;
   }

   tU8* p = _pUtf8String;
   unsigned int cnt, t;
   bool result = false;
   while ((p != 0) && *p)
   {
      if (*p >= 0xC0)
      {
         // Is a multi-byte symbol. Count bytes needed for this symbol
         for (cnt = 2; cnt < 7; cnt++)
            if (!(*p & (0x80 >> cnt)))
            {
               break;
            }
         // now we need this number of corret continue-bytes.
         for (t = 1; t < cnt; t++)
         {
            if (((p[t]) & 0xC0u) != 0x80)
            {
               // bad follow-up sequence
               *p = '?';       // overwrite first byte (!) with a question mark. Follow-up bytes will be overwritten later in the loop.
               cnt = 1;
               result = true;
               break;
            }
         }
         // skip to next.
         for (unsigned int ii = 0; ii < cnt && *p; ii++)
         {
            p++;   // lint : p += cnt;
         }
         continue;
      }
      else if (*p >= 0x80)
      {
         // unexpected second byte
         *p = '?';
         result = true;
      }
      // Otherwise, its a normal character.
      p++;
   }
   return result;
}


////////////////////////////////////////////////////////////////////////////////////
// private functions/methods
////////////////////////////////////////////////////////////////////////////////////

// Allocates a new buffer for nSize characters without regard to a buffer this
// object may already reference. The management data and the characters live in
// one block; one extra byte is reserved for the terminating zero.
// On success _pUtf8String points at the new, zero-terminated buffer, with
// reference count 1 and both string length and allocated length set to nSize.
// For nSize == 0 the object is set to the empty string and the NIL management
// data is returned.
cStrUtf8::cDataHelper* cStrUtf8::AllocMem(size_t nSize)
{
   HMI_APP_ASSERT((nSize <= _STRUTF8MAXLEN));
   if (nSize == 0)
   {
      Init();
      return &EmtyString.d;
   }

   // management header + characters + terminating zero
   const size_t nBlockBytes = sizeof(cDataHelper) + (nSize + 1) * sizeof(tU8);
   tU8* pRaw = new tU8[nBlockBytes];
   cDataHelper* pData = reinterpret_cast<cDataHelper*>(pRaw);
   if (pData == 0)
   {
      return &EmtyString.d;
   }

   pData->_nRefCnt   = 1;
   pData->_nStrLen   = nSize;
   pData->_nAllocLen = nSize;

   _pUtf8String = pData->data();
   _pUtf8String[0] = '\0';
   _pUtf8String[nSize] = '\0';
   return pData;
}


// Prepares the object for being overwritten with nLen characters: when the
// buffer has more than one reference or is too small, it is released and a new
// one is allocated. The previous content is NOT preserved in that case.
void cStrUtf8::AllocBeforeWrite(size_t nLen)
{
   const bool bNeedsOwnBuffer = (getRefCnt() > 1) || (nLen > pMgt()->_nAllocLen);
   if (bNeedsOwnBuffer)
   {
      UTF8_LOCK();
      Release();
      UTF8_UNLOCK();
      AllocMem(nLen);
   }

   if (getRefCnt() > 1)
   {
      HMI_APP_ASSERT_ALWAYS();
   }
}


// Drops one reference from the given management data and frees the block when
// the count has fallen to zero or below.
// Returns false (and does nothing) for the NIL object, true otherwise.
bool cStrUtf8::Release(cDataHelper* pData)
{
   if (&EmtyString.d == pData)
   {
      return false;
   }

   HMI_APP_ASSERT((pData->_nRefCnt != 0));
   --(pData->_nRefCnt);
   if (pData->_nRefCnt <= 0)
   {
      // the block was allocated as an array of tU8
      delete[] reinterpret_cast<tU8*>(pData);
   }
   return true;
}


// Drops this object's reference to its buffer and, unless it already was the
// NIL object, resets it to the empty string.
void cStrUtf8::Release()
{
   const bool bDetached = Release(pMgt());
   if (bDetached)
   {
      Init();
   }
}


// Copies nLength bytes of this string, starting at byte position nStart, into a
// newly allocated buffer of 'dest' and zero-terminates the result.
// For nLength == 0 'dest' is set to the empty string. A buffer 'dest' may
// already reference is not released here.
void cStrUtf8::CreateCopyFromThis(cStrUtf8& dest, cUtf8StringIter nStart, size_t nLength) const
{
   const bool bNothingToCopy = (nLength == 0) || (_pUtf8String == 0);
   if (bNothingToCopy)
   {
      dest.Init();
      return;
   }

   dest.AllocMem(nLength);
   const size_t nBytes = nLength * sizeof(tU8);
   memcpy(dest._pUtf8String, _pUtf8String + nStart, nBytes);
   dest._pUtf8String[nBytes] = '\0';
}


// assign a new value to the string
void cStrUtf8::AssignCopy(size_t nSrcLen, const tU8* lpszSrcData)
{
   if (nSrcLen == 0)
   {
      Init();
      return;
   }
   AllocBeforeWrite(nSrcLen);

   if (_pUtf8String == 0)
   {
      return;
   }
   memcpy(_pUtf8String, lpszSrcData, nSrcLen * sizeof(tU8));
   pMgt()->_nStrLen = nSrcLen;
   _pUtf8String[nSrcLen] = '\0';
}


//  "operator+" is done as friend functions for simplicity
//  !! expects, that self object is just initialized or saved memory ptr;
void cStrUtf8::ConcatCopy(size_t nSrc1Len, const tU8* lpszSrc1Data,
                          size_t nSrc2Len, const tU8* lpszSrc2Data)
{
   size_t nLen = nSrc1Len + nSrc2Len;
   if (nLen != 0)
   {
      AllocMem(nLen);
      memcpy(_pUtf8String, lpszSrc1Data, nSrc1Len * sizeof(tU8));
      memcpy(_pUtf8String + nSrc1Len, lpszSrc2Data, nSrc2Len * sizeof(tU8));
      _pUtf8String[nLen] = '\0';
   }
}


void cStrUtf8::ConcatInPlace(size_t nSrcLen, const tU8* lpszSrcData)
{
   // concatenating an empty string is a no-op!
   if (nSrcLen == 0)
   {
      return;
   }

   // allocate a new buffer if more than 2 objects connected
   // allocate a new buffer if it doesn't fit
   bool bNewBufferNecessaryOrMoreRefs = getRefCnt() > 1
                                        || (pMgt()->_nStrLen + nSrcLen) > pMgt()->_nAllocLen;
   if (bNewBufferNecessaryOrMoreRefs)
   {
      UTF8_LOCK();
      cDataHelper* pOldData = pMgt();
      HMI_APP_ASSERT((pOldData != NULL));
      ConcatCopy(pMgt()->_nStrLen, _pUtf8String, nSrcLen, lpszSrcData);
      cStrUtf8::Release(pOldData);
      UTF8_UNLOCK();
   }
   else
   {
      if (_pUtf8String == 0)
      {
         return;
      }
      // fits! fast concatenation when buffer big enough
      memcpy(_pUtf8String + pMgt()->_nStrLen, lpszSrcData, nSrcLen * sizeof(tU8));
      pMgt()->_nStrLen += nSrcLen;
      if ((pMgt()->_nStrLen > pMgt()->_nAllocLen))
      {
         HMI_APP_ASSERT_ALWAYS();
      }
      _pUtf8String[pMgt()->_nStrLen] = '\0';
   }
}


// Finds the first position in this string at which the first nCharacters
// characters of oSearch occur. Returns the iterator of that position or itEnd().
// itEnd() is returned immediately when either string is empty or nCharacters
// is 0, and as soon as a comparison reaches the end of this string or of
// oSearch before nCharacters characters were compared.
cUtf8StringIter cStrUtf8::StrStrIter(const cStrUtf8& oSearch, size_t nCharacters) const
{
   if (oSearch.IsEmpty() || IsEmpty() || nCharacters == 0)
   {
      return itEnd();
   }

   for (cUtf8StringIter itCandidate = itBegin(); itCandidate != itEnd(); itCandidate = itNext(itCandidate))
   {
      cUtf8StringIter itHere   = itCandidate;
      cUtf8StringIter itNeedle = oSearch.itBegin();
      size_t nMatched = 0;

      while (nMatched < nCharacters)
      {
         // one of the strings ends before enough characters were compared
         if (itHere == itEnd() || itNeedle == oSearch.itEnd())
         {
            return itEnd();
         }
         if (at(itHere) != oSearch.at(itNeedle))
         {
            break;   // mismatch, try the next start position
         }
         itHere   = itNext(itHere);
         itNeedle = oSearch.itNext(itNeedle);
         ++nMatched;
      }

      if (nMatched == nCharacters)
      {
         return itCandidate;   // found
      }
   }
   return itEnd();
}


/// static local methods ///////////////////////////////////////////////////////////////

// Returns true when the byte is a continuation byte of a multi-byte character
// (bit pattern 10xxxxxx, i.e. 0x80..0xBF) and therefore not a start byte.
static bool bIsPartOfMultiByteChar(tU8 u8AnyByte)
{
   return (u8AnyByte & 0xC0) == 0x80;
}


// Returns the number of bytes of the character introduced by u8StartByte.
// Valid characters consist of 1 to 4 bytes in this implementation.
// Returns 0 (after asserting) for a zero byte or a continuation byte, and 5 or
// 6 (after asserting) for start bytes of 0xF8 and above, which lie outside the
// Unicode range.
static size_t GetCharacterByteLength(tU8 u8StartByte)
{
   if ((u8StartByte == 0) || bIsPartOfMultiByteChar(u8StartByte))
   {
      HMI_APP_ASSERT_ALWAYS();
      return 0;
   }

   size_t nBytes;
   if (u8StartByte < 0xC0)
   {
      nBytes = 1;   // standard ASCII
   }
   else if (u8StartByte < 0xE0)
   {
      nBytes = 2;
   }
   else if (u8StartByte < 0xF0)
   {
      nBytes = 3;
   }
   else if (u8StartByte < 0xF8)
   {
      // UTF-8 ends at U+10FFFF (RFC 3629), so the start byte has to be below
      // 0xF5. A complete check would also need the second byte: after 0xF4 the
      // first continuation byte has to be below 0x90.
      HMI_APP_ASSERT(u8StartByte < 0xF5);
      nBytes = 4;
   }
   else
   {
      // fifth or sixth length bit set: invalid Unicode range
      HMI_APP_ASSERT_ALWAYS();
      nBytes = (u8StartByte < 0xFC) ? 5 : 6;
   }
   return nBytes;
}


// static
// Returns the length in bytes (as reported by strlen, terminating zero not
// counted) of the zero-terminated string pszUtf8, or 0 if pszUtf8 is NULL.
size_t cStrUtf8::u8StrLen(const tU8* pszUtf8)
{
   if (pszUtf8 == NULL)
   {
      return 0;
   }
   return strlen(reinterpret_cast<const char*>(pszUtf8));
}


// Maps a Unicode code point to the weight that Compare() uses to order two
// differing characters. The weight is the sum of
//   - a position inside the character's script (taken from a lookup table or
//     computed linearly from the code point) and
//   - the script offset fact_LATIN / fact_RUS / fact_GRE / fact_THA / fact_ARA
//     (assigned in setLanguageForSort()).
//
// code   : Unicode code point.
// return : 0 for code 0, otherwise the weight as described above. Code points
//          outside all handled ranges get code + fact_LATIN + 1000.
tUniCode32 cStrUtf8::GetWeightedValueFromUniCode(tUniCode32 code) const
{
   if (code == 0)
   {
      return 0;
   }

   // latin base 0x20 .. 0x7F: one table entry per code point
   if (code >= 0x20 && code <= 0x7F)
   {
      static const unsigned short kWeightsLatinFrom0x20[] =
      {
         8, 292, 293, 248, 274, 249, 250, 294, 295, 296, 251, 252, 297, 253, 298, 299, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 300, 301, 254,
         255, 256, 302, 257, 41, 63, 65, 74, 81, 99, 103, 108, 112, 130, 132, 136, 146, 148, 158, 178, 180, 182, 190, 200, 205, 226, 228, 230,
         232, 238, 303, 304, 305, 1, 9, 2, 52, 64, 69, 76, 90, 100, 105, 109, 119, 131, 134, 141, 147, 153, 168, 179, 181, 185, 195, 202, 215,
         227, 229, 231, 235, 242, 306, 307, 308, 309, 500
      };
      return kWeightsLatinFrom0x20[code - 0x20] + fact_LATIN;
   }

   // latin Eastern Europe 0xA0 .. 0x17E: one table entry per code point
   if (code >= 0x0A0 && code <= 0x017E)
   {
      static const unsigned short kWeightsLatinFrom0xA0[] =
      {
         10, 310, 273, 275, 272, 276, 311, 258, 5, 259, 285, 312, 260, 7, 261, 6, 262, 263, 282, 283, 3, 289, 264, 265, 4, 279,
         286, 313, 281, 280, 284, 314, 42, 43, 44, 45, 46, 50, 51, 66, 82, 83, 84, 85, 113, 114, 115, 116, 79, 149, 159, 160, 161,
         163, 162, 266, 166, 206, 207, 208, 209, 234, 246, 194, 53, 54, 55, 56, 57, 58, 59, 70, 91, 92, 93, 94, 120, 121, 122,
         123, 80, 154, 169, 170, 171, 172, 173, 267, 174, 216, 217, 218, 219, 236, 247, 237, 48, 60, 49, 61, 47, 62, 68, 71,
         500, 500, 500, 500, 67, 72, 75, 77, 73, 78, 89, 95, 500, 500, 88, 96, 87, 97, 86, 98, 500, 500, 102, 106, 500, 500, 104,
         107, 500, 500, 500, 500, 500, 500, 118, 124, 110, 125, 117, 126, 129, 127, 111, 128, 500, 500, 133, 135, 500, 139,
         142, 140, 143, 138, 144, 500, 500, 137, 145, 151, 155, 152, 156, 150, 157, 500, 500, 500, 165, 175, 500, 500, 164,
         176, 167, 177, 184, 186, 187, 188, 183, 189, 191, 196, 500, 500, 193, 197, 192, 198, 199, 203, 201, 204, 500, 500,
         211, 220, 214, 221, 500, 500, 210, 222, 212, 223, 213, 224, 500, 500, 500, 500, 233, 239, 243, 240, 244, 241, 245
      };
      //   101          0x0192 todo
      //   225          0x01D3 todo
      //   225          0x01D4 todo
      return kWeightsLatinFrom0xA0[code - 0xA0] + fact_LATIN;
   }

   // Thai 0x0E01 .. 0x0E5B: linear
   if (code >= 0x0E01 && code <= 0x0E5B)
   {
      const bool bThaiDigit = (code >= 0x0E50 && code <= 0x0E59);
      if (bThaiDigit)
      {
         // Thai digits linear (sorted above letters)
         return (code - 0x0E50) + fact_THA + 0;
      }
      return (code - 0x0E01) + fact_THA + 500;
   }

   // Arabic 0x0600 .. 0x06FF: linear
   if (code >= 0x0600 && code <= 0x06FF)
   {
      const bool bArabicIndicDigit = (code >= 0x0660 && code <= 0x0669);
      if (bArabicIndicDigit)
      {
         // Arabic-Indic digits linear (sorted above letters)
         return (code - 0x0660) + fact_ARA + 0;
      }
      return (code - 0x0600) + fact_ARA + 500;
   }

   // Cyrillic 0x0401 .. 0x0491: table lookup, code points behind the table get 255
   if (code >= 0x0401 && code < 0x0492)
   {
      static const unsigned char kWeightsCyrillicFrom0x401[] =
      {
         19, 13, 15, 21, 27, 31, 33, 39, 45, 51, 63, 65, 37, 69, 79, 1, 3, 5, 7, 11, 17, 23, 25, 29, 35, 41, 43, 47,
         49, 53, 55, 57, 59, 61, 67, 71, 73, 75, 77, 81, 83, 85, 87, 89, 91, 93, 95, 2, 4, 6, 8, 12, 18, 24, 26, 30,
         36, 42, 44, 48, 50, 54, 56, 58, 60, 62, 68, 72, 74, 76, 78, 82, 84, 86, 88, 90, 92, 94, 96, 255,
         20, 14, 16, 22, 28, 32, 34, 40, 46, 52, 64, 66, 38, 70, 80, 9, 10
      };
      const size_t nEntries = sizeof(kWeightsCyrillicFrom0x401) / sizeof(kWeightsCyrillicFrom0x401[0]);
      const tUniCode32 nOffset = code - 0x401;
      const unsigned char nWeight = (nOffset < nEntries) ? kWeightsCyrillicFrom0x401[nOffset] : 255;

      return nWeight + fact_RUS;
   }

   // Greek 0x0374 .. 0x03FE: table lookup, code points behind the table get 255
   if (code >= 0x0374 && code < 0x03FF)
   {
      static const unsigned char kWeightsGreekFrom0x374[] =
      {
         240, 241, 255, 255, 255, 255, 243, 255, 255, 255, 250, 255, 255, 255, 255, 255, 255, 255,
         4, 242, 14, 20, 27, 27, 44, 57, 71, 30, 2, 6, 8, 10, 12, 16, 18, 22, 25, 32, 34, 36, 38, 40, 42, 46, 48, 255,
         50, 53, 55, 62, 65, 67, 69, 29, 59, 3, 13, 19, 26, 60, 1, 5, 7, 9, 11, 15, 17, 21, 24, 31, 33, 35, 37, 39, 41,
         45, 47, 51, 49, 52, 54, 61, 64, 66, 68, 28, 58, 43, 56, 70, 255, 255, 23, 255, 255, 255, 63, 255, 245, 248,
         246, 72, 73, 74, 75, 77, 76, 79, 78
      };
      const size_t nEntries = sizeof(kWeightsGreekFrom0x374) / sizeof(kWeightsGreekFrom0x374[0]);
      const tUniCode32 nOffset = code - 0x374;
      const unsigned char nWeight = (nOffset < nEntries) ? kWeightsGreekFrom0x374[nOffset] : 255;

      return nWeight + fact_GRE;
   }

   // everything else: behind latin characters
   return code + fact_LATIN + 1000;
}


// Returns the number of bytes the code point occupies in UTF-8 format.
//
// code   : Unicode code point.
// return : 1 for code < 0x80, 2 for code < 0x800, 3 for code < 0x10000,
//          4 for code < 0x110000; otherwise an assertion is raised and 0 is
//          returned.
size_t cStrUtf8::GetByteLengthOfUniCodeChar(tUniCode32 code)
{
   size_t nBytes = 0;

   if (code < 0x80)
   {
      nBytes = 1;
   }
   else if (code < 0x800)
   {
      nBytes = 2;
   }
   else if (code < 0x10000)
   {
      nBytes = 3;
   }
   else if (code < 0x110000)
   {
      nBytes = 4;
   }
   else
   {
      // max Unicode (http://en.wikipedia.org/wiki/UTF-8): in November 2003 UTF-8 was
      // restricted by RFC 3629 to end at U+10FFFF
      HMI_APP_ASSERT_ALWAYS();
   }
   return nBytes;
}


// Compare function for unicode
int cStrUtf8::Compare(const tU8* pszUtf8) const
{
   return Compare(cStrUtf8(pszUtf8));
}


// Compare function for unicode.
//
// Compares this string with sCmp character by character. Identical characters
// are skipped; the first differing pair is ordered by its weighted value
// (see GetWeightedValueFromUniCode()).
//
// sCmp   : string to compare with.
// return : -1 if this string sorts before sCmp (or this string is empty),
//           0 if both strings are equal,
//          +1 if this string sorts after sCmp (or sCmp is empty).
int cStrUtf8::Compare(const cStrUtf8& sCmp) const
{
   if (_pUtf8String == NULL || IsEmpty())
   {
      // NOTE(review): this also returns -1 when sCmp is empty as well, i.e. two
      // empty strings never compare equal. Kept as is - confirm whether callers
      // rely on it before changing.
      return -1;   // less
   }

   // fast path: same length and identical bytes means equal
   if (sCmp.Length() == pMgt()->_nStrLen)
   {
      if (memcmp(sCmp._pUtf8String, _pUtf8String, pMgt()->_nStrLen) == 0)
      {
         return 0;
      }
   }
   if (sCmp._pUtf8String == NULL || sCmp.IsEmpty())
   {
      return +1;   // greater
   }

   cUtf8StringIter iter1 = itBegin();
   cUtf8StringIter iter2 = sCmp.itBegin();

   for (;;)
   {
      const bool bEndOfThis  = (iter1 == itEnd());
      const bool bEndOfOther = (iter2 == sCmp.itEnd());

      // end of both strings reached without a difference, so they are equal.
      // (The original check repeated the "other string ended" condition, so this
      // case was never detected and at() was called on the end iterators.)
      if (bEndOfThis && bEndOfOther)
      {
         return 0;   // equal
      }
      // end of this string reached, so it is shorter, so this string < the other
      if (bEndOfThis)
      {
         return -1;   // less
      }
      // end of the other string reached, so this string is longer
      if (bEndOfOther)
      {
         return +1;   // greater
      }

      tUniCode32 uu1 = at(iter1);
      tUniCode32 uu2 = sCmp.at(iter2);

      if (uu1 == uu2)
      {
         iter1 = itNext(iter1);
         iter2 = sCmp.itNext(iter2);
         continue;
      }
      // calculate the weighted value for each code
      return (GetWeightedValueFromUniCode(uu1) < GetWeightedValueFromUniCode(uu2)) ? -1 : +1;
   }
}


// Ordering predicate for sorting cStrUtf8 objects; returns the result of a < b.
// Usage: std::sort(&sArray[0], &sArray[n], cStrUtf8::StrUtf8SortFunc)
bool cStrUtf8::StrUtf8SortFunc(const cStrUtf8& a, const cStrUtf8& b)
{
   const bool bFirstIsLess = (a < b);
   return bFirstIsLess;
}


// Set language for sort algorithm (default is latin).
//
// Stores the language and assigns the script offsets fact_LATIN, fact_RUS,
// fact_GRE, fact_THA and fact_ARA that GetWeightedValueFromUniCode() adds to
// the per-script weights. The script belonging to the selected language gets
// offset 0 and therefore sorts first; the other scripts keep their staggered
// offsets.
//
// language : value in the range DEFAULT .. AUS. DEFAULT is accepted and selects
//            the latin-first order; this file's static initializer calls this
//            function with DEFAULT, which the former check (language > DEFAULT)
//            rejected, leaving all offsets at zero.
// return   : true if the language was accepted, false if it is out of range
//            (nothing is modified in that case).
//
// todo: special eastern europe characters for DAN,FIN,SWE,SLO,POL,CZE and HUN
bool cStrUtf8::setLanguageForSort(cStrUtf8::tenLanguage language)
{
   if (language < DEFAULT || language > AUS)
   {
      return false;
   }

   _lang = language;

   // staggered offsets; the leading script is reset to 0 below
   fact_LATIN = 0x10000;
   fact_RUS   = 0x20000;
   fact_GRE   = 0x30000;
   fact_THA   = 0x40000;
   fact_ARA   = 0x50000;

   switch (_lang)
   {
      // listed explicitly as placeholders for the language specific handling (see todo)
      case CZE:
      case DAN:
      case FIN:
      case SWE:
      case NOR:
      case POL:
      case SLO:
      case HUN:
         fact_LATIN = 0;
         break;

      case GRE: // GRE-latin-RUS-THA-ARA
         fact_GRE = 0;
         break;
      case RUS: // RUS-latin-GRE-THA-ARA
         fact_RUS = 0;
         break;
      case THA: // THA-latin-RUS-GRE-ARA
         fact_THA = 0;
         break;
      case ARA: // ARA-latin-RUS-GRE-THA
         fact_ARA = 0;
         break;

      default: /* DEFAULT, GER, ENG_US, ESP_LAT, FRA_CAN,_POR,_ITA, DUT, TUR, ENG, FRA, ESP, BRA, AUS */
         fact_LATIN = 0;   // latin-RUS-GRE-THA-ARA
         break;
   }
   return true;
}


// Helper whose constructor requests the DEFAULT sort language from
// cStrUtf8::setLanguageForSort(). The return value of that call is not evaluated.
class cStrUtf8InitHelper
{
   public:
      cStrUtf8InitHelper()
      {
         (void)cStrUtf8::setLanguageForSort(cStrUtf8::DEFAULT);
      }
};


// file-local instance; its constructor runs once during static initialization of this file
static cStrUtf8InitHelper _initStrUtf8;
}


}
