



/**
 * This function cuts off a faulty UTF8 character.
 * It is possible that bytes of a UTF8 string are cut off when the string is
 * written to a buffer if the buffer is too small. If the last character is
 * coded as more than one byte, there might be some bytes missing at the end.
 * This function cuts off the trailing bytes until the last character is well
 * coded.\n
 * It starts at the last byte of a string and checks the type of byte. There
 * are three cases possible:\n
 * 1st: \nIt is part of a multi byte character (10xxxxxx):
 *      Walk in head direction and count the number of bytes until the first
 *      byte of the multi bytes character and check if its coded size matches.
 *      If not, the last character is cut off (the first byte of the multi byte
 *      character is set to the terminating '\0').\n
 * 2nd: \nThe last byte is the first character of a multi byte character (11xxxxxx):
 *      Cut off this byte, because following bytes are missing.\n
 * 3rd: \nThe last byte is a single byte coded character:
 *      --> do nothing
 *
 * @param szString        string to be checked
 * @param u32BufferLength length of the buffer
 * @pre   szString is not NULL
 * @pre   szString was cut off at the end
 * @attention This function only works if the string was cut off at the end!
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 *
 */


#ifndef UTF8_STRINGUTIL_HEADER
#define UTF8_STRINGUTIL_HEADER

/* Smallest byte value with the top bit set (10000000b); bytes below this
 * value are single byte coded characters. The replacement list is fully
 * parenthesized so the cast cannot bind to neighbouring operators. */
#define C_MULTI_CHARACTER_BYTE  ((tU8)0x80)
/* Smallest byte value with the two top bits set (11000000b); bytes at or
 * above this value are treated as the first byte of a multi byte character. */
#define C_FIRST_BYTE_OF_MULTI_CHARACTER ((tU8)0xC0)

tVoid UTF8_vCutOffFaultyCharacter( tString szString, tU32 u32BufferLength );

/**
 * This function determines the next character of given character.
 *
 * @param szCurrentCharacter the current character
 * @pre   szCurrentCharacter is not NULL
 * @return pointer to next character or NULL if end of string is reached
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tString UTF8_szGetNextCharacter( tString szCurrentCharacter );

/**
 * This function determines the next character of given character. It is the
 * variant of UTF8_szGetNextCharacter that takes and returns a tCString.
 *
 * @param szCurrentCharacter the current character
 * @pre   szCurrentCharacter is not NULL
 * @return pointer to next character or NULL if end of string is reached
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tCString UTF8_coszGetNextCharacter( tCString szCurrentCharacter );

/**
 * This function is the equivalent to the function OSALUTIL_s32SaveNPrintFormat.
 * It writes formatted data into the given buffer and takes care for a null
 * termination (by using the function UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param u32BufferLength length of given buffer
 * @param coszFormat      string which contains the format behaviour
 * @param ...             list of parameter
 * @pre   szDest is not NULL
 * @pre   coszFormat is not NULL
 * @return the number of characters printed, or a negative value if an error occurs
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32SaveNPrintFormat( tString szDest, tU32 u32BufferLength, tCString coszFormat, ... );

/**
 * This function is the equivalent to the function
 * OSALUTIL_s32SaveVarNPrintFormat. It writes formatted data into the given
 * buffer and takes care for a null termination (by using the function
 * UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param u32BufferLength length of given buffer
 * @param coszFormat      string which contains the format behaviour
 * @param argptr          list of parameter
 * @pre   szDest is not NULL
 * @pre   coszFormat is not NULL
 * @return returns the number of characters stored in buffer, not counting the terminating null character
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32SaveVarNPrintFormat( tString szDest, tU32 u32BufferLength, tCString coszFormat, va_list argptr );

/**
 * This function is the equivalent to the function OSALUTIL_szSaveStringNCopy.
 * It copies data into the given buffer and takes care for a null termination
 * (by using the function UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param coszSource      string to be copied
 * @param u32BufferLength length of given buffer
 * @pre   szDest is not NULL
 * @pre   coszSource is not NULL
 * @return pointer to destination string
 * @author CM-DI/ESP2-Brandes
 * @date   22.03.2005
 */
tString UTF8_szSaveStringCopy( tString szDest, tCString coszSource, tU32 u32BufferLength );

/**
 * This function is the equivalent to the function OSALUTIL_szSaveStringNCopy.
 * It copies data into the given buffer and takes care for a null termination
 * (by using the function UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param coszSource      string to be copied
 * @param u32BufferLength length of given buffer
 * @param u32CntChar      number of characters to be copied
 * @pre   szDest is not NULL
 * @pre   coszSource is not NULL
 * @return pointer to destination string
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tString UTF8_szSaveStringNCopy( tString szDest, tCString coszSource, tU32 u32BufferLength, tU32 u32CntChar );

/**
 * This function is the equivalent to the function
 * OSALUTIL_szSaveStringNConcat. It appends one string to another one and
 * takes care for a null termination (by using the function
 * UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param coszSource      string to be appended
 * @param u32BufferLength length of given buffer (size of whole buffer including the already used bytes)
 * @pre   szDest is not NULL
 * @pre   coszSource is not NULL
 * @return a pointer to the destination string
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tString UTF8_szSaveStringConcat( tString szDest, tCString coszSource, tU32 u32BufferLength );

/**
 * This function is the equivalent to the function
 * OSALUTIL_szSaveStringNConcat. It appends one string to another one and
 * takes care for a null termination (by using the function
 * UTF8_vCutOffFaultyCharacter).
 *
 * @param szDest          buffer the string is written to
 * @param coszSource      string to be appended
 * @param u32BufferLength length of given buffer
 * @param u32CntChar      number of character to be appended
 * @pre   szDest is not NULL
 * @pre   coszSource is not NULL
 * @return a pointer to the destination string
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tString UTF8_szSaveStringNConcat( tString szDest, tCString coszSource, tU32 u32BufferLength, tU32 u32CntChar );

/**
 * This function is an equivalent to the function OSAL_s32StringNCompare.
 * It compares up to u32CntChar characters.
 *
 * @param coszStr1   first string to be compared
 * @param coszStr2   second string to be compared
 * @param u32CntChar number of characters to be compared
 * @pre coszStr1 is not NULL
 * @pre coszStr2 is not NULL
 * @return < 0 string1 less than string2 \n
 *           0 string1 identical to string2 \n
 *         > 0 string1 greater than string2
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32StringNCharCompare( tCString coszStr1, tCString coszStr2, tU32 u32CntChar );

/**
 * This function searches the first occurrence of a character within a string.
 *
 * @param coszSource     string in which should be searched
 * @param coszSearchChar character to be searched (it needs not be NULL terminated)
 * @pre   coszSource is not NULL
 * @pre   coszSearchChar is not NULL
 * @return a pointer to the first occurrence of coszSearchChar in string,
 *         or NULL if coszSearchChar is not found
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tCString UTF8_ps8StringSearchChar( tCString coszSource, tCString coszSearchChar );

/**
 * This function scans a string for the last occurrence of a character
 *
 * @param coszSource     string in which should be searched
 * @param coszSearchChar null terminated character to be searched
 * @pre   coszSource is not NULL
 * @pre   coszSearchChar is not NULL
 * @return a pointer to the last occurrence of coszSearchChar in string,
 *         or NULL if coszSearchChar is not found
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tCString UTF8_ps8StringRSearchChar( tCString coszSource, tCString coszSearchChar );

/**
 * This function determines a value specifying the length of the substring in
 * coszSource that consists entirely of characters in coszCharSet. If string 
 * begins with a character not in coszCharSet, the function returns 0.
 *
 * @param coszSource  string in which should be searched
 * @param coszCharSet characters to be searched
 * @pre   coszSource is not NULL
 * @pre   coszCharSet is not NULL
 * @return an integer value specifying the length of the substring in coszSource
 *         that consists entirely of characters in coszCharSet. If coszSource
 *         begins with a character not in coszCharSet, the function returns 0
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32StringSegment( tCString coszSource, tCString coszCharSet );

/**
 * This function determines a value specifying the length of the initial
 * segment of coszSource that consists entirely of characters not in coszCharSet. 
 * If coszSource begins with a character that is in coszCharSet, the function returns 0.
 *
 * @param coszSource  string in which should be searched
 * @param coszCharSet characters to be searched
 * @pre   coszSource is not NULL
 * @pre   coszCharSet is not NULL
 * @return an integer value specifying the length of the substring in string
 *         that consists entirely of characters not in coszCharSet. If string
 *         begins with a character that is in coszCharSet, the function returns 0
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32StringNotSegment( tCString coszSource, tCString coszCharSet );

/**
 * This function scans strings for characters in specified character sets.
 * It searches for the first occurrence of a character given by character
 * set in the given string (equivalent to ANSI strpbrk).
 *
 * @param coszSource  string in which should be searched
 * @param coszCharSet characters to be searched
 * @pre   coszSource is not NULL
 * @pre   coszCharSet is not NULL
 * @return a pointer to the first occurrence of any character from coszCharSet
 *         in string, or a NULL pointer if the two string arguments have no
 *         characters in common
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tCString UTF8_ps8StringBreak( tCString coszSource, tCString coszCharSet );

/**
 * This function determines the number of characters of the given string.
 * It starts at the begin of the string and walks up to the end.
 * When counting there are two cases possible:\n
 * 1st: \nCurrent byte is a single coded character (0xxxxxxx) \n -->
 *      increase number of characters and go to the next byte.\n
 * 2nd: \nCurrent byte is the first byte of a multi byte character (11xxxxxx) \n -->
 *      increase number of characters, get the coded number of used bytes and
 *      jump to the first byte of the next character.
 *
 * @param coszStr string of which the characters should be counted
 * @pre   coszStr is not NULL
 * @return the number of characters in string, excluding the terminating NULL
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32StringCharCount( tCString coszStr );

/**
 * This function determines the number of characters of the given string,
 * with u32MaxCntChar as the upper bound of the count.
 * @param coszStr string of which the characters should be counted
 * @param u32MaxCntChar upperbound of char count
 * @pre   coszStr is not NULL
 * @return the number of characters in string, excluding the terminating NULL
 * @author TMS CM-AI/PJ-CF11-Tiessen
 * @date   26.11.2008
 */
tU32 UTF8_u32StringCharCount( tCString coszStr, tU32 u32MaxCntChar);

/**
 * This function checks if given character is alphanumeric
 * ('A'-'Z', 'a'-'z' or '0'-'9')
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @attention This function only works with single byte characters.
 * @return a non-zero value if coszStr is within the ranges
 *         A - Z, a - z, or 0 - 9
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsAlphaNum( tCString coszStr );

/**
 * This function checks if given character is alpha
 * ('A'-'Z' or 'a'-'z')
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is within the ranges
 *         A - Z or a - z
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsAlpha( tCString coszStr );

/**
 * This function checks if given character is numeric ('0'-'9').
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is within the ranges
 *         0 - 9
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsDigit( tCString coszStr );

/**
 * This function checks if given character is a particular representation
 * of a space character
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is a white-space character
 *         (0x09 - 0x0D or 0x20)
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsSpace( tCString coszStr );

/**
 * This function checks if given character is a particular representation
 * of a uppercase letter  (A - Z)
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is a uppercase letter
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsUpper( tCString coszStr );

/**
 * This function checks if given character is a particular representation
 * of a lowercase letter (a - z)
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is a lowercase letter
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsLower( tCString coszStr );

/**
 * This function checks if given character is a particular representation
 * of a hexadecimal digit
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is a hexadecimal digit
 *         (A - F, a - f, or 0 - 9).
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsHexDigit( tCString coszStr );

/**
 * This function checks if given character is a backspace.
 *
 * @param coszStr character to be checked
 * @pre coszStr is not NULL
 * @return a non-zero value if coszStr is a backspace (\b)
 *         
 * @author CM-DI/ESP2-Brandes
 * @date   12.08.2005
 */
tS32 UTF8_s32IsBackspace( tCString coszStr );

/**
 * This function converts character to lowercase one.
 *
 * @param coszStr character to be converted
 * @pre coszStr is not NULL
 * @return 0 if given string uses more than one byte,
 *         otherwise the (converted) value
 * @attention This function only converts one byte values in range from
 *            'A' - 'Z'.
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32ToLower( tCString coszStr );

/**
 * This function converts character to uppercase one.
 *
 * @param coszStr   character to be converted
 * @pre coszStr is not NULL
 * @return 0 if given string uses more than one byte,
 *         otherwise the (converted) value
 * @attention This function only converts one byte values in range from
 *            'a' - 'z'.
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32ToUpper( tCString coszStr );

/**
 * This function counts the number of equal characters from the beginning until a mismatch is found.
 *
 * @param coszStr1 first string to be compared
 * @param coszStr2 second string to be compared
 * @pre coszStr1 is not NULL
 * @pre coszStr2 is not NULL
 * @return the number of equal character
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32GetCntEqualChar( tCString coszStr1, tCString coszStr2 );

/**
 * This function counts the number of equal characters from the beginning until one of the strings ends.
 *
 * @param coszStr1 first string to be compared
 * @param coszStr2 second string to be compared
 * @pre coszStr1 is not NULL
 * @pre coszStr2 is not NULL
 * @return the number of equal character
 * @author CM-AI-PJ-CF13 - ani1hi
 * @date   04.05.2011
 */
tU32 UTF8_u32GetCntAllEqualChar( tCString coszStr1, tCString coszStr2 );

/**
 * This function checks if the current character is part of a multibyte
 * character (and not the first character). It checks the high bits if
 * the first two ones are set to 10xxxxxx.
 *
 * @param coszChar byte to be checked
 * @pre coszChar is not NULL
 * @return a non zero value if given character is part of a multibyte
 *  character, but not the first one
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tS32 UTF8_s32IsPartOfMultiByteChar( tCString coszChar );

/**
 * This function checks if the byte at coszChar is a character coded in one
 * single byte, i.e. if its highest bit is not set (0xxxxxxx). Only the one
 * byte coszChar points to is read.
 *
 * @param coszChar pointer to the byte to be checked
 * @pre coszChar is not NULL
 * @return 1 if the byte is less than C_MULTI_CHARACTER_BYTE,
 *         0 otherwise
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
OSAL_FORCEINLINE tS32 UTF8_s32IsSingleByteCodedChar( tCString coszChar )
{
   /* The byte is read through a tCU8 pointer, exactly as the comparison
    * against the tU8 constant requires (presumably an unsigned 8 bit view,
    * so bytes >= 0x80 are not seen as negative - confirm in the OSAL types). */
   tCU8 u8Byte = *((tCU8*)coszChar);

   if( u8Byte < C_MULTI_CHARACTER_BYTE )
   {
      return 1;
   }

   return 0;
}

/**
 * This function checks if the byte at coszChar is the first byte of a
 * multibyte character, i.e. if its two highest bits are set (11xxxxxx).
 * Only the one byte coszChar points to is read and no upper bound is
 * checked: every byte value at or above C_FIRST_BYTE_OF_MULTI_CHARACTER
 * is reported as a first byte.
 *
 * @param coszChar pointer to the byte to be checked
 * @pre coszChar is not NULL
 * @return 1 if the byte is greater than or equal to
 *         C_FIRST_BYTE_OF_MULTI_CHARACTER, 0 otherwise
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
OSAL_FORCEINLINE tS32 UTF8_s32IsBeginOfMultiByteChar( tCString coszChar )
{
   /* The byte is read through a tCU8 pointer, exactly as the comparison
    * against the tU8 constant requires (presumably an unsigned 8 bit view,
    * so bytes >= 0x80 are not seen as negative - confirm in the OSAL types). */
   tCU8 u8Byte = *((tCU8*)coszChar);

   if( u8Byte >= C_FIRST_BYTE_OF_MULTI_CHARACTER )
   {
      return 1;
   }

   return 0;
}

/**
 * This function determines the number of bytes of a multibyte character.
 *
 * @param coszChar byte to be checked
 * @pre coszChar is not NULL
 * @return 0 if given pointer does not point to the first byte of a character,\n
 *         the number of used bytes for this character otherwise
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32GetCharacterByteLength( tCString coszChar );

/**
 * This function determines the number of bytes used by the given number of
 * characters of a string.
 *
 * @param coszString string whose characters should be checked
 * @param u32CntChar number of characters whose bytes should be counted
 * @pre coszString is not NULL
 * @return the number of bytes of the given number of characters
 * @author CM-DI/ESP2-Brandes
 * @date   21.03.2005
 */
tU32 UTF8_u32GetNCharacterByteLength( tCString coszString, tU32 u32CntChar );

#endif /* UTF8_STRINGUTIL_HEADER */

