/*
 * Copyright (c) 2010 Gracenote.
 *
 * This software may not be used in any way or distributed without
 * permission. All rights reserved.
 *
 * Some code herein may be covered by US and international patents.
 */

/*
 * gn_phonetic_conversion_data.c - 
 *
 * This contains the X-SAMPA to L&H+ conversion maps as well as
 * the language-specific L&H+ post-processing conversion maps.  Not all
 * languages need L&H+ post-processing, so some languages do not have
 * an L&H+ post-processing conversion map.  It also provides two functions
 * to retrieve this data.
 */

#include "gn_phonetic_conversion_data.h"

/*
 * Globals and typedefs
 */

static gn_phonetic_conversion_rule_t g_xsampa_to_lnh_phoneme_rules[] = 
{
	{"\"i\"", "\"i \""},
	{"_j", "[j"},
	{"%", "!"},
	{"I_1_\"", "I55%0"},
	{"I_2_\"", "I35%0"},
	{"I_3_\"", "I214%0"},
	{"I_4_\"", "I51%0"},
	{"I_5_\"", "I11%0"},
	{"_\"", "%0"},
	{"\"", "'"},
	{"_0", "+%>"},
	{"'20", "%0"},
	{"'_~", "%\"~r\""},
	{"_1'", "_1%r"},
	{"_2'", "_2%r"},
	{"_3'", "_3%r"},
	{"_4'", "_4%r"},
	{"_5'", "_5%r"},
	{"I'", "I%r"},
	{"U'", "U%r"},
	{"u'", "u%r"},
	{"i'", "i%r"},
	{"_5", "11"},
	{"_F", "'F"},
	{"@'", "$%r"},
	{"O'", "O%r"},
	{"@", "$"},
	{"{", "@"},
	{"_h", "{"},
	{"_3", "214"},
	{"_2", "35"},
	{"_4", "51"},
	{"_1", "55"},
	{"i_X", "i:~"},
	{"M_X", "u-%>"},
	{"1", "i0"},
	{"i0i0", "11"},
	{"2i04", "214"},
	{"5i0", "51"},
	{"2", "e+"},
	{"e+14", "214"},
	{"3", "E0"},
	{"E05", "35"},
	{"6", "a0"},
	{"4", "r6"},
	{"21r6", "214"},
	{"7'", "o-%r"},
	{"7", "o-"},
	{"8", "o0"},
	{"Vy", "^&y"},
	{"9", "E+"},
	{"_-", "%\\"},
	{"_}", "%7"},
	{"e55I", "e55&I"},
	{"e51I", "e51&I"},
	{"e35I", "e35&I"},
	{"e214I", "e214&I"},
	{"e11I", "e11&I"},
	{"a55I", "a55&I"},
	{"a35I", "a35&I"},
	{"a214I", "a214&I"},
	{"a51I", "a51&I"},
	{"a11I", "a11&I"},
	{"o55U", "o55&U"},
	{"o35U", "o35&U"},
	{"o214U", "o214&U"},
	{"o51U", "o51&U"},
	{"o11U", "o11&U"},
	{"a55U", "a55&U"},
	{"a35U", "a35&U"},
	{"a214U", "a214&U"},
	{"a51U", "a51&U"},
	{"a11U", "a11&U"},
	{"_~", "%~"},
	{"}", "u0"},
	{"aI", "a&I"},
	{"ai", "ai"},
	{"aU", "a&U"},
	{"Au", "Au"},
	{"au", "au"},
	{"A'", "a%r"}, // Changed due to mail from Nuance 2011/06/17
	{"E'", "E%r"},
	{"d'", "d+"}, // doubtful rules to be checked
	{"d_z:\\", "d&z~:"},
	{"d_z\\:", "d&z~:"},
	{"d_z\\", "d&z~"},
	{"d_z", "d&z"},
	{"dZ", "d&Z"},
	{"E$", "E$"},
	{"eI", "e&I"},
	{"Ei", "Ei"},
	{"ei", "ei"},
	{"eu", "eu"},
	{"Eu", "Eu"},
	{"eU", "eU"},
	{"h\\", "h6"},
	{"H", "h\\"},
	{"I$", "I$"},
	{"J\\", "j-"},
	{"J", "n~"},
	{"j\\", "J"},
	{"k_s", "k&s"},
	{"L", "l~"},
	{"l\'", "l+"},
	{"M", "u-"},
	{"N\\", "<ng>"},
	{"N", "nK"},
	{"<ng>", "N"},
	{"n'", "n+"}, // doubtful rules to be checked
	{"OI", "O&I"},
	{"oi", "oi"},
	{"oU", "o&U"},
	{"ou", "ou"},
	{"Oy", "Oy"},
	{"V", "^"},
	{"P", "V"},
	{"p:\\", "P:"},
	{"p\\:", "P:"},
	{"p\\", "P"},
	{"p_f", "p&f"},
	{"R\\", "<rr>"},
	{"R", "R\\"},
	{"<rr>", "R"},
	{"r\\'", "R+"},
	{"s'", "s+"}, // doubtful rules to be checked 
	{"t_s:\\", "t&c~:"},
	{"t_s\\:", "t&c~:"},
	{"t_s\\", "t&c~"},
	{"t_s", "t&s"},
	{"s:\\", "S:"},
	{"s\\:", "S:"},
	{"s\\", "S"},
	{"t'", "t+"}, // doubtful rules to be checked
	{"t_C", "t&c~"},
	{"tS", "t&S"},
	{"U$", "U$"},
	{"z'", "z+"},
	{"z\\", "z~"},
	{" ", "_"},
	{"Q", "A+"},
	{"!", "'2"},
	{"[j", "%j"},
	{"U55%r", "u55%r"},
	{"U35%r", "u35%r"},
	{"U214%r", "u214%r"},
	{"U51%r", "u51%r"},
	{"U11%r", "u11%r"},
	{"U%r", "u%r"},
	{"&u%r", "&U%r"},
	{"%r11", "11%r"},
	{"%r55", "55%r"},
	{"%r214", "214%r"},
	{"%r35", "35%r"},
	{"%r51", "51%r"},
	{"a&I$", "a&I.$"},
	{"a&U$", "a&U.$"},
	{"e&I$", "e&I.$"},
	{"O&I$", "O&I.$"},
	{"o&U$", "o&U.$"},
	{"ai:", "a.i:"},
	{"au:", "a.u:"},
	{"Oy:", "O.y:"},
	{"nK%j", "nK"},
	{GN_NULL, GN_NULL}
};

/*
 * g_xsampa_to_lnh_kor_kor_phoneme_rules
 *
 * Korean-specific X-SAMPA to L&H+ rule table.  In
 * g_xsampa_to_lnh_phoneme_maps it is paired with
 * PHONETIC_MAP_ALGORITHM_2 and used only for the KOR_kor slot.
 *
 * Entries are listed in order of decreasing match length (multi-character
 * sequences first, single characters last) and the table ends with a
 * {GN_NULL, GN_NULL} sentinel entry.
 *
 * Unlike the language-agnostic table, this one also lists plain symbols
 * whose replacement equals the match (e.g. "a" -> "a").
 * NOTE(review): presumably Algorithm 2 only emits symbols that have an
 * explicit rule, which would make these identity entries and the
 * longest-first ordering load-bearing -- confirm against the Algorithm 2
 * implementation before editing.
 */
static gn_phonetic_conversion_rule_t g_xsampa_to_lnh_kor_kor_phoneme_rules[] = 
{
	{"t_s\\_h", "c{"},
	{"t_s\\`", "c`"},
	{"d_z\\", "j-"},
	{"t_s\\", "c"},
	{"jMi", "ju-&i"},
	{"k_h", "k{"},
	{"p_h", "p{"},
	{"t_h", "t{"},
	{"h\\", "h6"},
	{"j2", "je+"},
	{"ja", "ja"},
	{"jE", "jE"},
	{"je", "je"},
	{"jM", "ju-"},
	{"jO", "jO"},
	{"jo", "jo"},
	{"ju", "ju"},
	{"k`", "k`"},
	{"Mi", "u-&i"},
	{"p`", "p`"},
	{"s`", "s`"},
	{"t`", "t`"},
	{"w2", "we+"},
	{"wa", "wa"},
	{"wE", "wE"},
	{"we", "we"},
	{"wi", "wi"},
	{"wO", "wO"},
	{"2", "e+"},
	{"4", "r"},
	{" ", "_"},
	{"#", "#"},
	{".", "."},
	{"a", "a"},
	{"b", "b"},
	{"d", "d"},
	{"E", "E"},
	{"e", "e"},
	{"g", "g"},
	{"h", "h"},
	{"i", "i"},
	{"j", "j"},
	{"k", "k"},
	{"l", "l"},
	{"M", "u-"},
	{"m", "m"},
	{"N", "nK"},
	{"n", "n"},
	{"O", "O"},
	{"o", "o"},
	{"p", "p"},
	{"s", "s"},
	{"t", "t"},
	{"u", "u"},
	{"w", "w"},
	{GN_NULL, GN_NULL}
};


/*
 * g_xsampa_to_lnh_phoneme_maps
 *
 * Per-language X-SAMPA to L&H+ conversion maps.  Each entry pairs an
 * algorithm selector with a rule table.  The array is indexed by the
 * lang_idx passed to gn_phonetic_conversion_lnh_map_get, so entries are
 * positional: the comment in front of each entry names the language
 * that slot is meant for.
 * NOTE(review): the order presumably has to match the language
 * enumeration that defines SPEECH_LANG_COUNT (declared elsewhere) --
 * keep both in sync when adding a language.
 *
 * Only KOR_kor uses PHONETIC_MAP_ALGORITHM_2 with its own rule table;
 * all other slots share g_xsampa_to_lnh_phoneme_rules.
 */
gn_phonetic_conversion_map_t g_xsampa_to_lnh_phoneme_maps[SPEECH_LANG_COUNT] =
{
/* USA_eng */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* MEX_spa */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* CAN_fre */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* GBR_eng */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* FRA_fre */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* ESP_spa */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* DEU_ger */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* ITA_ita */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* JPN_jpn */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* CHN_qad */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* RUS_rus */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* NLD_dut */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* KOR_kor */	{PHONETIC_MAP_ALGORITHM_2, g_xsampa_to_lnh_kor_kor_phoneme_rules},
/* BRA_por */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* PRT_por */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* TUR_tur */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules},
/* AUS_eng */	{PHONETIC_MAP_ALGORITHM_1, g_xsampa_to_lnh_phoneme_rules}
};


/*
 * L&H+ post-processing rules for the AUS_eng slot of
 * g_lnh_post_process_maps.  Inserts "&" between the vowel and "$" in
 * "E$", "I$" and "U$", and replaces "?" with the empty string.
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_aus_eng_phoneme_rules[] = 
{
	{"E$", "E&$"},
	{"I$", "I&$"},
	{"U$", "U&$"},
	{"?", ""},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the CAN_fre slot of
 * g_lnh_post_process_maps.  Every rule removes the "&" joiner from a
 * two-symbol sequence (e.g. "a&I" becomes "aI"); the generic X-SAMPA
 * table is what introduces these "&" forms.
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_can_fre_phoneme_rules[] = 
{
	{"a&I", "aI"},
	{"a&U", "aU"},
	{"d&Z", "dZ"},
	{"e&I", "eI"},
	{"O&I", "OI"},
	{"o&U", "oU"},
	{"t&S", "tS"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the CHN_qad slot of
 * g_lnh_post_process_maps.
 *
 * The rules rewrite "C" to "c~", lower-case "A" before "%r" (with or
 * without one of the digit groups 55/35/214/51/11 in between), and
 * replace the "%r" / %"~r" suffix after "i" with "z+".  The digit
 * groups are the ones the generic table produces from "_1" .. "_5"
 * (presumably tone markers -- confirm).
 *
 * NOTE(review): the digit-carrying variants are listed before the bare
 * "A%r" / "i%r" forms; keep that order unless the Algorithm 1
 * implementation shows it does not matter.
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_chn_qad_phoneme_rules[] = 
{
	{"C", "c~"},
	{"i55%\"~r\"", "i55z+"},
	{"i35%\"~r\"", "i35z+"},
	{"i214%\"~r\"", "i214z+"},
	{"i51%\"~r\"", "i51z+"},
	{"i11%\"~r\"", "i11z+"},
	{"A55%r", "a55%r"},
	{"A35%r", "a35%r"},
	{"A214%r", "a214%r"},
	{"A51%r", "a51%r"},
	{"A11%r", "a11%r"},
	{"i%\"~r\"", "iz+"},
	{"i55%r", "i55z+"},
	{"i35%r", "i35z+"},
	{"i214%r", "i214z+"},
	{"i51%r", "i51z+"},
	{"i11%r", "i11z+"},
	{"A%r", "a%r"},
	{"i%r", "iz+"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the DEU_ger slot of
 * g_lnh_post_process_maps.  The first three rules remove the "&" joiner
 * ("a&I", "a&U", "O&I"); the last three insert one ("ai", "au", "Oy").
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_deu_ger_phoneme_rules[] = 
{
	{"a&I", "aI"},
	{"a&U", "aU"},
	{"O&I", "OI"},
	{"ai", "a&i"},
	{"au", "a&u"},
	{"Oy", "O&y"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the FRA_fre slot of
 * g_lnh_post_process_maps.  Removes the "&" joiner from "d&Z" and "t&S".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_fra_fre_phoneme_rules[] = 
{
	{"d&Z", "dZ"},
	{"t&S", "tS"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the GBR_eng slot of
 * g_lnh_post_process_maps.  Inserts "&" between the vowel and "$" in
 * "E$", "I$" and "U$".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_gbr_eng_phoneme_rules[] = 
{
	{"E$", "E&$"},
	{"I$", "I&$"},
	{"U$", "U&$"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the JPN_jpn slot of
 * g_lnh_post_process_maps (note that the table name says "jap_jap"
 * while the slot is labelled JPN_jpn).  Rewrites "i:~" to "i%>" and
 * drops the ":" after "m" and "n".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_jap_jap_phoneme_rules[] = 
{
	{"i:~", "i%>"},
	{"m:", "m"},
	{"n:", "n"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the NLD_dut slot of
 * g_lnh_post_process_maps (note that the table name says "ndl_dut"
 * while the slot is labelled NLD_dut).  The first five rules remove the
 * "&" joiner; the last two insert one into "Au" and "Ei".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_ndl_dut_phoneme_rules[] = 
{
	{"a&I", "aI"},
	{"d&Z", "dZ"},
	{"e&I", "eI"},
	{"O&I", "OI"},
	{"t&S", "tS"},
	{"Au", "A&u"},
	{"Ei", "E&i"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the PRT_por slot of
 * g_lnh_post_process_maps.  Removes the "&" joiner from "d&Z" and "t&S".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_prt_por_phoneme_rules[] = 
{
	{"d&Z", "dZ"},
	{"t&S", "tS"},
	{GN_NULL, GN_NULL}
};

/*
 * L&H+ post-processing rules for the RUS_rus slot of
 * g_lnh_post_process_maps.  Removes the "&" joiner from "a&I" and "e&I".
 * Terminated by a {GN_NULL, GN_NULL} sentinel entry.
 */
static gn_phonetic_conversion_rule_t g_lnh_post_process_rus_rus_phoneme_rules[] = 
{
	{"a&I", "aI"},
	{"e&I", "eI"},
	{GN_NULL, GN_NULL}
};

/*
 * g_lnh_post_process_maps
 *
 * Per-language L&H+ post-processing maps.  The array is indexed by the
 * lang_idx passed to gn_phonetic_conversion_lnh_post_process_map_get,
 * so entries are positional: the comment in front of each entry names
 * the language that slot is meant for.
 * NOTE(review): the order presumably has to match the language
 * enumeration that defines SPEECH_LANG_COUNT (declared elsewhere) --
 * keep both in sync when adding a language.
 *
 * Languages that need no post-processing carry
 * {PHONETIC_MAP_ALGORITHM_NONE, GN_NULL}; consumers must be prepared
 * for a GN_NULL rule table in those slots.
 */
gn_phonetic_conversion_map_t g_lnh_post_process_maps[SPEECH_LANG_COUNT] =
{
/* USA_eng */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* MEX_spa */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* CAN_fre */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_can_fre_phoneme_rules},
/* GBR_eng */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_gbr_eng_phoneme_rules},
/* FRA_fre */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_fra_fre_phoneme_rules},
/* ESP_spa */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* DEU_ger */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_deu_ger_phoneme_rules},
/* ITA_ita */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* JPN_jpn */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_jap_jap_phoneme_rules},
/* CHN_qad */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_chn_qad_phoneme_rules},
/* RUS_rus */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_rus_rus_phoneme_rules},
/* NLD_dut */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_ndl_dut_phoneme_rules},
/* KOR_kor */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* BRA_por */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* PRT_por */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_prt_por_phoneme_rules},
/* TUR_tur */	{PHONETIC_MAP_ALGORITHM_NONE, GN_NULL},
/* AUS_eng */	{PHONETIC_MAP_ALGORITHM_1, g_lnh_post_process_aus_eng_phoneme_rules}
};


/*
 * Public API Implementation
 */

/*
 * gn_phonetic_conversion_lnh_map_get
 *
 * Look up the X-SAMPA to L&H+ conversion map for a language index.
 * In g_xsampa_to_lnh_phoneme_maps, Korean is paired with its own rule
 * table and PHONETIC_MAP_ALGORITHM_2; every other language shares the
 * language-agnostic rule table and PHONETIC_MAP_ALGORITHM_1.
 *
 * conversion_map  [out] must be non-NULL and must point at a pointer
 *                 that currently holds GN_NULL.  On success it receives
 *                 the address of the entry inside the global table
 *                 (not a copy).
 * lang_idx        [in]  index into the map table; must be less than
 *                 SPEECH_LANG_COUNT.
 *
 * Returns GNEX_PHOCVRT_InvalidArg if any of the argument checks fails,
 * GNEX_PHOCVRT_NoError otherwise.
 */
gnex_error_t
gn_phonetic_conversion_lnh_map_get(
	gn_phonetic_conversion_map_t** conversion_map,
	gn_uint32_t lang_idx
	)
{
	/* The output slot itself must exist before it can be inspected. */
	if (conversion_map == GN_NULL)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* Refuse to overwrite a pointer the caller has already filled in. */
	if (*conversion_map != GN_NULL)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* The index must address an existing table entry. */
	if (lang_idx >= SPEECH_LANG_COUNT)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	*conversion_map = g_xsampa_to_lnh_phoneme_maps + lang_idx;
	return GNEX_PHOCVRT_NoError;
}

/*
 * gn_phonetic_conversion_lnh_post_process_map_get
 *
 * Look up the L&H+ post-processing map for a language index.
 * Languages without post-processing have a
 * {PHONETIC_MAP_ALGORITHM_NONE, GN_NULL} entry in
 * g_lnh_post_process_maps; the call still succeeds for them and hands
 * back that entry.
 *
 * conversion_map  [out] must be non-NULL and must point at a pointer
 *                 that currently holds GN_NULL.  On success it receives
 *                 the address of the entry inside the global table
 *                 (not a copy).
 * lang_idx        [in]  index into the map table; must be less than
 *                 SPEECH_LANG_COUNT.
 *
 * Returns GNEX_PHOCVRT_InvalidArg if any of the argument checks fails,
 * GNEX_PHOCVRT_NoError otherwise.
 */
gnex_error_t
gn_phonetic_conversion_lnh_post_process_map_get(
	gn_phonetic_conversion_map_t** conversion_map,
	gn_uint32_t lang_idx
	)
{
	/* The output slot itself must exist before it can be inspected. */
	if (conversion_map == GN_NULL)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* Refuse to overwrite a pointer the caller has already filled in. */
	if (*conversion_map != GN_NULL)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* The index must address an existing table entry. */
	if (lang_idx >= SPEECH_LANG_COUNT)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	*conversion_map = g_lnh_post_process_maps + lang_idx;
	return GNEX_PHOCVRT_NoError;
}
