/*
 * Copyright (c) 2010 Gracenote.
 *
 * This software may not be used in any way or distributed without
 * permission. All rights reserved.
 *
 * Some code herein may be covered by US and international patents.
 */

/*
 * gn_phonetic_conversion.c
 *
 * The backbone of the conversion process: both the X-SAMPA to L&H+
 * conversion maps and the L&H+ post-processing conversion maps are
 * applied through the same conversion code implemented here.
 */

#include "gn_phonetic_conversion.h"
#include "gn_phonetic_conversion_utility.h"

/* EPAL Headers */
#include "gnepal_string.h"

/*
 * Static Prototypes
 */
/* Selects algorithm 1 or 2 according to conversion_map->algorithm_id. */
static gnex_error_t
_phonetic_conversion_algorithm(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	);
	
/* Applies every rule of the map in turn, each one across the whole string. */
static gnex_error_t
_phonetic_conversion_algorithm_1(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	);
	
/*
 * Walks the string once, translating each position with the first rule
 * that matches there; fails if no rule matches.
 */
static gnex_error_t
_phonetic_conversion_algorithm_2(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	);

/*
 * Public API Implementation
 */

/*
 * gn_phonetic_conversion_map_convert
 *
 * Converts the transcription in src with the given conversion map and
 * returns a duplicate of the converted text through dst.  The algorithm
 * that is run is chosen from the map's algorithm identifier.
 *
 * conversion_map  - map holding the algorithm identifier and the rules.
 * src             - zero-terminated input transcription.
 * dst             - receives the result of gnepal_strdup(); *dst must be
 *                   GN_NULL on entry.  Presumably the caller owns the
 *                   returned string and must release it - confirm which
 *                   EPAL routine pairs with gnepal_strdup.
 *
 * Returns GNEX_PHOCVRT_InvalidArg when an argument is GN_NULL or *dst is
 * already set, GNEX_PHOCVRT_NoMemory when the duplicate cannot be made,
 * any error reported by the dynamic string or the conversion algorithm,
 * and GNEX_PHOCVRT_NoError on success.
 */
gnex_error_t
gn_phonetic_conversion_map_convert(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_uchar_t** dst
	)
{
	gn_dynamic_string_t* converted = GN_NULL;
	gnex_error_t status = GNEX_PHOCVRT_NoError;

	if ((conversion_map == GN_NULL) || (src == GN_NULL))
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* The output slot must exist and must not already hold a string. */
	if ((dst == GN_NULL) || (*dst != GN_NULL))
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* Working buffer, initially sized from the input length. */
	status = gn_dynamic_string_create(&converted, gnepal_strlen(src));

	if (status == GNEX_PHOCVRT_NoError)
	{
		status = _phonetic_conversion_algorithm(conversion_map, src, converted);
	}

	if (status == GNEX_PHOCVRT_NoError)
	{
		/* Hand the caller its own copy; the working buffer is freed below. */
		*dst = gnepal_strdup(converted->buffer);
		if (*dst == GN_NULL)
		{
			status = GNEX_PHOCVRT_NoMemory;
		}
	}

	/* Released on every path, including a failed create. */
	gn_dynamic_string_free(&converted);

	return status;
}

/*
 * Static API Implementation
 */

/*
 * _phonetic_conversion_algorithm
 *
 * Dispatcher: runs the conversion routine named by the map's algorithm
 * identifier and passes its result straight back.
 *
 * Returns GNEX_PHOCVRT_InvalidArg when an argument is GN_NULL,
 * GNEX_PHOCVRT_InvalidAlgID when the identifier is neither
 * PHONETIC_MAP_ALGORITHM_1 nor PHONETIC_MAP_ALGORITHM_2, and otherwise
 * whatever the selected algorithm returns.
 */
static gnex_error_t
_phonetic_conversion_algorithm(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	)
{
	if (conversion_map == GN_NULL)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	if ((src == GN_NULL) || (dynamic_string == GN_NULL))
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* The map itself states which algorithm its rules were written for. */
	switch (conversion_map->algorithm_id)
	{
		case PHONETIC_MAP_ALGORITHM_1:
			return _phonetic_conversion_algorithm_1(conversion_map, src, dynamic_string);

		case PHONETIC_MAP_ALGORITHM_2:
			return _phonetic_conversion_algorithm_2(conversion_map, src, dynamic_string);

		default:
			break;
	}

	return GNEX_PHOCVRT_InvalidAlgID;
}

/*
 * _phonetic_conversion_algorithm_1
 *
 * Applies the rules of a conversion map one after another, each over the
 * whole string.  For a given rule the input is scanned left to right:
 * where the rule's source token matches at the current position the
 * target token is written to the output and the scan skips past the
 * match; otherwise the current character is copied unchanged.  Once a
 * rule has been applied to the whole string, that output becomes the
 * input of the next rule.  The rule list ends at the first rule whose
 * src_token is GN_NULL.
 *
 * Rules with an empty src_token are skipped.  A map without any
 * applicable rule yields the input text unchanged.
 *
 * The result is appended to dynamic_string; dynamic_string is not
 * cleared first.
 *
 * Returns GNEX_PHOCVRT_InvalidArg when an argument is GN_NULL, otherwise
 * GNEX_PHOCVRT_NoError or the first error reported by a
 * gn_dynamic_string call.
 */
static gnex_error_t
_phonetic_conversion_algorithm_1(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	)
{
	const gn_phonetic_conversion_rule_t* rule = GN_NULL;
	gnex_error_t error = GNEX_PHOCVRT_NoError;
	gn_dynamic_string_t* string[2] =  {0, 0};
	gn_dynamic_string_t* dst = GN_NULL;
	unsigned int input_idx = 0;
	unsigned int output_idx = 1;

	if (	(conversion_map == GN_NULL)
		||	(src == GN_NULL)
		||	(dynamic_string == GN_NULL)
		)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}

	/* Two scratch buffers that swap input/output roles after every rule. */
	error = gn_dynamic_string_create(&string[0], gnepal_strlen(src));
	if (error != GNEX_PHOCVRT_NoError)
	{
		return error;
	}
	
	error = gn_dynamic_string_create(&string[1], gnepal_strlen(src));
	if (error != GNEX_PHOCVRT_NoError)
	{
		gn_dynamic_string_free(&string[0]);
		return error;
	}
	
	/* Seed the first input buffer with the caller's text. */
	error = gn_dynamic_string_append_string(string[input_idx], src);
	if (error == GNEX_PHOCVRT_NoError)
	{
		rule = conversion_map->rules;
		while ((rule != GN_NULL) && (rule->src_token != GN_NULL))
		{
			if (rule->src_token[0] == 0)
			{
				/*
				 * An empty source token gives a zero-length comparison,
				 * which (assuming gnepal_strncmp follows strncmp - confirm)
				 * matches at every position while consuming no input, so
				 * the scan below would never finish.  Ignore such rules.
				 */
				rule++;
				continue;
			}

			src = string[input_idx]->buffer;
			dst = string[output_idx];

			error = gn_dynamic_string_clear(dst);
			while ((*src != 0) && (error == GNEX_PHOCVRT_NoError))
			{
				if (gnepal_strncmp(src, rule->src_token, gnepal_strlen(rule->src_token)) == 0)
				{
					/* Match: emit the replacement and jump over the token. */
					error = gn_dynamic_string_append_string(dst, rule->trg_token);
					src += gnepal_strlen(rule->src_token);
				}
				else
				{
					/* No match here: pass the character through. */
					error = gn_dynamic_string_append_char(dst, *src);
					src++;
				}
			}
			
			if (error != GNEX_PHOCVRT_NoError)
			{
				break;
			}

			/* This rule's output is the next rule's input. */
			output_idx = input_idx;
			input_idx ^= 1;
			rule++;
		}

		if (error == GNEX_PHOCVRT_NoError)
		{
			/*
			 * After each swap string[input_idx] is the buffer written most
			 * recently; when no rule was applied it still holds the copy
			 * of the original text.  Propagate a failed append instead of
			 * reporting success.
			 */
			error = gn_dynamic_string_append_string(dynamic_string, string[input_idx]->buffer);
		}
	}
	
	gn_dynamic_string_free(&string[0]);
	gn_dynamic_string_free(&string[1]);

	return error;
}

/*
 * _phonetic_conversion_algorithm_2
 *
 * Walks the transcription once from left to right.  At each position the
 * rules are tried in map order; the first rule whose Source-Phoneme
 * matches there has its Target-Phoneme appended to the destination and
 * the transcription pointer moves past the matched Source-Phoneme.  If no
 * Conversion-Rule matches at a position the conversion stops with an
 * error.  The rule list ends at the first rule whose src_token is GN_NULL.
 *
 * Rules with an empty src_token are skipped.
 *
 * dynamic_string is cleared before any output is written.
 *
 * Returns GNEX_PHOCVRT_InvalidArg when an argument is GN_NULL,
 * GNEX_PHOCVRT_InvalidConversion when some position has no matching rule,
 * any error reported by a gn_dynamic_string call, and
 * GNEX_PHOCVRT_NoError on success.
 */
static gnex_error_t
_phonetic_conversion_algorithm_2(
	const gn_phonetic_conversion_map_t* conversion_map,
	const gn_uchar_t* src,
	gn_dynamic_string_t* dynamic_string
	)
{
	const gn_phonetic_conversion_rule_t* rule = GN_NULL;
	gnex_error_t error = GNEX_PHOCVRT_NoError;

	if (	(conversion_map == GN_NULL)
		||	(src == GN_NULL)
		||	(dynamic_string == GN_NULL)
		)
	{
		return GNEX_PHOCVRT_InvalidArg;
	}
	
	error = gn_dynamic_string_clear(dynamic_string);
	while ((*src != 0) && (error == GNEX_PHOCVRT_NoError))
	{
		/* Assume failure until a rule matches at this position. */
		error = GNEX_PHOCVRT_InvalidConversion;
		rule = conversion_map->rules;
		while ((rule != GN_NULL) && (rule->src_token != GN_NULL))
		{
			if (rule->src_token[0] == 0)
			{
				/*
				 * An empty source token gives a zero-length comparison,
				 * which (assuming gnepal_strncmp follows strncmp - confirm)
				 * always matches yet consumes no input, so the outer loop
				 * would never advance.  Ignore such rules.
				 */
				rule++;
				continue;
			}

			if (gnepal_strncmp(src, rule->src_token, gnepal_strlen(rule->src_token)) == 0)
			{
				/* First match wins: consume the token, emit its target. */
				src += gnepal_strlen(rule->src_token);
				error = gn_dynamic_string_append_string(dynamic_string, rule->trg_token);
				break;
			}

			rule++;
		}
	}

	return error;
}
