/*
 * Copyright (c) 1985 Corporation for Research and Educational Networking
 * Copyright (c) 1988 University of Illinois Board of Trustees, Steven
 *		Dorner, and Paul Pomes
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Corporation for
 *	Research and Educational Networking (CREN), the University of
 *	Illinois at Urbana, and their contributors.
 * 4. Neither the name of CREN, the University nor the names of their
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE TRUSTEES AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE TRUSTEES OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Revision identification string for this file (RCS $Id$ keyword).  The
 * definition is compiled only when `lint' is not defined.
 */
#ifndef lint
static char  RcsId[] = "@(#)$Id: phoneme.c,v 1.13 1994/03/12 00:24:45 paul Exp $";
#endif

#include "protos.h"

/*
 *  English to Phoneme translation.
 *
 *  Rules are made up of four parts:
 *  
 *	The left context.
 *	The text to match.
 *	The right context.
 *	The phonemes to substitute for the matched text.
 *
 *  Procedure:
 *
 *	Separate each block of letters (apostrophes included)
 *	and add a space on each side.  For each unmatched 
 *	letter in the word, look through the rules where the 
 *	text to match starts with the letter in the word.  If 
 *	the text to match is found and the right and left 
 *	context patterns also match, output the phonemes for 
 *	that rule and skip to the next unmatched letter.
 *
 *
 *  Special Context Symbols:
 *
 *	#   One or more vowels
 *	:   Zero or more consonants
 *	^   One consonant.
 *	.   One of B, D, V, G, J, L, M, N, R, W or Z (voiced 
 *	    consonants)
 *	%   One of ER, E, ES, ED, ING, ELY (a suffix)
 *	    (Right context only)
 *	+   One of E, I or Y (a "front" vowel)
 */

/* Truth values; supplied here only when TRUE has not been defined already. */
#ifndef TRUE
#define FALSE (0)
#define TRUE (!0)
#endif

/*
 * The four pointers of a rule, indexed the way find_rule() reads them:
 *	[0] left context	[1] text to match
 *	[2] right context	[3] phonemes to substitute
 */
typedef char *Rule[4];		/* A rule is four character pointers */

/*
 * Rule lists, defined in another file.  xlate_word() picks entry 0 for a
 * character that is not a lower-case letter and entry (letter - 'a' + 1)
 * for a lower-case letter.  find_rule() treats a rule whose match-text
 * pointer is null as the end of a list.
 */
extern Rule *rules[];		/* An array of pointers to rules */

/* Helpers private to this file (__P is presumably supplied by protos.h). */
static char *xlate_word __P((char *));
static int find_rule __P((char *, int, Rule *, char **));
static int leftmatch __P((char *, char *));
static int rightmatch __P((char *, char *));

/*
 * Letter classes used by the context symbols.
 *
 * phonemify() folds every letter to lower case before any matching takes
 * place, so the classes are defined on lower-case letters.  (Testing for
 * upper-case vowels here made isvowel() false for all translated text and
 * therefore made every lower-case letter, vowels included, a "consonant".)
 *
 * Both macros evaluate their argument more than once; do not pass an
 * expression that has side effects.
 */
#define isvowel(chr)  \
    ((chr)=='a' || (chr)=='e' || (chr)=='i' || (chr)=='y' || \
     (chr)=='o' || (chr)=='u')

/* The cast keeps a negative char value out of islower(). */
#define isconsonant(chr)    \
    (islower((unsigned char) (chr)) && !isvowel(chr))

/*
 * phonemify - translate a word into its phoneme string.
 *
 * The word is folded to lower case IN PLACE (the caller's buffer is
 * modified), copied into a work buffer with one blank in front and one
 * behind, and handed to xlate_word().  While copying, a character that
 * isconsonant() accepts and that equals the character just stored is
 * dropped, so doubled consonants collapse to one.
 *
 * The work buffer holds 80 bytes; text that does not fit is left out of the
 * translation instead of being written past the end of the buffer.  The
 * whole of word is still folded to lower case.
 *
 * Returns whatever xlate_word() returns: a pointer to its static result
 * buffer, which the next call overwrites.
 */
char *
phonemify(word)
	char *word;
{
	char	scratch[80];
	/* last two bytes are reserved for the closing blank and the NUL */
	char	*limit = scratch + sizeof(scratch) - 2;
	register char *cp;

	*scratch = ' ';
	for (cp = scratch + 1; *word; word++)
	{
		/* cast: a negative char must not be passed to <ctype.h> */
		if (isupper((unsigned char) *word))
			*word = tolower((unsigned char) *word);

		if (cp >= limit)
			continue;	/* buffer full: keep folding, stop copying */

		/* cp - 1 is always valid: scratch[0] holds the leading blank */
		if (!isconsonant(*word) || *word != *(cp - 1))
			*cp++ = *word;
	}
	*cp++ = ' ';
	*cp = '\0';
	return (xlate_word(scratch));
}

/*
 * xlate_word - translate one blank-padded word into phonemes.
 *
 * word is expected in the form phonemify() builds: a blank, the text, a
 * closing blank.  Scanning starts after the leading blank.  For each
 * position the rule list for the current character is searched with
 * find_rule() and the phonemes of the rule it selects, if any, are appended
 * to the result.  rules[0] serves every character outside 'a'..'z';
 * a letter uses rules[letter - 'a' + 1].
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 * Phonemes that no longer fit in the buffer are dropped rather than
 * written past its end.
 */
static char *
xlate_word(word)
	char word[];
{
	static char phonetics[1024];
	char	*out = phonetics;	/* next free byte of the result */
	char	*end = phonetics + sizeof(phonetics) - 1; /* room for NUL */
	char	*phoneme;
	int	indx = 1;		/* Current position; skip the initial blank */
	int	type;			/* Index of the rule list to search */

	/*
	 * Test before the first lookup so that a word consisting of the
	 * leading blank alone yields an empty result instead of reading
	 * beyond its terminator.
	 */
	while (word[indx] != '\0')
	{
		/*
		 * An explicit range test is used because the value indexes
		 * rules[]: islower() may accept characters outside 'a'..'z'
		 * in some locales, which would select an entry that does not
		 * correspond to any letter.
		 */
		if (word[indx] >= 'a' && word[indx] <= 'z')
			type = word[indx] - 'a' + 1;
		else
			type = 0;

		indx = find_rule(word, indx, rules[type], &phoneme);

		/* find_rule() leaves phoneme null when no rule applied */
		for (; phoneme != NULL && *phoneme != '\0' && out < end; phoneme++)
			*out++ = *phoneme;
	}
	*out = '\0';
	return (phonetics);
}

/*
 * find_rule - pick the first rule in a list that applies at word[indx].
 *
 * A rule applies when its match text equals the characters starting at
 * word[indx], its left context is accepted by leftmatch() for the text
 * ending at word[indx - 1], and its right context is accepted by
 * rightmatch() for the text following the match.  The list ends at a rule
 * whose match-text pointer is null.
 *
 * On success *phoneme is set to the rule's phoneme string and the index of
 * the first character after the matched text is returned.  When no rule
 * applies *phoneme is NULL and indx + 1 is returned, which skips the
 * character.
 */
static int 
find_rule(word, indx, rules, phoneme)
	char word[];
	int indx;
	Rule *rules;
	char **phoneme;
{
	Rule	*cur;		/* rule being tried */
	char	*mp;		/* cursor in the rule's match text */
	int	after;		/* index in word just past the compared text */

	*phoneme = NULL;

	for (cur = rules; (*cur)[1] != NULL; cur++)
	{
		/* Compare the match text with the word, character by character */
		mp = (*cur)[1];
		after = indx;
		while (*mp != '\0' && *mp == word[after])
		{
			mp++;
			after++;
		}

		if (*mp != '\0')
			continue;	/* match text differs from the word */

		if (!leftmatch((*cur)[0], &word[indx - 1]))
			continue;	/* left context rejected */

		if (!rightmatch((*cur)[2], &word[after]))
			continue;	/* right context rejected */

		*phoneme = (*cur)[3];
		return after;
	}

	/* End of list reached: no rule for this character, skip it */
	return indx + 1;
}


/*
 * leftmatch - test a rule's left context.
 *
 * pattern is the left-context string of a rule; context points at the LAST
 * character of the text to the left of the match.  The pattern is read from
 * its last character to its first while the text pointer moves left.
 *
 * Pattern characters:
 *	letter, ' or blank	must equal the text character
 *	#	one or more vowels
 *	:	zero or more consonants
 *	^	one consonant
 *	.	one of b, d, v, g, j, l, m, n, r, w, z
 *	+	one of e, i, y
 * The suffix symbol '%' and any other character make the match fail.
 *
 * Letter sets are tested in lower case because phonemify() folds the text
 * to lower case before it reaches this function; the earlier upper-case
 * comparisons for '.' and '+' could never succeed.
 *
 * The text pointer is moved left without a bounds check.  The leading blank
 * phonemify() stores in front of the word is what ends a run of letters.
 *
 * Returns TRUE when the whole pattern is satisfied, FALSE otherwise.  An
 * empty pattern matches any context.
 */
static int 
leftmatch(pattern, context)
	char *pattern, *context;
{
	char	*pat;
	char	*text;
	int	count;

	if (*pattern == '\0')	/* null string matches any context */
		return TRUE;

	/* point to last character in pattern string */
	count = strlen(pattern);
	pat = pattern + (count - 1);

	text = context;

	for (; count > 0; pat--, count--)
	{
		/* Letters, apostrophe and blank stand for themselves */
		if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
		{
			if (*pat != *text)
				return FALSE;
			text--;
			continue;
		}

		switch (*pat)
		{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return FALSE;
			text--;
			while (isvowel(*text))
				text--;
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text--;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return FALSE;
			text--;
			break;

		case '.':	/* b, d, v, g, j, l, m, n, r, w, z */
			if (*text != 'b' && *text != 'd' && *text != 'v'
			    && *text != 'g' && *text != 'j' && *text != 'l'
			    && *text != 'm' && *text != 'n' && *text != 'r'
			    && *text != 'w' && *text != 'z')
				return FALSE;
			text--;
			break;

		case '+':	/* e, i or y (front vowel) */
			if (*text != 'e' && *text != 'i' && *text != 'y')
				return FALSE;
			text--;
			break;

		case '%':	/* suffix symbol: right context only */
		default:	/* unknown pattern character */
			return FALSE;
		}
	}

	return TRUE;
}


/*
 * rightmatch - test a rule's right context.
 *
 * pattern is the right-context string of a rule; context points at the
 * first character after the matched text.  Pattern and text are both read
 * left to right.
 *
 * Pattern characters:
 *	letter, ' or blank	must equal the text character
 *	#	one or more vowels
 *	:	zero or more consonants
 *	^	one consonant
 *	.	one of b, d, v, g, j, l, m, n, r, w, z
 *	+	one of e, i, y
 *	%	one of the suffixes er, e, es, ed, ing, ely
 * Any other character makes the match fail.
 *
 * Letter sets are tested in lower case because phonemify() folds the text
 * to lower case before it reaches this function; the earlier upper-case
 * comparisons for '.', '+' and '%' could never succeed.
 *
 * Returns TRUE when the whole pattern is satisfied, FALSE otherwise.  An
 * empty pattern matches any context.
 */
static int 
rightmatch(pattern, context)
	char *pattern, *context;
{
	char	*pat;
	char	*text;

	if (*pattern == '\0')	/* null string matches any context */
		return TRUE;

	text = context;

	for (pat = pattern; *pat != '\0'; pat++)
	{
		/* Letters, apostrophe and blank stand for themselves */
		if (isalpha((unsigned char) *pat) || *pat == '\'' || *pat == ' ')
		{
			if (*pat != *text)
				return FALSE;
			text++;
			continue;
		}

		switch (*pat)
		{
		case '#':	/* One or more vowels */
			if (!isvowel(*text))
				return FALSE;
			text++;
			while (isvowel(*text))
				text++;
			break;

		case ':':	/* Zero or more consonants */
			while (isconsonant(*text))
				text++;
			break;

		case '^':	/* One consonant */
			if (!isconsonant(*text))
				return FALSE;
			text++;
			break;

		case '.':	/* b, d, v, g, j, l, m, n, r, w, z */
			if (*text != 'b' && *text != 'd' && *text != 'v'
			    && *text != 'g' && *text != 'j' && *text != 'l'
			    && *text != 'm' && *text != 'n' && *text != 'r'
			    && *text != 'w' && *text != 'z')
				return FALSE;
			text++;
			break;

		case '+':	/* e, i or y (front vowel) */
			if (*text != 'e' && *text != 'i' && *text != 'y')
				return FALSE;
			text++;
			break;

		case '%':	/* er, e, es, ed, ing, ely (a suffix) */
			/*
			 * text[1] is read only after text[0] matched a letter
			 * and text[2] only after text[1] did, so the
			 * terminating NUL is never passed.
			 */
			if (text[0] == 'e')
			{
				if (text[1] == 'l' && text[2] == 'y')
					text += 3;	/* ely */
				else if (text[1] == 'r' || text[1] == 's'
					 || text[1] == 'd')
					text += 2;	/* er, es, ed */
				else
					text += 1;	/* e alone; an l is not gobbled */
			}
			else if (text[0] == 'i' && text[1] == 'n'
				 && text[2] == 'g')
				text += 3;		/* ing */
			else
				return FALSE;
			break;

		default:	/* unknown pattern character */
			return FALSE;
		}
	}

	return TRUE;
}