File: Internal\AlphabetConverter.cs
Web Access
Project: src\src\runtime\src\libraries\System.Speech\src\System.Speech.csproj (System.Speech)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Reflection;
using System.Text;
using Microsoft.Win32;

namespace System.Speech.Internal
{
    /// <summary>
    /// Names the phoneme alphabets known to the speech internals.
    /// </summary>
    internal enum AlphabetType
    {
        /// <summary>The SAPI phoneme alphabet.</summary>
        Sapi = 0,

        /// <summary>The IPA phoneme alphabet.</summary>
        Ipa = 1,

        /// <summary>The UPS phoneme alphabet.</summary>
        Ups = 2,
    }

    /// <summary>
    /// This class allows conversion between SAPI and IPA phonemes.
    /// Objects of this class are not thread safe for modifying state.
    /// </summary>
    internal class AlphabetConverter
    {
        #region Constructors

        /// <summary>
        /// Creates a converter and selects the phoneme map for the given language.
        /// </summary>
        /// <param name="langId">Language identifier to select; negative values are rejected.</param>
        /// <exception cref="ArgumentException"><paramref name="langId"/> is negative.</exception>
        internal AlphabetConverter(int langId)
        {
            // Start from a sentinel that no accepted id can equal, so the selection
            // below never short-circuits as "already selected".
            _currentLangId = -1;

            // The previously selected id returned here is meaningless during construction.
            _ = SetLanguageId(langId);
        }

        #endregion

        #region internal Methods

        /// <summary>
        /// Translates a sequence of SAPI phonemes into IPA phonemes.
        /// </summary>
        /// <param name="phonemes">The SAPI phonemes to translate.</param>
        /// <returns>
        /// A new array of unicode characters holding the IPA phonemes when the SAPI phonemes are valid;
        /// otherwise null.
        /// </returns>
        internal char[]? SapiToIpa(char[] phonemes) => Convert(phonemes, isSapi: true);

        /// <summary>
        /// Translates a sequence of IPA phonemes into SAPI phonemes.
        /// </summary>
        /// <param name="phonemes">The IPA phonemes to translate.</param>
        /// <returns>
        /// A new array of unicode characters holding the SAPI phonemes when the IPA phonemes are valid;
        /// otherwise null.
        /// </returns>
        internal char[]? IpaToSapi(char[] phonemes) => Convert(phonemes, isSapi: false);

        /// <summary>
        /// Reports whether the given phoneme string could still grow into a single conversion unit,
        /// i.e. whether it is a prefix of (or equal to) some convertible string.
        /// </summary>
        /// <param name="phonemes">The string of SAPI or UPS phoneme ids</param>
        /// <param name="isSapi">True when <paramref name="phonemes"/> holds SAPI phonemes, false for UPS phonemes</param>
        /// <returns>
        /// True when the current phoneme map recognizes the string as a prefix; false when it does not
        /// or when no phoneme map is loaded for the current language.
        /// </returns>
        internal bool IsPrefix(string phonemes, bool isSapi)
        {
            return _phoneMap is not null && _phoneMap.IsPrefix(phonemes, isSapi);
        }

        /// <summary>
        /// Reports whether the given phoneme string is, as a whole, directly convertible by the current phoneme map.
        /// </summary>
        /// <param name="phonemes">The string of SAPI or UPS phoneme ids</param>
        /// <param name="isSapi">True when <paramref name="phonemes"/> holds SAPI phonemes, false for UPS phonemes</param>
        /// <returns>
        /// True when the map yields a conversion for the string; false otherwise, including when no map is loaded.
        /// </returns>
        internal bool IsConvertibleUnit(string phonemes, bool isSapi)
            => _phoneMap?.ConvertPhoneme(phonemes, isSapi) is not null;

        /// <summary>
        /// Selects the language whose phoneme map is used for subsequent conversions.
        /// </summary>
        /// <param name="langId">Language identifier to select; must not be negative.</param>
        /// <returns>The language identifier that was selected before this call.</returns>
        /// <exception cref="ArgumentException"><paramref name="langId"/> is negative.</exception>
        internal int SetLanguageId(int langId)
        {
            if (langId < 0)
            {
                throw new ArgumentException(SR.Get(SRID.MustBeGreaterThanZero), nameof(langId));
            }

            int previousLangId = _currentLangId;
            if (langId == previousLangId)
            {
                // Nothing to switch; the "previous" id is the current one.
                return previousLangId;
            }

            int mapIndex = Array.IndexOf(s_langIds, langId);
            if (mapIndex < 0)
            {
                // No phoneme map exists for this language. This is not treated as an error:
                // the language is recorded and the converter runs without a map.
                _currentLangId = langId;
                _phoneMap = null;
                return previousLangId;
            }

            lock (s_staticLock)
            {
                // Maps are loaded on first use and then shared through the static cache.
                s_phoneMaps[mapIndex] ??= CreateMap(s_resourceNames[mapIndex]);
                _phoneMap = s_phoneMaps[mapIndex];
                _currentLangId = langId;
            }

            return previousLangId;
        }
        #endregion

        #region Private Methods

        /// <summary>
        /// Converts a phoneme string from one alphabet to the other using the current phoneme map.
        /// </summary>
        /// <param name="phonemes">The phonemes to convert.</param>
        /// <param name="isSapi">True to convert from SAPI phonemes, false to convert to SAPI phonemes.</param>
        /// <returns>
        /// A new array with the converted phonemes, a copy of the input when no map is loaded or the input
        /// is empty, or null when the input cannot be split into convertible substrings.
        /// </returns>
        private char[]? Convert(char[] phonemes, bool isSapi)
        {
            // If the phoneset of the selected language is UPS anyway, that is phone mapping is unnecessary,
            // we return the same phoneme string. But we still need to make a copy.
            PhoneMapData? map = _phoneMap;
            if (map == null || phonemes.Length == 0)
            {
                return (char[])phonemes.Clone();
            }

            //
            // We break the phoneme string into substrings of phonemes, each of which is directly convertible from
            // the mapping table. If there is ambiguity, we always choose the largest substring as we go from left
            // to right.
            //
            // For each start position we extend the candidate for as long as it remains a potential prefix of a
            // convertible substring, remembering the longest candidate that was itself convertible. The scan for
            // one start position stops either at the first non-prefix or at the end of the input; in both cases
            // we fall back to the remembered candidate. (Falling back at the end of the input matters: without
            // it, an input whose tail is a prefix of a longer entry would be rejected even though a shorter
            // split exists.)
            //

            string source = new(phonemes);
            StringBuilder result = new();
            int startIndex = 0; // Starting index of the substring being considered

            while (startIndex < source.Length)
            {
                string? lastConvert = null; // Conversion of the longest convertible substring from startIndex
                int endIndex = -1;          // Index of the last character of that substring

                for (int i = startIndex; i < source.Length; i++)
                {
                    string token = source.Substring(startIndex, i - startIndex + 1);
                    if (!map.IsPrefix(token, isSapi))
                    {
                        break;
                    }

                    // Note we may have an empty string for conversion result here
                    string? tempConvert = map.ConvertPhoneme(token, isSapi);
                    if (tempConvert != null)
                    {
                        lastConvert = tempConvert;
                        endIndex = i;
                    }
                }

                // If nothing starting at this position is convertible, the input is not convertible.
                if (lastConvert == null)
                {
                    return null;
                }

                // Use the converted substring and continue right after it. endIndex >= startIndex here,
                // so every iteration makes progress.
                result.Append(lastConvert);
                startIndex = endIndex + 1;
            }

            return result.ToString().ToCharArray();
        }

        /// <summary>
        /// Loads a phoneme map from a manifest resource of the assembly that defines this object's type.
        /// </summary>
        /// <param name="resourceName">Name of the manifest resource holding the map.</param>
        /// <returns>The phoneme map parsed from the resource.</returns>
        /// <exception cref="FileLoadException">The resource is not present in the assembly manifest.</exception>
        private PhoneMapData CreateMap(string resourceName)
        {
            Assembly owner = Assembly.GetAssembly(GetType())!;
            Stream resource = owner.GetManifestResourceStream(resourceName)
                ?? throw new FileLoadException(SR.Get(SRID.CannotLoadResourceFromManifest, resourceName, owner.FullName));

            return new PhoneMapData(new BufferedStream(resource));
        }

        #endregion

        #region Private Fields

        // Language id currently selected on this instance.
        private int _currentLangId;

        // Phoneme map for the selected language, or null when that language has no map.
        private PhoneMapData? _phoneMap;

        // Parallel tables indexed together: s_langIds[i] is served by the manifest resource
        // s_resourceNames[i], and its parsed map is cached in s_phoneMaps[i].
        private static readonly int[] s_langIds = new int[] { 0x804, 0x404, 0x407, 0x409, 0x40A, 0x40C, 0x411 };
        private static readonly string[] s_resourceNames =
            new string[]
            {
                "upstable_chs.upsmap", "upstable_cht.upsmap", "upstable_deu.upsmap", "upstable_enu.upsmap",
                "upstable_esp.upsmap", "upstable_fra.upsmap", "upstable_jpn.upsmap",
            };

        // Sized from s_langIds so the cache cannot drift out of step with the language table.
        // Entries stay null until the corresponding map is first requested.
        private static readonly PhoneMapData?[] s_phoneMaps = new PhoneMapData?[s_langIds.Length];

        // Guards lazy population of s_phoneMaps.
        private static readonly object s_staticLock = new();

        #endregion

        #region Private Type

        internal class PhoneMapData
        {
            /// <summary>
            /// One entry of the mapping table: a SAPI phoneme string paired with a UPS phoneme string.
            /// Instances are immutable because maps are cached and shared between converters.
            /// </summary>
            private sealed class ConversionUnit
            {
                /// <summary>The SAPI side of the mapping.</summary>
                public readonly string sapi;

                /// <summary>The UPS side of the mapping.</summary>
                public readonly string ups;

                /// <summary>
                /// When several entries share the same source string, an entry flagged as default
                /// replaces a previously registered one in the lookup table.
                /// </summary>
                public readonly bool isDefault;

                public ConversionUnit(string sapi, string ups, bool isDefault)
                {
                    this.sapi = sapi;
                    this.ups = ups;
                    this.isDefault = isDefault;
                }
            }

            /// <summary>
            /// Parses a phoneme map from a binary stream and builds the prefix lookup tables for both directions.
            /// </summary>
            /// <param name="input">
            /// Stream positioned at the start of the map data. It is read through a <see cref="BinaryReader"/>
            /// that is disposed when construction finishes.
            /// </param>
            internal PhoneMapData(Stream input)
            {
                using BinaryReader reader = new(input, System.Text.Encoding.Unicode);

                // Layout as consumed here: a 32-bit entry count, then per entry a SAPI string,
                // a UPS string and a 32-bit flag (non-zero marks the entry as default).
                int count = reader.ReadInt32();
                _convertTable = new ConversionUnit[count];
                for (int index = 0; index < count; index++)
                {
                    string sapi = ReadPhoneString(reader);
                    string ups = ReadPhoneString(reader);
                    bool isDefault = reader.ReadInt32() != 0;
                    _convertTable[index] = new ConversionUnit(sapi, ups, isDefault);
                }

                // Both tables are derived from the fully populated conversion table.
                _prefixSapiTable = InitializePrefix(true);
                _prefixUpsTable = InitializePrefix(false);
            }

            /// <summary>
            /// Reports whether the given string is a key of the prefix table for the chosen alphabet,
            /// i.e. a prefix of (or equal to) the source string of some conversion unit.
            /// </summary>
            /// <param name="prefix">The candidate phoneme string.</param>
            /// <param name="isSapi">True to consult the SAPI table, false to consult the UPS table.</param>
            internal bool IsPrefix(string prefix, bool isSapi)
            {
                Hashtable table = isSapi ? _prefixSapiTable : _prefixUpsTable;
                return table.ContainsKey(prefix);
            }

            /// <summary>
            /// Converts a phoneme string that matches a whole conversion unit.
            /// </summary>
            /// <param name="phoneme">The phoneme string to look up.</param>
            /// <param name="isSapi">True when <paramref name="phoneme"/> is SAPI (result is UPS), false for the reverse.</param>
            /// <returns>
            /// The counterpart string of the matching unit, or null when the string is unknown
            /// or is only a proper prefix of longer units.
            /// </returns>
            internal string? ConvertPhoneme(string phoneme, bool isSapi)
            {
                Hashtable table = isSapi ? _prefixSapiTable : _prefixUpsTable;

                // Keys registered only as prefixes carry a null value, so they fall out here as "no conversion".
                ConversionUnit? match = (ConversionUnit?)table[phoneme];
                return isSapi ? match?.ups : match?.sapi;
            }

            /// <summary>
            /// Builds the lookup table for one direction: every proper prefix of a unit's source string
            /// becomes a key with a null value, and the full source string maps to its unit.
            /// </summary>
            /// <param name="isSapi">True to key the table by SAPI strings, false to key it by UPS strings</param>
            /// <returns>A synchronized hash table holding the prefixes and conversion units.</returns>
            private Hashtable InitializePrefix(bool isSapi)
            {
                Hashtable table = Hashtable.Synchronized(new Hashtable());

                foreach (ConversionUnit unit in _convertTable)
                {
                    string source = isSapi ? unit.sapi : unit.ups;

                    // Register each proper prefix, without disturbing a key that already exists
                    // (it may already map to a shorter unit).
                    for (int length = 1; length < source.Length; length++)
                    {
                        string prefix = source.Substring(0, length);
                        if (!table.ContainsKey(prefix))
                        {
                            table[prefix] = null;
                        }
                    }

                    // The full string maps to this unit when nothing is registered for it yet
                    // (absent or placeholder), or unconditionally when this unit is the default.
                    if (unit.isDefault || table[source] == null)
                    {
                        table[source] = unit;
                    }
                }

                return table;
            }

            /// <summary>
            /// Reads one length-prefixed phoneme string from the reader.
            /// </summary>
            /// <param name="reader">Reader positioned at the 16-bit length prefix of the string.</param>
            /// <returns>The characters read, without the last one.</returns>
            private static string ReadPhoneString(BinaryReader reader)
            {
                // The prefix is halved to obtain a character count — presumably it is a byte count
                // of UTF-16 data; confirm against the map file format.
                int charCount = reader.ReadInt16() / 2;
                char[] buffer = reader.ReadChars(charCount);

                // The final character is dropped — presumably a terminating null; confirm against the format.
                return new string(buffer, 0, charCount - 1);
            }

            // Lookup tables keyed by SAPI and UPS strings respectively; values are either a
            // ConversionUnit (full match) or null (key is only a prefix of longer units).
            // All three fields are assigned once during construction and never replaced.
            private readonly Hashtable _prefixSapiTable;
            private readonly Hashtable _prefixUpsTable;

            // The conversion units in the order they were read from the map data.
            private readonly ConversionUnit[] _convertTable;
        }

        #endregion
    }
}