// File: Synthesis\TTSEngine\TTSEngineTypes.cs
// Web Access
// Project: src\src\runtime\src\libraries\System.Speech\src\System.Speech.csproj (System.Speech)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.InteropServices;
using System.Speech.Internal;

namespace System.Speech.Synthesis.TtsEngine
{
    #region Public Interface
    /// <summary>
    /// Abstract base class for a TTS engine. Derived classes implement output-format
    /// negotiation, lexicon management and rendering of text fragments.
    /// </summary>
    public abstract class TtsEngineSsml
    {
        /// <summary>
        /// Constructor for the TTS engine.
        /// </summary>
        /// <param name="registryKey">Voice token registry entry from which this
        /// engine was created. The base constructor has an empty body and does not
        /// store or inspect the value.</param>
        protected TtsEngineSsml(string registryKey) { }

        /// <summary>
        /// Queries the engine about the output format it supports.
        /// </summary>
        /// <param name="speakOutputFormat">Wave or Text</param>
        /// <param name="targetWaveFormat">Wave format header</param>
        /// <returns>Returns the closest format that it supports</returns>
        // NOTE(review): both the argument and the result are raw IntPtr values; who owns
        // the memory they point to is not visible in this file — confirm with the caller.
        public abstract IntPtr GetOutputFormat(SpeakOutputFormat speakOutputFormat, IntPtr targetWaveFormat);

        /// <summary>
        /// Adds a lexicon for this engine.
        /// </summary>
        /// <param name="uri">Location of the lexicon</param>
        /// <param name="mediaType">Media type of the lexicon; may be null</param>
        /// <param name="site">Engine site</param>
        public abstract void AddLexicon(Uri uri, string? mediaType, ITtsEngineSite site);

        /// <summary>
        /// Removes a lexicon for this engine.
        /// </summary>
        /// <param name="uri">Location of the lexicon</param>
        /// <param name="site">Engine site</param>
        public abstract void RemoveLexicon(Uri uri, ITtsEngineSite site);

        /// <summary>
        /// Renders the specified text fragments array in the
        /// specified output format.
        /// </summary>
        /// <param name="fragment">Text fragments with SSML
        /// attributes information</param>
        /// <param name="waveHeader">Wave format header</param>
        /// <param name="site">Engine site</param>
        public abstract void Speak(TextFragment[] fragment, IntPtr waveHeader, ITtsEngineSite site);
    }

    /// <summary>
    /// Describes a single event: an event id, a parameter type tag and two parameters.
    /// Arrays of this struct are passed to <c>ITtsEngineSite.AddEvents</c>.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout, so the declaration order of the fields at the
    /// bottom of the type is part of its contract and must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    [ImmutableObject(true)]
    public struct SpeechEventInfo : IEquatable<SpeechEventInfo>
    {
        /// <summary>Identifier of the event.</summary>
        public short EventId { get { return _eventId; } internal set { _eventId = value; } }

        /// <summary>Tag describing how <see cref="Param2"/> should be interpreted.</summary>
        public short ParameterType { get { return _parameterType; } internal set { _parameterType = value; } }

        /// <summary>
        /// Always just a numeric type - contains no unmanaged resources so does not need special clean-up.
        /// </summary>
        public int Param1 { get { return _param1; } internal set { _param1 = value; } }

        /// <summary>
        /// Can be a numeric type, or pointer to string.
        /// </summary>
        public IntPtr Param2 { get { return _param2; } internal set { _param2 = value; } }

        /// <summary>
        /// Creates an event description from its four components.
        /// </summary>
        /// <param name="eventId">Identifier of the event.</param>
        /// <param name="parameterType">Tag describing the content of <paramref name="param2"/>.</param>
        /// <param name="param1">Numeric parameter.</param>
        /// <param name="param2">Numeric value or pointer.</param>
        public SpeechEventInfo(short eventId,
                               short parameterType,
                               int param1,
                               IntPtr param2)
        {
            _eventId = eventId;
            _parameterType = parameterType;
            _param1 = param1;
            _param2 = param2;
        }

        /// <summary>Two events are equal when all four components are equal.</summary>
        public static bool operator ==(SpeechEventInfo event1, SpeechEventInfo event2)
        {
            return event1._eventId == event2._eventId
                && event1._parameterType == event2._parameterType
                && event1._param1 == event2._param1
                && event1._param2 == event2._param2;
        }

        /// <summary>Negation of <c>==</c>.</summary>
        public static bool operator !=(SpeechEventInfo event1, SpeechEventInfo event2)
        {
            return !(event1 == event2);
        }

        /// <inheritdoc />
        public bool Equals(SpeechEventInfo other)
        {
            return this == other;
        }

        /// <inheritdoc />
        public override bool Equals([NotNullWhen(true)] object? obj)
        {
            if (obj is not SpeechEventInfo sei)
            {
                return false;
            }

            return Equals(sei);
        }

        /// <summary>
        /// Hash built from exactly the fields that <c>==</c> compares, so equal values
        /// always produce equal hash codes without relying on the default
        /// <see cref="ValueType.GetHashCode"/> implementation.
        /// </summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + _eventId;
                hash = (hash * 31) + _parameterType;
                hash = (hash * 31) + _param1;
                hash = (hash * 31) + _param2.GetHashCode();
                return hash;
            }
        }

        private short _eventId;
        private short _parameterType;
        private int _param1;   // Always just a numeric type - contains no unmanaged resources so does not need special clean-up.
        private IntPtr _param2;   // Can be a numeric type, or pointer to string or object. Use SafeSapiLParamHandle to cleanup.
    }
    /// <summary>
    /// Site object handed to a <see cref="TtsEngineSsml"/> through its
    /// AddLexicon, RemoveLexicon and Speak methods.
    /// </summary>
    /// <remarks>
    /// NOTE(review): no implementation of this interface is visible in this file. The member
    /// notes below are inferred from names and signatures only — confirm against the implementer.
    /// </remarks>
    public interface ITtsEngineSite
    {
        /// <summary>Presumably reports the first <paramref name="count"/> entries of <paramref name="events"/> — TODO confirm.</summary>
        void AddEvents(SpeechEventInfo[] events, int count);

        /// <summary>Presumably writes <paramref name="count"/> bytes starting at <paramref name="data"/> and returns the amount written — TODO confirm units and return value.</summary>
        int Write(IntPtr data, int count);

        /// <summary>Returns a <see cref="SkipInfo"/> (a type/count pair).</summary>
        SkipInfo GetSkipInfo();

        /// <summary>Presumably reports how many items were actually skipped — TODO confirm.</summary>
        void CompleteSkip(int skipped);

        /// <summary>Loads the resource identified by <paramref name="uri"/>; the result may be null.</summary>
        Stream? LoadResource(Uri uri, string mediaType);

        // NOTE(review): the encoding of the following integer properties (bit masks, ranges)
        // is not established anywhere in this file.
        int EventInterest { get; }
        int Actions { get; }
        int Rate { get; }
        int Volume { get; }
    }
    /// <summary>
    /// Mutable pair of integers, a type and a count, returned by
    /// <c>ITtsEngineSite.GetSkipInfo</c>.
    /// </summary>
    public class SkipInfo
    {
        /// <summary>Creates an instance with <see cref="Type"/> and <see cref="Count"/> both zero.</summary>
        public SkipInfo()
        {
        }

        /// <summary>Creates an instance with the given type and count.</summary>
        internal SkipInfo(int type, int count)
        {
            Type = type;
            Count = count;
        }

        /// <summary>Skip type value.</summary>
        public int Type { get; set; }

        /// <summary>Skip count value.</summary>
        public int Count { get; set; }
    }

    #endregion

    #region Public Types
    /// <summary>
    /// A piece of text together with the <see cref="FragmentState"/> that applies to it.
    /// Arrays of fragments are passed to <c>TtsEngineSsml.Speak</c>.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    [DebuggerDisplay("{State.Action} = {TextToSpeak!=null?TextToSpeak:\"\"}")]
    public class TextFragment
    {
        // The class uses sequential layout: keep these fields in this exact order.
        private FragmentState _state;
        [MarshalAs(UnmanagedType.LPWStr)]
        private string _textToSpeak = string.Empty;
        private int _textOffset;
        private int _textLength;

        /// <summary>Creates a fragment with default state, empty text and zero offset/length.</summary>
        public TextFragment()
        {
        }

        /// <summary>Creates a fragment that carries only state; the text stays empty.</summary>
        internal TextFragment(FragmentState fragState)
            : this(fragState, textToSpeak: null, textFrag: null, offset: 0, length: 0)
        {
        }

        /// <summary>Creates a fragment covering the whole of <paramref name="textToSpeak"/>.</summary>
        internal TextFragment(FragmentState fragState, string textToSpeak)
            : this(fragState, textToSpeak, textFrag: textToSpeak, offset: 0, length: textToSpeak.Length)
        {
        }

        /// <summary>
        /// Creates a fragment. For the Speak and Pronounce actions the stored text is
        /// <paramref name="textToSpeak"/>; for every other action it is <paramref name="textFrag"/>.
        /// A null or empty text leaves <see cref="TextToSpeak"/> at its initial empty value.
        /// </summary>
        internal TextFragment(FragmentState fragState, string? textToSpeak, string? textFrag, int offset, int length)
        {
            bool usesSpokenText = fragState.Action is TtsEngineAction.Speak or TtsEngineAction.Pronounce;
            string? text = usesSpokenText ? textToSpeak : textFrag;

            // The property setter rejects null/empty, so only go through it when there is text.
            if (!string.IsNullOrEmpty(text))
            {
                TextToSpeak = text;
            }

            State = fragState;
            TextOffset = offset;
            TextLength = length;
        }

        /// <summary>State that applies to this fragment.</summary>
        public FragmentState State
        {
            get => _state;
            set => _state = value;
        }

        /// <summary>Text of the fragment. Setting it validates the value with Helpers.ThrowIfEmptyOrNull first.</summary>
        public string TextToSpeak
        {
            get => _textToSpeak;
            set
            {
                Helpers.ThrowIfEmptyOrNull(value, nameof(value));
                _textToSpeak = value;
            }
        }

        /// <summary>Offset value associated with the text.</summary>
        public int TextOffset
        {
            get => _textOffset;
            set => _textOffset = value;
        }

        /// <summary>Length value associated with the text.</summary>
        public int TextLength
        {
            get => _textLength;
            set => _textLength = value;
        }
    }
    /// <summary>
    /// State attached to a <see cref="TextFragment"/>: the action, language id, emphasis,
    /// duration and optional say-as, prosody and phoneme information.
    /// </summary>
    /// <remarks>
    /// Equality compares the numeric members by value, <see cref="SayAs"/> and
    /// <see cref="Prosody"/> by reference, and <see cref="Phoneme"/> element by element.
    /// </remarks>
    [ImmutableObject(true)]
    public struct FragmentState : IEquatable<FragmentState>
    {
        public TtsEngineAction Action { get { return _action; } internal set { _action = value; } }
        public int LangId { get { return _langId; } internal set { _langId = value; } }
        public int Emphasis { get { return _emphasis; } internal set { _emphasis = value; } }
        public int Duration { get { return _duration; } internal set { _duration = value; } }

        /// <summary>Say-as information, or null when none was supplied. Cannot be set to null.</summary>
        [DisallowNull]
        public SayAs? SayAs
        {
            get { return _sayAs; }
            internal set { ArgumentNullException.ThrowIfNull(value); _sayAs = value; }
        }

        /// <summary>Prosody information, or null when none was supplied. Cannot be set to null.</summary>
        [DisallowNull]
        public Prosody? Prosody
        {
            get { return _prosody; }
            internal set { ArgumentNullException.ThrowIfNull(value); _prosody = value; }
        }

        /// <summary>Phoneme characters, or null when none were supplied. Cannot be set to null.</summary>
        [DisallowNull]
        public char[]? Phoneme
        {
            get { return _phoneme; }
            internal set { ArgumentNullException.ThrowIfNull(value); _phoneme = value; }
        }

        /// <summary>
        /// Creates a state from its components. The reference-typed arguments are stored
        /// as given (no copy is made and no null check is performed).
        /// </summary>
        public FragmentState(TtsEngineAction action,
                             int langId,
                             int emphasis,
                             int duration,
                             SayAs? sayAs,
                             Prosody? prosody,
                             char[] phonemes)
        {
            _action = action;
            _langId = langId;
            _emphasis = emphasis;
            _duration = duration;
            _sayAs = sayAs;
            _prosody = prosody;
            _phoneme = phonemes;
        }

        /// <summary>
        /// Compares two states. The phoneme arrays are compared by content: the previous
        /// <c>Array.Equals(a, b)</c> call bound to the static <c>object.Equals(object, object)</c>
        /// and therefore only succeeded for the very same array instance.
        /// </summary>
        public static bool operator ==(FragmentState state1, FragmentState state2)
        {
            return state1._action == state2._action
                && state1._langId == state2._langId
                && state1._emphasis == state2._emphasis
                && state1._duration == state2._duration
                && ReferenceEquals(state1._sayAs, state2._sayAs)
                && ReferenceEquals(state1._prosody, state2._prosody)
                && PhonemesEqual(state1._phoneme, state2._phoneme);
        }

        /// <summary>Negation of <c>==</c>.</summary>
        public static bool operator !=(FragmentState state1, FragmentState state2)
        {
            return !(state1 == state2);
        }

        /// <inheritdoc />
        public bool Equals(FragmentState other)
        {
            return this == other;
        }

        /// <inheritdoc />
        public override bool Equals([NotNullWhen(true)] object? obj)
        {
            if (obj is not FragmentState fs)
            {
                return false;
            }

            return Equals(fs);
        }

        /// <summary>
        /// Hash built from the value-compared members and the phoneme characters, so that
        /// states considered equal by <c>==</c> always hash equally. The reference-compared
        /// members are left out; that only affects distribution, not correctness.
        /// </summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + (int)_action;
                hash = (hash * 31) + _langId;
                hash = (hash * 31) + _emphasis;
                hash = (hash * 31) + _duration;

                if (_phoneme is not null)
                {
                    foreach (char symbol in _phoneme)
                    {
                        hash = (hash * 31) + symbol;
                    }
                }

                return hash;
            }
        }

        /// <summary>
        /// Element-wise comparison of two phoneme arrays. Two nulls are equal; a null and a
        /// non-null array (even an empty one) are not.
        /// </summary>
        private static bool PhonemesEqual(char[]? left, char[]? right)
        {
            if (ReferenceEquals(left, right))
            {
                return true;
            }

            if (left is null || right is null || left.Length != right.Length)
            {
                return false;
            }

            for (int i = 0; i < left.Length; i++)
            {
                if (left[i] != right[i])
                {
                    return false;
                }
            }

            return true;
        }

        private TtsEngineAction _action;
        private int _langId;
        private int _emphasis;
        private int _duration;
        private SayAs? _sayAs;
        private Prosody? _prosody;
        private char[]? _phoneme;
    }
    /// <summary>
    /// Prosody settings: pitch, range, rate, duration, volume and an optional
    /// array of contour points.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public class Prosody
    {
        // The class uses sequential layout: keep these fields in this exact order.
        internal ProsodyNumber _pitch;
        internal ProsodyNumber _range;
        internal ProsodyNumber _rate; // can be casted to a Prosody Rate
        internal int _duration;
        internal ProsodyNumber _volume;
        internal ContourPoint[]? _contourPoints;

        /// <summary>
        /// Creates an instance whose pitch, range, rate and volume are the respective
        /// <c>Default</c> enum members; duration is zero and there are no contour points.
        /// </summary>
        public Prosody()
        {
            _pitch = new ProsodyNumber((int)ProsodyPitch.Default);
            _range = new ProsodyNumber((int)ProsodyRange.Default);
            _rate = new ProsodyNumber((int)ProsodyRate.Default);
            _volume = new ProsodyNumber((int)ProsodyVolume.Default);
        }

        public ProsodyNumber Pitch
        {
            get => _pitch;
            set => _pitch = value;
        }

        public ProsodyNumber Range
        {
            get => _range;
            set => _range = value;
        }

        public ProsodyNumber Rate
        {
            get => _rate;
            set => _rate = value;
        }

        public int Duration
        {
            get => _duration;
            set => _duration = value;
        }

        public ProsodyNumber Volume
        {
            get => _volume;
            set => _volume = value;
        }

        /// <summary>Returns the stored contour point array itself (not a copy), or null if none was set.</summary>
        public ContourPoint[]? GetContourPoints() => _contourPoints;

        /// <summary>Stores a shallow copy of <paramref name="points"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="points"/> is null.</exception>
        public void SetContourPoints(ContourPoint[] points)
        {
            ArgumentNullException.ThrowIfNull(points);

            _contourPoints = (ContourPoint[])points.Clone();
        }

        /// <summary>
        /// Creates a new instance with the same pitch, range, rate, duration and volume.
        /// Contour points are not copied; the clone has none.
        /// </summary>
        internal Prosody Clone()
        {
            return new Prosody
            {
                _pitch = this._pitch,
                _range = this._range,
                _rate = this._rate,
                _duration = this._duration,
                _volume = this._volume,
            };
        }
    }
    /// <summary>
    /// Immutable contour point: a start value, a change value and the kind of change.
    /// </summary>
    [ImmutableObject(true)]
    public struct ContourPoint : IEquatable<ContourPoint>
    {
        /// <summary>Start value of the point.</summary>
        public float Start { get { return _start; } }

        /// <summary>Change value of the point.</summary>
        public float Change { get { return _change; } }

        /// <summary>How <see cref="Change"/> is expressed.</summary>
        public ContourPointChangeType ChangeType { get { return _changeType; } }

        /// <summary>Creates a contour point from its three components.</summary>
        public ContourPoint(float start, float change, ContourPointChangeType changeType)
        {
            _start = start;
            _change = change;
            _changeType = changeType;
        }

        /// <summary>
        /// Compares all three components. The floats are compared with
        /// <see cref="float.Equals(float)"/>, so a NaN component equals another NaN component.
        /// </summary>
        public static bool operator ==(ContourPoint point1, ContourPoint point2)
        {
            // The enum is compared with == rather than Equals(object) to avoid boxing.
            return point1._start.Equals(point2._start)
                && point1._change.Equals(point2._change)
                && point1._changeType == point2._changeType;
        }

        /// <summary>Negation of <c>==</c>.</summary>
        public static bool operator !=(ContourPoint point1, ContourPoint point2)
        {
            return !(point1 == point2);
        }

        /// <inheritdoc />
        public bool Equals(ContourPoint other)
        {
            return this == other;
        }

        /// <inheritdoc />
        public override bool Equals([NotNullWhen(true)] object? obj)
        {
            if (obj is not ContourPoint cp)
            {
                return false;
            }

            return Equals(cp);
        }

        /// <summary>
        /// Hash built from the same three fields that <c>==</c> compares, instead of
        /// deferring to the default <see cref="ValueType.GetHashCode"/> implementation.
        /// </summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + _start.GetHashCode();
                hash = (hash * 31) + _change.GetHashCode();
                hash = (hash * 31) + (int)_changeType;
                return hash;
            }
        }

        // Assigned only in the constructor.
        private readonly float _start;
        private readonly float _change;
        private readonly ContourPointChangeType _changeType;
    }
    /// <summary>
    /// A prosody value that is either an SSML attribute id (see the <c>int</c> constructor)
    /// or an absolute number (see the <c>float</c> constructor).
    /// </summary>
    [ImmutableObject(true)]
    public struct ProsodyNumber : IEquatable<ProsodyNumber>
    {
        public int SsmlAttributeId { get { return _ssmlAttributeId; } internal set { _ssmlAttributeId = value; } }
        public bool IsNumberPercent { get { return _isPercent; } internal set { _isPercent = value; } }
        public float Number { get { return _number; } internal set { _number = value; } }
        public ProsodyUnit Unit { get { return _unit; } internal set { _unit = value; } }

        /// <summary>
        /// Value of <see cref="SsmlAttributeId"/> for instances created from a plain number.
        /// </summary>
        public const int AbsoluteNumber = int.MaxValue;

        /// <summary>
        /// Creates a value identified by an attribute id. <see cref="Number"/> is 1.0 and
        /// <see cref="IsNumberPercent"/> is true.
        /// </summary>
        public ProsodyNumber(int ssmlAttributeId)
        {
            _ssmlAttributeId = ssmlAttributeId;
            _number = 1.0f;
            _isPercent = true;
            _unit = ProsodyUnit.Default;
        }

        /// <summary>
        /// Creates an absolute number. <see cref="SsmlAttributeId"/> is
        /// <see cref="AbsoluteNumber"/> and <see cref="IsNumberPercent"/> is false.
        /// </summary>
        public ProsodyNumber(float number)
        {
            // Use the named constant rather than repeating its literal value.
            _ssmlAttributeId = AbsoluteNumber;
            _number = number;
            _isPercent = false;
            _unit = ProsodyUnit.Default;
        }

        /// <summary>
        /// Compares all four components. <see cref="Number"/> is compared with
        /// <see cref="float.Equals(float)"/>, so NaN equals NaN.
        /// </summary>
        public static bool operator ==(ProsodyNumber prosodyNumber1, ProsodyNumber prosodyNumber2)
        {
            return prosodyNumber1._ssmlAttributeId == prosodyNumber2._ssmlAttributeId
                && prosodyNumber1._number.Equals(prosodyNumber2._number)
                && prosodyNumber1._isPercent == prosodyNumber2._isPercent
                && prosodyNumber1._unit == prosodyNumber2._unit;
        }

        /// <summary>Negation of <c>==</c>.</summary>
        public static bool operator !=(ProsodyNumber prosodyNumber1, ProsodyNumber prosodyNumber2)
        {
            return !(prosodyNumber1 == prosodyNumber2);
        }

        /// <inheritdoc />
        public bool Equals(ProsodyNumber other)
        {
            return this == other;
        }

        /// <inheritdoc />
        public override bool Equals([NotNullWhen(true)] object? obj)
        {
            if (obj is not ProsodyNumber pn)
            {
                return false;
            }

            return Equals(pn);
        }

        /// <summary>
        /// Hash built from all four fields that <c>==</c> compares, so that absolute numbers
        /// (which all share the same attribute id) still spread across hash buckets.
        /// </summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = 17;
                hash = (hash * 31) + _ssmlAttributeId;
                hash = (hash * 31) + _number.GetHashCode();
                hash = (hash * 31) + (_isPercent ? 1 : 0);
                hash = (hash * 31) + (int)_unit;
                return hash;
            }
        }

        private int _ssmlAttributeId;
        private bool _isPercent;
        private float _number;
        private ProsodyUnit _unit;
    }
    /// <summary>
    /// Holder for three optional strings: interpret-as, format and detail.
    /// Each starts out null; each setter validates its value with
    /// Helpers.ThrowIfEmptyOrNull before storing it.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public class SayAs
    {
        // The class uses sequential layout: keep these fields in this exact order.
        [MarshalAs(UnmanagedType.LPWStr)]
        private string? _interpretAs;

        [MarshalAs(UnmanagedType.LPWStr)]
        private string? _format;

        [MarshalAs(UnmanagedType.LPWStr)]
        private string? _detail;

        [DisallowNull]
        public string? InterpretAs
        {
            get => _interpretAs;
            set
            {
                Helpers.ThrowIfEmptyOrNull(value, nameof(value));
                _interpretAs = value;
            }
        }

        [DisallowNull]
        public string? Format
        {
            get => _format;
            set
            {
                Helpers.ThrowIfEmptyOrNull(value, nameof(value));
                _format = value;
            }
        }

        [DisallowNull]
        public string? Detail
        {
            get => _detail;
            set
            {
                Helpers.ThrowIfEmptyOrNull(value, nameof(value));
                _detail = value;
            }
        }
    }

    #endregion

    #region Public Enums
    /// <summary>
    /// Action carried by <c>FragmentState.Action</c>. Numeric values are spelled out
    /// so that inserting or reordering members cannot silently renumber them.
    /// </summary>
    public enum TtsEngineAction
    {
        Speak = 0,
        Silence = 1,
        Pronounce = 2,
        Bookmark = 3,
        SpellOut = 4,
        StartSentence = 5,
        StartParagraph = 6,
        ParseUnknownTag = 7,
    }
    /// <summary>
    /// Word emphasis levels, numbered from zero in declaration order.
    /// </summary>
    public enum EmphasisWord
    {
        Default = 0,
        Strong = 1,
        Moderate = 2,
        None = 3,
        Reduced = 4,
    }
    /// <summary>
    /// Break strengths. All members are negative.
    /// </summary>
    public enum EmphasisBreak
    {
        None = -1,
        ExtraWeak = -2,
        Weak = -3,
        Medium = -4,
        Strong = -5,
        ExtraStrong = -6,

        /// <summary>
        /// Equivalent to the empty &lt;Break /&gt;
        /// </summary>
        Default = -7,
    }
    /// <summary>
    /// Named pitch levels. The <c>Prosody</c> constructor casts <c>Default</c> to
    /// <c>int</c> to build its initial pitch value.
    /// </summary>
    public enum ProsodyPitch
    {
        Default = 0,
        ExtraLow = 1,
        Low = 2,
        Medium = 3,
        High = 4,
        ExtraHigh = 5,
    }
    /// <summary>
    /// Named range levels. The <c>Prosody</c> constructor casts <c>Default</c> to
    /// <c>int</c> to build its initial range value.
    /// </summary>
    public enum ProsodyRange
    {
        Default = 0,
        ExtraLow = 1,
        Low = 2,
        Medium = 3,
        High = 4,
        ExtraHigh = 5,
    }
    /// <summary>
    /// Named rate levels. The <c>Prosody</c> constructor casts <c>Default</c> to
    /// <c>int</c> to build its initial rate value.
    /// </summary>
    public enum ProsodyRate
    {
        Default = 0,
        ExtraSlow = 1,
        Slow = 2,
        Medium = 3,
        Fast = 4,
        ExtraFast = 5,
    }
    /// <summary>
    /// Named volume levels. All members are negative. The <c>Prosody</c> constructor
    /// casts <c>Default</c> to <c>int</c> to build its initial volume value.
    /// </summary>
    public enum ProsodyVolume
    {
        Default = -1,
        Silent = -2,
        ExtraSoft = -3,
        Soft = -4,
        Medium = -5,
        Loud = -6,
        ExtraLoud = -7,
    }
    /// <summary>
    /// Unit attached to a <c>ProsodyNumber</c>; both of its constructors use <c>Default</c>.
    /// </summary>
    public enum ProsodyUnit
    {
        Default = 0,
        Hz = 1,
        Semitone = 2,
    }
    /// <summary>
    /// Event identifiers, numbered from 1.
    /// </summary>
    // NOTE(review): presumably these are the values carried in SpeechEventInfo.EventId
    // (a short) — confirm against the code that raises events.
    public enum TtsEventId
    {
        StartInputStream = 1,
        EndInputStream = 2,
        VoiceChange = 3,   // lparam_is_token
        Bookmark = 4,   // lparam_is_string
        WordBoundary = 5,
        Phoneme = 6,
        SentenceBoundary = 7,
        Viseme = 8,
        AudioLevel = 9,   // wparam contains current output audio level
    }
    /// <summary>
    /// Tags for the kind of data an event parameter holds.
    /// </summary>
    public enum EventParameterType
    {
        Undefined = 0,
        Token = 1,
        Object = 2,
        Pointer = 3,
        String = 4,
    }
    /// <summary>
    /// Kind of output requested from <c>TtsEngineSsml.GetOutputFormat</c>: wave data or text.
    /// </summary>
    public enum SpeakOutputFormat
    {
        WaveFormat = 0,
        Text = 1
    }
    /// <summary>
    /// Type of the <c>ContourPoint.ChangeType</c> property.
    /// </summary>
    // NOTE(review): presumably states whether ContourPoint.Change is an amount in Hz or a
    // percentage — confirm against the code that produces contour points.
    public enum ContourPointChangeType
    {
        Hz = 0,
        Percentage = 1
    }

    #endregion
}