File: Internal\SrgsCompiler\BackEnd.cs
Web Access
Project: src\src\runtime\src\libraries\System.Speech\src\System.Speech.csproj (System.Speech)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Runtime.InteropServices;
using System.Speech.Internal.SrgsParser;
using System.Text;

namespace System.Speech.Internal.SrgsCompiler
{
    internal sealed partial class Backend
    {
        #region Constructors

        /// <summary>
        /// Creates a backend with new, separately allocated word and symbol string blobs.
        /// </summary>
        internal Backend()
        {
            // The two blobs are independent of each other.
            _symbols = new StringBlob();
            _words = new StringBlob();
        }

        /// <summary>
        /// Creates a backend whose content is loaded from a compiled (binary) grammar.
        /// </summary>
        /// <param name="streamHelper">Marshaler positioned on the binary grammar to load.</param>
        internal Backend(StreamMarshaler streamHelper) => InitFromBinaryGrammar(streamHelper);

        #endregion

        #region Internal Methods

        /// <summary>
        /// Runs the state collection's optimization pass and then flags the grammar
        /// as requiring a weight table.
        /// </summary>
        internal void Optimize()
        {
            _states.Optimize();

            // After optimization an arc with a weight other than 1 is the likely case,
            // so always request the weight table.
            _fNeedWeightTable = true;
        }

        /// <summary>
        /// Validates the grammar structure and writes its serialized (binary) form to
        /// <paramref name="streamBuffer"/>.
        /// The sections are written in this order: header, word blob, symbol blob, rules,
        /// optional base path, arcs, optional weight table, tags and script references.
        /// In debug builds each section start is asserted against the offset stored in the header.
        /// </summary>
        /// <param name="streamBuffer">Marshaler wrapping the stream the grammar is written to.</param>
        /// <exception cref="OverflowException">
        /// The symbol offset of the concatenated global tags does not fit in 16 bits.
        /// </exception>
        internal void Commit(StreamMarshaler streamBuffer)
        {
            // Remember where we started in the stream so that the debug asserts below can
            // check that each section begins at the offset recorded in the header.
            long startStreamPosition = streamBuffer.Stream.Position;

            // Copy all states into a list that is then sorted (by rule parent index and serialized index)
            List<State> sortedStates = new(_states);

            // Release the memory for the original list of states.
            // Note that _states is unusable after this point.
            _states = null!;

            sortedStates.Sort();

            // Validate the grammar
            ValidateAndTagRules();
            CheckLeftRecursion(sortedStates);

            // Length of the base path in chars, including the null terminator (0 when there is no base path)
            int cBasePath = _basePath != null ? _basePath.Length + 1 : 0;
            float[]? pWeights;
            int cArcs;

            // Add the top level semantic interpretation tag.
            // It is added to the symbol string blob before the script method names because
            // its offset must fit in a 16-bit value.
            int semanticInterpretationGlobals = 0;
            if (_globalTags.Count > 0)
            {
                // All global tags are concatenated into a single symbol
                StringBuilder sb = new();
                foreach (string s in _globalTags)
                {
                    sb.Append(s);
                }
                _symbols.Add(sb.ToString(), out semanticInterpretationGlobals);
                // Convert the symbol id into its offset; the offset is what gets stored in the header
                semanticInterpretationGlobals = _symbols.OffsetFromId(semanticInterpretationGlobals);
                if (semanticInterpretationGlobals > ushort.MaxValue)
                {
                    throw new OverflowException(SR.Get(SRID.TooManyRulesWithSemanticsGlobals));
                }
            }

            // Write the method names as symbols
            foreach (ScriptRef script in _scriptRefs)
            {
                _symbols.Add(script._sMethod, out script._idSymbol);
            }
            // Build and write the header; this also yields the arc count and the weight table (if any).
            // The unchecked cast is safe because the value was range-checked above.
            CfgGrammar.CfgSerializedHeader header = BuildHeader(sortedStates, cBasePath, unchecked((ushort)semanticInterpretationGlobals), out cArcs, out pWeights);
            streamBuffer.WriteStream(header);

            //
            //  For the string blobs, we must explicitly report I/O error since the blobs don't
            //  use the error log facility.
            //
            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.pszWords);
            streamBuffer.WriteArrayChar(_words.SerializeData(), _words.SerializeSize());

            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.pszSymbols);
            streamBuffer.WriteArrayChar(_symbols.SerializeData(), _symbols.SerializeSize());

            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.pRules);
            foreach (Rule rule in _rules)
            {
                rule.Serialize(streamBuffer);
            }

            if (cBasePath > 0)
            {
                System.Diagnostics.Debug.Assert(_basePath != null, "cBasePath should only be positive when we have a _basePath");
                streamBuffer.WriteArrayChar(_basePath.ToCharArray(), _basePath.Length);

                // Add a zero to be compatible with SAPI 5
                System.Diagnostics.Debug.Assert(_basePath.Length + 1 == cBasePath);
                streamBuffer.WriteArrayChar(s_achZero, 1);

                // Zero-pad to align following structures.
                // '*' binds tighter than '&', so this is (byte length of the base path) & 3.
                streamBuffer.WriteArray(s_abZero3, cBasePath * Helpers._sizeOfChar & 3);
            }

            //
            //  Write a dummy 0 index state entry
            //
            CfgArc dummyArc = new();

            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.pArcs);
            streamBuffer.WriteStream(dummyArc);

            // Both running offsets start at 1 because slot 0 is taken by the dummy arc written above
            int ulWeightOffset = 1;
            uint arcOffset = 1;

            bool semanticInterpretation = (GrammarOptions & GrammarOptions.MssV1) == GrammarOptions.MssV1;
            foreach (State state in sortedStates)
            {
                state.SerializeStateEntries(streamBuffer, semanticInterpretation, pWeights!, ref arcOffset, ref ulWeightOffset);
            }

            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.pWeights);
            if (_fNeedWeightTable)
            {
                System.Diagnostics.Debug.Assert(pWeights is not null, "If we need a weight table, we should have gotten one");
                streamBuffer.WriteArray<float>(pWeights, cArcs);
            }

            System.Diagnostics.Debug.Assert(streamBuffer.Stream.Position - startStreamPosition == header.tags);
            // Only done when the grammar does not use the MssV1 semantic interpretation option
            if (!semanticInterpretation)
            {
                foreach (State state in sortedStates)
                {
                    state.SetEndArcIndexForTags();
                }
            }

            // Remove the tags that are not referenced by any arc (orphaned).
            // This could happen in the case of a <item repeat="0-0"> <tag /></item>.
            // Iterate backwards so that RemoveAt does not shift the entries still to be visited.
            for (int i = _tags.Count - 1; i >= 0; i--)
            {
                // When a tag is created its arc index is zero. This value changes during serialization
                // if an arc references it
                if (_tags[i]._cfgTag.ArcIndex == 0)
                {
                    _tags.RemoveAt(i);
                }
            }
            // Sort the _tags array by ArcIndex
            _tags.Sort();

            // Write the _tags array
            foreach (Tag tag in _tags)
            {
                tag.Serialize(streamBuffer);
            }

            // Write the script references. A header.pScripts value of 0 is accepted by the
            // assert below; otherwise the references must start exactly at that offset.
            System.Diagnostics.Debug.Assert(header.pScripts == 0 || streamBuffer.Stream.Position - startStreamPosition == header.pScripts);
            foreach (ScriptRef script in _scriptRefs)
            {
                script.Serialize(_symbols, streamBuffer);
            }

            // Write the assembly bits
            // (Not supported on this platform)
        }

        /// <summary>
        /// Builds a new backend by cloning, from <paramref name="org"/>, the sub graph of every
        /// rule named <paramref name="ruleName"/>; <paramref name="extra"/> is handed to the
        /// cloning routine as the second source grammar.
        /// </summary>
        /// <param name="ruleName">Name of the rule(s) in <paramref name="org"/> to clone.</param>
        /// <param name="org">Grammar that the options and the matching rules are taken from.</param>
        /// <param name="extra">Additional grammar passed through to the sub graph cloning.</param>
        /// <returns>The newly created, combined backend.</returns>
        internal static Backend CombineGrammar(string? ruleName, Backend org, Backend extra)
        {
            // The result is flagged as loaded from binary and inherits the options of 'org'.
            Backend combined = new()
            {
                _fLoadedFromBinary = true,
                _fNeedWeightTable = org._fNeedWeightTable,
                _grammarMode = org._grammarMode,
                _grammarOptions = org._grammarOptions
            };

            // Shared across all cloned rules: maps a source state to its clone.
            Dictionary<State, State> clonedStates = new();

            foreach (Rule candidate in org._rules)
            {
                if (candidate.Name != ruleName)
                {
                    continue;
                }

                combined.CloneSubGraph(candidate, org, extra, clonedStates, true);
            }

            return combined;
        }

        /// <summary>
        /// Asks the state collection to create a new state for <paramref name="rule"/>.
        /// </summary>
        /// <param name="rule">Rule handed to the state collection for the new state.</param>
        /// <returns>The state returned by the state collection.</returns>
        internal State CreateNewState(Rule rule) => _states.CreateNewState(rule);

        /// <summary>
        /// Asks the state collection to delete <paramref name="state"/>.
        /// </summary>
        /// <param name="state">State to delete.</param>
        internal void DeleteState(State state) => _states.DeleteState(state);

        /// <summary>
        /// Forwards to the state collection's operation of the same name for the pair
        /// (<paramref name="from"/>, <paramref name="to"/>).
        /// </summary>
        /// <param name="from">First state argument, passed through unchanged.</param>
        /// <param name="to">Second state argument, passed through unchanged.</param>
        internal void MoveInputTransitionsAndDeleteState(State from, State to) =>
            _states.MoveInputTransitionsAndDeleteState(from, to);

        /// <summary>
        /// Forwards to the state collection's operation of the same name for the pair
        /// (<paramref name="from"/>, <paramref name="to"/>).
        /// </summary>
        /// <param name="from">First state argument, passed through unchanged.</param>
        /// <param name="to">Second state argument, passed through unchanged.</param>
        internal void MoveOutputTransitionsAndDeleteState(State from, State to) =>
            _states.MoveOutputTransitionsAndDeleteState(from, to);

        /// <summary>
        /// Creates a new rule named <paramref name="name"/> with the given attributes, registers it
        /// in the rule list (and in the name-offset index when its name offset is non-zero) and
        /// returns it.
        /// <para>
        /// Import rules cannot also be TopLevel, Active or Root: those flags are silently cleared
        /// when Import is set. A rule carrying the Root flag becomes the root rule of the grammar.
        /// </para>
        /// <para>
        /// Problems with the rule itself (empty import name, duplicated name in a grammar loaded
        /// from binary, second root rule) are reported through <c>LogError</c>.
        /// </para>
        /// </summary>
        /// <param name="name">Name of the rule to create.</param>
        /// <param name="attributes">Attribute flags of the rule.</param>
        /// <returns>The newly created rule.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="attributes"/> contains unknown flags, or both Import and Export are set.
        /// </exception>
        internal Rule CreateRule(string name, SPCFGRULEATTRIBUTES attributes)
        {

            SPCFGRULEATTRIBUTES allFlags = SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Export | SPCFGRULEATTRIBUTES.SPRAF_Import | SPCFGRULEATTRIBUTES.SPRAF_Interpreter | SPCFGRULEATTRIBUTES.SPRAF_Dynamic | SPCFGRULEATTRIBUTES.SPRAF_Root;

            // Reject unknown flags and the contradictory Import + Export combination.
            if (attributes != 0 && ((attributes & ~allFlags) != 0 || ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (attributes & SPCFGRULEATTRIBUTES.SPRAF_Export) != 0)))
            {
                throw new ArgumentException(SR.Get(SRID.InvalidFlagsSet), nameof(attributes));
            }

            // SAPI does not properly handle a rule marked as Import and TopLevel/Active/Root.
            // - To maintain maximal backwards compatibility, if a rule is marked as Import, we will unmark TopLevel/Active/Root.
            // - This changes the behavior when application tries to activate this rule.  However, given that it is already
            //   broken/fragile, we believe it is better to change the behavior.
            if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && ((attributes & SPCFGRULEATTRIBUTES.SPRAF_TopLevel) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Active) != 0 || (attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0))
            {
                attributes &= ~(SPCFGRULEATTRIBUTES.SPRAF_TopLevel | SPCFGRULEATTRIBUTES.SPRAF_Active | SPCFGRULEATTRIBUTES.SPRAF_Root);
            }

            // An imported rule needs a real name. Check the length first so that an empty
            // string is reported as an invalid import instead of raising IndexOutOfRangeException.
            if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Import) != 0 && (name.Length == 0 || name[0] == '\0'))
            {
                LogError(name, SRID.InvalidImport);
            }

            if (_fLoadedFromBinary)
            {
                // Scan all non-dynamic names and prevent a duplicate...
                foreach (Rule r in _rules)
                {
                    string wpszName = _symbols[r._cfgRule._nameOffset];

                    if (!r._cfgRule.Dynamic && name == wpszName)
                    {
                        LogError(name, SRID.DuplicatedRuleName);
                    }
                }
            }

            // The symbol id and the imported-rule counter are required by the callee signatures
            // but are not used afterwards.
            int idString;
            int cImportedRule = 0;
            Rule rule = new(this, name, _symbols.Add(name, out idString), attributes, _ruleIndex, 0, _grammarOptions & GrammarOptions.TagFormat, ref cImportedRule);

            rule._iSerialize2 = _ruleIndex++;

            if ((attributes & SPCFGRULEATTRIBUTES.SPRAF_Root) != 0)
            {
                if (_rootRule != null)
                {
                    // We already have a root rule; report the error.
                    LogError(name, SRID.RootRuleAlreadyDefined);
                }
                else
                {
                    _rootRule = rule;
                }
            }

            // Index the rule by the offset of its name in the symbol blob.
            if (rule._cfgRule._nameOffset != 0)
            {
                _nameOffsetRules.Add(rule._cfgRule._nameOffset, rule);
            }

            //
            //  It is important to insert this at the tail for dynamic rules to
            //  retain their slot number.
            //
            _rules.Add(rule);
            _rules.Sort();

            return rule;
        }

        /// <summary>
        /// Looks up a rule by name: the name is resolved to a symbol, the symbol to its offset,
        /// and the offset is used as the key in the name-offset rule index.
        /// </summary>
        /// <param name="sRule">Name of the rule to find.</param>
        /// <returns>The rule found, or null when no rule is indexed under that name.</returns>
        internal Rule? FindRule(string sRule)
        {
            Rule? found = null;

            // Only search when at least one rule has been indexed by name offset.
            if (_nameOffsetRules.Count > 0)
            {
                int symbolId = _symbols.Find(sRule);

                if (symbolId > 0)
                {
                    int symbolOffset = _symbols.OffsetFromId(symbolId);

                    System.Diagnostics.Debug.Assert(symbolOffset == 0 || _symbols[symbolId] == sRule);

                    if (symbolOffset > 0 && _nameOffsetRules.TryGetValue(symbolOffset, out Rule? candidate))
                    {
                        found = candidate;
                    }
                }
            }

            if (found == null)
            {
                return null;
            }

            string foundName = found.Name;

            // A non-empty requested name must be exactly the name of the rule that was found.
            if (!string.IsNullOrEmpty(sRule) && foundName != sRule)
            {
                LogError(sRule, SRID.RuleNameIdConflict);
            }

            return found;
        }

        /// <summary>
        /// Creates the arc for a word transition by delegating to <c>CreateTransition</c>.
        /// </summary>
        /// <param name="sWord">Text of the word for the transition.</param>
        /// <param name="flWeight">Weight of the transition.</param>
        /// <param name="requiredConfidence">Required confidence for the transition.</param>
        /// <returns>The arc returned by <c>CreateTransition</c>.</returns>
        internal Arc WordTransition(string sWord, float flWeight, int requiredConfidence) =>
            CreateTransition(sWord, flWeight, requiredConfidence);

        /// <summary>
        /// Creates an arc for a subset match on <paramref name="text"/>, using weight 1.0 and the
        /// normal confidence level.
        /// </summary>
        /// <param name="text">Text to match; its white space is normalized first.</param>
        /// <param name="matchMode">How the text has to be matched.</param>
        /// <returns>The newly created arc.</returns>
        internal Arc SubsetTransition(string text, MatchMode matchMode)
        {
            string normalizedText = NormalizeTokenWhiteSpace(text);

            // The arc constructor may update _fNeedWeightTable through the ref argument.
            Arc subsetArc = new(normalizedText, null, _words, 1.0f, CfgGrammar.SP_NORMAL_CONFIDENCE, null, matchMode, ref _fNeedWeightTable);
            return subsetArc;
        }

        /// <summary>
        /// Creates an arc that references a rule, or one of the special transitions
        /// (<c>SPRULETRANS_WILDCARD</c>, <c>SPRULETRANS_DICTATION</c>, <c>SPRULETRANS_TEXTBUFFER</c>).
        /// For a regular rule reference with a parent, the parent is recorded at the front of the
        /// referenced rule's <c>_listRules</c>.
        /// </summary>
        /// <param name="rule">Rule to reference, or one of the special transition rules.</param>
        /// <param name="parentRule">Rule that contains the reference, if any.</param>
        /// <param name="flWeight">Weight of the transition; negative values are rejected.</param>
        /// <returns>The newly created arc.</returns>
        internal Arc RuleTransition(Rule rule, Rule? parentRule, float flWeight)
        {
            if (flWeight < 0.0f)
            {
                XmlParser.ThrowSrgsException(SRID.UnsupportedFormat);
            }

            // A special transition is carried in its own constructor argument; a regular
            // rule reference is carried as the target rule. Exactly one of the two is set.
            bool isSpecialTransition =
                rule == CfgGrammar.SPRULETRANS_WILDCARD ||
                rule == CfgGrammar.SPRULETRANS_DICTATION ||
                rule == CfgGrammar.SPRULETRANS_TEXTBUFFER;

            Rule? targetRule = isSpecialTransition ? null : rule;
            Rule? specialTransition = isSpecialTransition ? rule : null;

            // Local flag: the value written back by the arc constructor is deliberately dropped.
            bool weightTableFlag = false;
            Arc ruleArc = new(null, targetRule, _words, flWeight, '\0', specialTransition, MatchMode.AllWords, ref weightTableFlag);

            AddArc(ruleArc);

            if (targetRule != null && parentRule != null)
            {
                targetRule._listRules.Insert(0, parentRule);
            }

            return ruleArc;
        }

        /// <summary>
        /// Creates the arc for an epsilon transition: a transition without a word, created with
        /// the normal confidence level.
        /// </summary>
        /// <param name="flWeight">Weight of the transition.</param>
        /// <returns>The arc returned by <c>CreateTransition</c>.</returns>
        internal Arc EpsilonTransition(float flWeight) =>
            CreateTransition(null, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE);

        /// <summary>
        /// Creates a tag for <paramref name="propertyInfo"/>, records it in the tag list and
        /// attaches it to <paramref name="arc"/> as both its start tag and its end tag.
        /// </summary>
        /// <param name="arc">Arc on which the tag both starts and ends.</param>
        /// <param name="propertyInfo">Property information carried by the tag.</param>
        internal void AddSemanticInterpretationTag(Arc arc, CfgGrammar.CfgProperty propertyInfo)
        {
            // Same as a property tag whose start arc and end arc are the same arc.
            AddPropertyTag(arc, arc, propertyInfo);
        }

        /// <summary>
        /// Creates a tag for <paramref name="propertyInfo"/>, records it in the tag list and
        /// attaches it as start tag of <paramref name="start"/> and end tag of <paramref name="end"/>.
        /// </summary>
        /// <param name="start">Arc on which the tag starts.</param>
        /// <param name="end">Arc on which the tag ends.</param>
        /// <param name="propertyInfo">Property information carried by the tag.</param>
        internal void AddPropertyTag(Arc start, Arc end, CfgGrammar.CfgProperty propertyInfo)
        {
            Tag propertyTag = new Tag(this, propertyInfo);

            _tags.Add(propertyTag);
            start.AddStartTag(propertyTag);
            end.AddEndTag(propertyTag);
        }

        /// <summary>
        /// Clones the graph reachable from <paramref name="srcFromState"/> onto
        /// <paramref name="destFromState"/>: every reachable state gets a new state, every
        /// outgoing arc (with its tags) gets a copy connected to the corresponding clones.
        /// </summary>
        /// <param name="srcFromState">State the traversal of the source graph starts from.</param>
        /// <param name="srcEndState">Source state whose clone is returned.</param>
        /// <param name="destFromState">Existing state that plays the role of the clone of <paramref name="srcFromState"/>.</param>
        /// <returns>The clone that corresponds to <paramref name="srcEndState"/>.</returns>
        internal State CloneSubGraph(State srcFromState, State srcEndState, State destFromState)
        {
            // Source state -> cloned state; seeded with the caller supplied pair.
            Dictionary<State, State> clonedStates = new() { { srcFromState, destFromState } };
            Dictionary<Tag, Tag> clonedTags = new();

            // Source states whose outgoing arcs still have to be cloned.
            Stack<State> pending = new();
            pending.Push(srcFromState);

            while (pending.Count > 0)
            {
                State source = pending.Pop();
                State clone = clonedStates[source];
                System.Diagnostics.Debug.Assert(clone != null);

                foreach (Arc sourceArc in source.OutArcs)
                {
                    State? sourceTarget = sourceArc.End;
                    State? cloneTarget = null;

                    // First time this target is seen: create its clone and schedule it.
                    if (sourceTarget != null && !clonedStates.TryGetValue(sourceTarget, out cloneTarget))
                    {
                        cloneTarget = CreateNewState(sourceTarget.Rule);
                        clonedStates.Add(sourceTarget, cloneTarget);
                        pending.Push(sourceTarget);
                    }

                    // Copy the arc between the two clones, then its tags, then hook it up.
                    Arc clonedArc = new(sourceArc, clone, cloneTarget);
                    AddArc(clonedArc);
                    clonedArc.CloneTags(sourceArc, _tags, clonedTags, null);
                    clonedArc.ConnectStates();
                }
            }

            System.Diagnostics.Debug.Assert(clonedTags.Count == 0);
            return clonedStates[srcEndState];
        }

        /// <summary>
        /// Clones into this backend the graph of <paramref name="rule"/>, starting at its first state.
        /// States are processed first-in first-out; each outgoing arc is copied (its word is
        /// re-added to this backend's word blob) together with its tags. When an arc references
        /// another rule, that rule is resolved by name and cloned recursively if needed:
        /// names prefixed with "URL:DYNAMIC#" are looked up in <paramref name="extra"/>,
        /// names prefixed with "URL:STATIC#" are looked up in <paramref name="org"/>.
        /// </summary>
        /// <param name="rule">Rule whose graph is cloned; its first state must be set.</param>
        /// <param name="org">The original grammar.</param>
        /// <param name="extra">The extra grammar.</param>
        /// <param name="srcToDestHash">Map from source state to cloned state, shared across the recursive calls.</param>
        /// <param name="fromOrg">true when <paramref name="rule"/> comes from <paramref name="org"/>, false when it comes from <paramref name="extra"/>.</param>
        internal void CloneSubGraph(Rule rule, Backend org, Backend extra, Dictionary<State, State> srcToDestHash, bool fromOrg)
        {
            System.Diagnostics.Debug.Assert(rule._firstState != null);
            // Backend that owns the arcs being cloned; used to resolve their word ids
            Backend beSrc = fromOrg ? org : extra;

            // States to process. Despite its name this list is used as a queue:
            // entries are taken from index 0.
            List<State> CloneStack = new();       // States to process
            Dictionary<Tag, Tag> tags = new();

            // Push all the state for the top level rule
            CloneState(rule._firstState, CloneStack, srcToDestHash);

            // While there are still states on the CloneStack (ToDo collection)
            while (CloneStack.Count > 0)
            {
                State srcFromState = CloneStack[0];
                CloneStack.RemoveAt(0);
                State destFromState = srcToDestHash[srcFromState];
                // For each transition from srcFromState
                foreach (Arc arc in srcFromState.OutArcs)
                {
                    // - Lookup the DestToState corresponding to SrcToState
                    State? srcToState = arc.End;
                    State? destToState = null;

                    if (srcToState != null)
                    {
                        if (!srcToDestHash.ContainsKey(srcToState))
                        {
                            // - If not found, then it is a new state, just clone it.
                            CloneState(srcToState, CloneStack, srcToDestHash);
                        }
                        // NOTE(review): relies on CloneState having added srcToState to srcToDestHash - confirm
                        destToState = srcToDestHash[srcToState];
                    }

                    // - Clone the transition from SrcFromState.SrcToState at DestFromState.DestToState
                    // -- Clone Arc
                    // Word ids are relative to the source backend's word blob, so the word text
                    // is re-added to this backend's blob to get the id to use in the clone.
                    int newWordId = arc.WordId;
                    if (beSrc != null && arc.WordId > 0)
                    {
                        _words.Add(beSrc.Words[arc.WordId], out newWordId);
                    }

                    Arc newArc = new(arc, destFromState, destToState, newWordId);

                    // -- Clone SemanticTag
                    newArc.CloneTags(arc, _tags, tags, this);

                    // For rule ref push the first state of the ruleref
                    if (arc.RuleRef != null)
                    {
                        string ruleName;

                        // Check for DYNAMIC grammars
                        if (arc.RuleRef.Name.StartsWith("URL:DYNAMIC#", StringComparison.Ordinal))
                        {
                            // 12 is the length of the "URL:DYNAMIC#" prefix
                            ruleName = arc.RuleRef.Name.Substring(12);
                            if (fromOrg && FindInRules(ruleName) == null)
                            {
                                Rule? ruleExtra = extra.FindInRules(ruleName);
                                if (ruleExtra == null)
                                {
                                    XmlParser.ThrowSrgsException(SRID.DynamicRuleNotFound, ruleName);
                                }
                                CloneSubGraph(ruleExtra, org, extra, srcToDestHash, false);
                            }
                        }
                        else if (arc.RuleRef.Name.StartsWith("URL:STATIC#", StringComparison.Ordinal))
                        {
                            // 11 is the length of the "URL:STATIC#" prefix
                            ruleName = arc.RuleRef.Name.Substring(11);
                            if (!fromOrg && FindInRules(ruleName) == null)
                            {
                                Rule? ruleOrg = org.FindInRules(ruleName);
                                if (ruleOrg == null)
                                {
                                    XmlParser.ThrowSrgsException(SRID.DynamicRuleNotFound, ruleName);
                                }
                                CloneSubGraph(ruleOrg, org, extra, srcToDestHash, true);
                            }
                        }
                        else
                        {
                            ruleName = arc.RuleRef.Name;
                            // NOTE(review): the result of this lookup is never used - confirm whether
                            // the call can be dropped or whether a check on it is missing.
                            Rule? ruleExtra = org.FindInRules(ruleName);
                            if (!fromOrg)
                            {
                                CloneSubGraph(arc.RuleRef, org, extra, srcToDestHash, true);
                            }
                        }
                        // Use the rule already present in this backend if there is one, otherwise
                        // clone the first state of the referenced rule and use what CloneState returns.
                        Rule? refRule = FindInRules(ruleName);
                        refRule ??= CloneState(arc.RuleRef._firstState!, CloneStack, srcToDestHash);
                        newArc.RuleRef = refRule;
                    }

                    // -- Add Arc
                    newArc.ConnectStates();
                }
            }
            System.Diagnostics.Debug.Assert(tags.Count == 0);
        }

        /// <summary>
        /// Deletes the disconnected sub graph that starts at <paramref name="state"/>.
        /// Every state reachable through outgoing arcs is collected, all the arcs leaving those
        /// states are detached from their start and end states, and finally the states are deleted.
        /// </summary>
        /// <param name="state">First state of the sub graph to delete.</param>
        internal void DeleteSubGraph(State state)
        {
            Stack<State> stateToProcess = new();           // States whose arcs still have to be detached
            List<State> statesToDelete = new();            // States already processed, in processing order
            List<Arc> arcsToDelete = new();

            // Every state that was ever pushed. A state is either still on the stack or already in
            // statesToDelete, so this single set replaces the linear searches in both collections.
            HashSet<State> visited = new();

            visited.Add(state);
            stateToProcess.Push(state);

            while (stateToProcess.Count > 0)
            {
                state = stateToProcess.Pop();
                statesToDelete.Add(state);
                arcsToDelete.Clear();

                // Accumulate the arcs to delete and schedule the states not seen yet.
                // The arcs are detached in a second pass so that OutArcs is not modified
                // while it is being enumerated.
                foreach (Arc arc in state.OutArcs)
                {
                    State? endState = arc.End;

                    if (endState != null && visited.Add(endState))
                    {
                        stateToProcess.Push(endState);
                    }
                    arcsToDelete.Add(arc);
                }

                // Detach the arcs from both of their states
                foreach (Arc arc in arcsToDelete)
                {
                    arc.Start = arc.End = null;
                }
            }

            foreach (State stateToDelete in statesToDelete)
            {
                // By now every state must be fully disconnected
                System.Diagnostics.Debug.Assert(stateToDelete != null);
                System.Diagnostics.Debug.Assert(stateToDelete.InArcs.IsEmpty);
                System.Diagnostics.Debug.Assert(stateToDelete.OutArcs.IsEmpty);
                DeleteState(stateToDelete);
            }
        }

        /// <summary>
        /// Overwrites the attribute flags of an already created rule and, when the Root flag is
        /// present, makes it the root rule of the grammar.
        /// This is only safe to use in the context of SrgsGrammarCompiler.
        /// </summary>
        /// <param name="rule">Rule to update.</param>
        /// <param name="dwAttributes">New attribute flags for the rule.</param>
        internal void SetRuleAttributes(Rule rule, SPCFGRULEATTRIBUTES dwAttributes)
        {
            bool IsSet(SPCFGRULEATTRIBUTES flag) => (dwAttributes & flag) != 0;

            if (IsSet(SPCFGRULEATTRIBUTES.SPRAF_Root))
            {
                if (_rootRule != null)
                {
                    // Only one root rule is allowed per grammar.
                    XmlParser.ThrowSrgsException(SRID.RootRuleAlreadyDefined);
                }
                else
                {
                    _rootRule = rule;
                }
            }

            // Mirror each flag onto the matching property of the serialized rule.
            rule._cfgRule.TopLevel = IsSet(SPCFGRULEATTRIBUTES.SPRAF_TopLevel);
            rule._cfgRule.DefaultActive = IsSet(SPCFGRULEATTRIBUTES.SPRAF_Active);
            rule._cfgRule.PropRule = IsSet(SPCFGRULEATTRIBUTES.SPRAF_Interpreter);
            rule._cfgRule.Export = IsSet(SPCFGRULEATTRIBUTES.SPRAF_Export);
            rule._cfgRule.Dynamic = IsSet(SPCFGRULEATTRIBUTES.SPRAF_Dynamic);
            rule._cfgRule.Import = IsSet(SPCFGRULEATTRIBUTES.SPRAF_Import);
        }

        /// <summary>
        /// Sets the base path used for relative grammar imports (xml:base / meta base).
        /// A null or empty value clears the current base path; any other value is parsed as a
        /// relative or absolute URI and stored in the string form produced by that URI.
        /// </summary>
        /// <param name="sBasePath">New base path, or null/empty to clear it.</param>
        internal void SetBasePath(string sBasePath)
        {
            // Constructing the Uri validates the path; ToString() gives the stored form.
            _basePath = string.IsNullOrEmpty(sBasePath)
                ? null
                : new Uri(sBasePath, UriKind.RelativeOrAbsolute).ToString();
        }

        /// <summary>
        /// Normalizes the white space of a token.
        /// - Trims the characters in <c>Helpers._achTrimChars</c> from both ends.
        /// - Collapses every run of consecutive ' ' characters into a single ' '.
        /// </summary>
        /// <param name="sToken">Token to normalize; expected to be neither null nor empty.</param>
        /// <returns>
        /// The normalized token. When the trimmed token contains no run of two or more spaces,
        /// the trimmed string is returned as is.
        /// </returns>
        internal static string NormalizeTokenWhiteSpace(string sToken)
        {
            System.Diagnostics.Debug.Assert(!string.IsNullOrEmpty(sToken));

            // Trim leading and ending white spaces
            sToken = sToken.Trim(Helpers._achTrimChars);

            // Easy out if there are no consecutive double white spaces
            if (!sToken.Contains("  ", StringComparison.Ordinal))
            {
                return sToken;
            }

            // Compact the characters in place: iDest never gets ahead of i.
            char[] achSrc = sToken.ToCharArray();
            int iDest = 0;

            for (int i = 0; i < achSrc.Length;)
            {
                // Collapse a run of spaces into a single ' '
                if (achSrc[i] == ' ')
                {
                    // The bounds check keeps the scan inside the array even if the token ends
                    // with a space, i.e. it no longer depends on the trim above removing spaces.
                    do
                    {
                        i++;
                    } while (i < achSrc.Length && achSrc[i] == ' ');

                    achSrc[iDest++] = ' ';
                    continue;
                }

                // Copy the non-space character
                achSrc[iDest++] = achSrc[i++];
            }

            return new string(achSrc, 0, iDest);
        }

        #endregion

        #region Internal Property

        /// <summary>
        /// Gets the string blob that holds the words of this grammar.
        /// </summary>
        internal StringBlob Words => _words;

        /// <summary>
        /// Gets the string blob that holds the symbols of this grammar.
        /// </summary>
        internal StringBlob Symbols => _symbols;

        #endregion

        #region Private Methods

        /// <summary>
        /// Loads compiled (binary) grammar data from <paramref name="streamHelper"/>.
        /// The word and symbol blobs are replaced by the ones read from the stream; rules, states,
        /// arcs and semantic tags are then rebuilt from the header and added to this backend.
        /// Finally the base path, GUID, language id and grammar mode are taken from the header and
        /// the backend is flagged as loaded from binary.
        /// </summary>
        /// <param name="streamHelper">Stream marshaler positioned on the binary grammar to read.</param>
        [MemberNotNull(nameof(_words))]
        [MemberNotNull(nameof(_symbols))]
        internal void InitFromBinaryGrammar(StreamMarshaler streamHelper)
        {
            CfgGrammar.CfgHeader header = CfgGrammar.ConvertCfgHeader(streamHelper);

            // The header is expected to carry the complete grammar data (words, symbols, arcs, tags).
            System.Diagnostics.Debug.Assert(header.pszWords != null, "ConvertCfgHeader should set includeAllGrammarData");
            System.Diagnostics.Debug.Assert(header.pszSymbols != null, "ConvertCfgHeader should set includeAllGrammarData");
            System.Diagnostics.Debug.Assert(header.arcs != null, "ConvertCfgHeader should set includeAllGrammarData");
            System.Diagnostics.Debug.Assert(header.tags != null, "ConvertCfgHeader should set includeAllGrammarData");
            _words = header.pszWords;
            _symbols = header.pszSymbols;

            _grammarOptions = header.GrammarOptions;

            //
            // Build up the internal representation
            //
            // apStateTable maps an arc index to the state that starts at that arc (null if none yet).
            // ruleFirstArcs maps the first arc index of a rule to that rule, sorted by arc index.
            State?[] apStateTable = new State[header.arcs.Length];
            SortedDictionary<int, Rule> ruleFirstArcs = new();

            //
            // Initialize the rules
            //

            // Rules created below are indexed after the rules that already exist in this backend.
            int previousCfgLastRules = _rules.Count;

            BuildRulesFromBinaryGrammar(header, apStateTable, ruleFirstArcs, previousCfgLastRules);

            //
            //  Initialize the arcs
            //
            // apArcTable maps a serialized arc index to the Arc object created for it.
            Arc[] apArcTable = new Arc[header.arcs.Length];
            bool fLastArcNull = true;
            CfgArc pLastArc = new();
            State? currentState = null;
            SortedDictionary<int, Rule>.Enumerator ieFirstArcs = ruleFirstArcs.GetEnumerator();

            // If no rules, then we have no arcs
            if (ieFirstArcs.MoveNext())
            {
                KeyValuePair<int, Rule> kvFirstArc = ieFirstArcs.Current;
                Rule ruleCur = kvFirstArc.Value;

                //  We repersist the static AND dynamic parts for now. This allows the grammar to be queried
                //  with the automation interfaces
                // Arc index 0 is skipped: the loop starts at 1 (BuildHeader documents 0 as the dead state).
                for (int k = 1; k < header.arcs.Length; k++)
                {
                    CfgArc arc = header.arcs[k];

                    // For a rule reference, record the referenced rule in the current rule's list.
                    // NOTE(review): this runs before ruleCur is advanced just below, so a rule reference
                    // sitting on the first arc of a rule is recorded against the previous rule - confirm
                    // that this is intended.
                    if (arc.RuleRef)
                    {
                        ruleCur._listRules.Add(_rules[(int)arc.TransitionIndex]);
                    }

                    if (kvFirstArc.Key == k)
                    {
                        // we are entering a new rule now
                        ruleCur = kvFirstArc.Value;

                        // Advance to the first arc of the next rule, if any. Once the last rule has been
                        // read, kvFirstArc simply keeps its last value.
                        if (ieFirstArcs.MoveNext())
                        {
                            kvFirstArc = ieFirstArcs.Current;
                        }
                    }

                    // A new state starts on the very first arc and after every arc flagged as LastArc.
                    if (fLastArcNull || pLastArc.LastArc)
                    {
                        if (apStateTable[k] == null)
                        {
                            uint hNewState = CfgGrammar.NextHandle;

                            var newState = new State(ruleCur, hNewState, k);
                            apStateTable[k] = newState;
                            AddState(newState);
                        }

                        currentState = apStateTable[k];
                    }

                    //
                    // now get the arc
                    //
                    int iNextArc = (int)(arc.NextStartArcIndex);
                    Arc newArc;
                    State? targetState = null;

                    // A next-arc index of 0 means there is no target state; the arc's End stays null.
                    if (currentState != null && iNextArc != 0)
                    {
                        if (apStateTable[iNextArc] == null)
                        {
                            uint hNewState = CfgGrammar.NextHandle;

                            var nextState = new State(ruleCur, hNewState, iNextArc);
                            apStateTable[iNextArc] = nextState;
                            AddState(nextState);
                        }

                        targetState = apStateTable[iNextArc];
                    }

                    // Use the serialized weight when the grammar has a weight table, the default otherwise.
                    float flWeight = header.weights != null ? header.weights[k] : CfgGrammar.DEFAULT_WEIGHT;

                    // determine properties of the arc now ...
                    if (arc.RuleRef)
                    {
                        // Rule reference: the transition index selects the referenced rule.
                        Rule ruleToTransitionTo = _rules[(int)arc.TransitionIndex];

                        newArc = new Arc(null, ruleToTransitionTo, _words!, flWeight, CfgGrammar.SP_NORMAL_CONFIDENCE, null, MatchMode.AllWords, ref _fNeedWeightTable);
                    }
                    else
                    {
                        // Word or special transition: wildcard, dictation and text-buffer transition
                        // indexes are passed as the special transition index (with a word index of 0);
                        // any other index is passed as the word index.
                        int transitionIndex = (int)arc.TransitionIndex;
                        int ulSpecialTransitionIndex = (transitionIndex == CfgGrammar.SPWILDCARDTRANSITION || transitionIndex == CfgGrammar.SPDICTATIONTRANSITION || transitionIndex == CfgGrammar.SPTEXTBUFFERTRANSITION) ? transitionIndex : 0;
                        newArc = new Arc((ulSpecialTransitionIndex != 0) ? 0 : (int)arc.TransitionIndex, flWeight, arc.LowConfRequired ? CfgGrammar.SP_LOW_CONFIDENCE : arc.HighConfRequired ? CfgGrammar.SP_HIGH_CONFIDENCE : CfgGrammar.SP_NORMAL_CONFIDENCE, ulSpecialTransitionIndex, MatchMode.AllWords, ref _fNeedWeightTable);
                    }
                    newArc.Start = currentState;
                    newArc.End = targetState;

                    AddArc(newArc);
                    apArcTable[k] = newArc;
                    fLastArcNull = false;
                    pLastArc = arc;
                }
            }

            //  Initialize the Semantics tags
            //  iCurTag walks header.tags once; all tags that start on arc k are consumed together.
            for (int k = 1, iCurTag = 0; k < header.arcs.Length; k++)
            {
                CfgArc arc = header.arcs[k];

                if (arc.HasSemanticTag)
                {
                    System.Diagnostics.Debug.Assert(header.tags[iCurTag].StartArcIndex == k);

                    while (iCurTag < header.tags.Length && header.tags[iCurTag].StartArcIndex == k)
                    {
                        // we should already point to the tag
                        CfgSemanticTag semTag = header.tags[iCurTag];

                        Tag tag = new(this, semTag);

                        // Register the tag and attach it to the arcs where it starts and ends.
                        _tags.Add(tag);
                        apArcTable[tag._cfgTag.StartArcIndex].AddStartTag(tag);
                        apArcTable[tag._cfgTag.EndArcIndex].AddEndTag(tag);

                        // If we have ms-properties then _nameOffset != 0; otherwise these are W3C tags.
                        // In both cases the offset is looked up again through _symbols.
                        if (semTag._nameOffset > 0)
                        {
                            tag._cfgTag._nameOffset = _symbols.OffsetFromId(_symbols.Find(_symbols.FromOffset(semTag._nameOffset)));
                        }
                        else
                        {
                            // The offset of the JScript expression is stored in the value field.
                            tag._cfgTag._valueOffset = _symbols.OffsetFromId(_symbols.Find(_symbols.FromOffset(semTag._valueOffset)));
                        }
                        iCurTag++;
                    }
                }
            }

            // A loaded grammar always requests a weight table.
            _fNeedWeightTable = true;
            if (header.BasePath != null)
            {
                SetBasePath(header.BasePath);
            }

            _guid = header.GrammarGUID;
            _langId = header.langId;
            _grammarMode = header.GrammarMode;

            _fLoadedFromBinary = true;

        }

        /// <summary>
        /// Creates a single-word transition. A null or empty word is passed on as null,
        /// which produces an epsilon transition.
        /// </summary>
        private Arc CreateTransition(string? sWord, float flWeight, int requiredConfidence)
        {
            // Empty words become epsilon transitions
            string? word = string.IsNullOrEmpty(sWord) ? null : sWord;

            return AddSingleWordTransition(word, flWeight, requiredConfidence);
        }

        /// <summary>
        /// Assigns a serialization id to every state and lays out the serialized grammar:
        /// each section offset in the returned header is computed by accumulating the size of
        /// the sections that precede it (words, symbols, rules, base path, arcs, weights, tags,
        /// script references). A new grammar GUID is generated on every call.
        /// </summary>
        /// <param name="sortedStates">States in the order in which they are serialized.</param>
        /// <param name="cBasePath">Length of the base path in characters; 0 when there is none.</param>
        /// <param name="iSemanticGlobals">Value stored in the header for the semantic interpretation globals.</param>
        /// <param name="cArcs">Receives the number of arc slots, including the reserved slot 0.</param>
        /// <param name="pWeights">Receives a weight table of <paramref name="cArcs"/> entries, or null when no weight table is needed.</param>
        /// <returns>The populated serialized header.</returns>
        private CfgGrammar.CfgSerializedHeader BuildHeader(List<State> sortedStates, int cBasePath, ushort iSemanticGlobals, out int cArcs, out float[]? pWeights)
        {
            // Arc indexes are one-based: 0 indicates the dead state.
            cArcs = 1;
            pWeights = null;

            int tagTotal = 0;
            int maxArcsPerState = 0;

            foreach (State current in sortedStates)
            {
                // Every state is serialized at the index of its first arc. New states have no
                // valid id yet; loaded states are renumbered in case states were added.
                current.SerializeId = cArcs;

                int arcCount = current.NumArcs;

#if DEBUG
                if (arcCount == 0 && current.InArcs.IsEmpty && current.Rule._cStates > 1)
                {
                    XmlParser.ThrowSrgsException(SRID.StateWithNoArcs);
                }
#endif
                cArcs += arcCount;
                maxArcsPerState = Math.Max(maxArcsPerState, arcCount);
                tagTotal += current.NumSemanticTags;
            }

            // The first section starts right after the fixed-size header.
            uint offset = (uint)Marshal.SizeOf<CfgGrammar.CfgSerializedHeader>();
            CfgGrammar.CfgSerializedHeader header = new();

            _guid = Guid.NewGuid();
            header.FormatId = CfgGrammar._SPGDF_ContextFree;
            header.GrammarGUID = _guid;
            header.LangID = (ushort)_langId;
            header.pszSemanticInterpretationGlobals = iSemanticGlobals;
            header.cArcsInLargestState = maxArcsPerState;

            // Words
            header.cchWords = _words.StringSize();
            header.cWords = _words.Count;

            // For compat with SAPI 5.x add one to cWords as soon as there is at least one word.
            // The CFGEngine code assumes cWords includes the initial empty-string word.
            // See PS 11491 and 61982.
            if (header.cWords > 0)
            {
                header.cWords++;
            }

            header.pszWords = offset;
            offset += (uint)_words.SerializeSize() * Helpers._sizeOfChar;

            // Symbols
            header.cchSymbols = _symbols.StringSize();
            header.pszSymbols = offset;
            offset += (uint)_symbols.SerializeSize() * Helpers._sizeOfChar;

            // Rules
            header.cRules = _rules.Count;
            header.pRules = offset;
            offset += (uint)(_rules.Count * Marshal.SizeOf<CfgRule>());

            // Base path: offset 0 means "no base path"; its size is rounded up to a multiple of 4 bytes.
            header.cBasePath = cBasePath > 0 ? offset : 0;
            offset += (uint)((cBasePath * Helpers._sizeOfChar + 3) & ~3);

            // Arcs
            header.cArcs = cArcs;
            header.pArcs = offset;
            offset += (uint)(cArcs * Marshal.SizeOf<CfgArc>());

            // Weights (optional section)
            if (_fNeedWeightTable)
            {
                pWeights = new float[cArcs];
                pWeights[0] = 0.0f;
                header.pWeights = offset;
                offset += (uint)(cArcs * sizeof(float));
            }
            else
            {
                header.pWeights = 0;
            }

            // 0xFFFFFFFF (-1) means there is no root rule
            header.ulRootRuleIndex = _rootRule != null ? (uint)_rootRule._iSerialize : 0xFFFFFFFF;

            header.GrammarOptions = _grammarOptions | ((_alphabet == AlphabetType.Sapi) ? 0 : GrammarOptions.IpaPhoneme);
            if (_scriptRefs.Count > 0)
            {
                header.GrammarOptions |= GrammarOptions.STG | GrammarOptions.KeyValuePairSrgs;
            }

            header.GrammarMode = (uint)_grammarMode;

            // Semantic tags
            header.cTags = tagTotal;
            header.tags = offset;
            offset += (uint)(tagTotal * Marshal.SizeOf<CfgSemanticTag>());

            // Script references: offset 0 when there are none
            header.cScripts = _scriptRefs.Count;
            header.pScripts = header.cScripts > 0 ? offset : 0;
            offset += (uint)(_scriptRefs.Count * Marshal.SizeOf<CfgScriptRef>());

            // No IL and no PDB payload is written, so these sections add nothing to the total size.
            header.cIL = 0;
            header.pIL = 0;
            header.cPDB = 0;
            header.pPDB = 0;

            header.ulTotalSerializedSize = offset;
            return header;
        }

        /// <summary>
        /// Creates a <c>Rule</c> and its first state for every rule entry of a loaded binary grammar.
        /// Rules whose first arc index is non-zero are registered in <paramref name="ruleFirstArcs"/>
        /// (positive indexes) and have their first state stored in <paramref name="apStateTable"/>.
        /// </summary>
        /// <param name="header">Header of the binary grammar being loaded.</param>
        /// <param name="apStateTable">Table mapping an arc index to the state starting at that arc.</param>
        /// <param name="ruleFirstArcs">Receives the rules keyed by their first arc index.</param>
        /// <param name="previousCfgLastRules">Number of rules that existed before this load; used as index offset for the new rules.</param>
        /// <returns>The <paramref name="header"/> that was passed in.</returns>
        /// <exception cref="NotImplementedException">A rule is flagged as having resources.</exception>
        private CfgGrammar.CfgHeader BuildRulesFromBinaryGrammar(CfgGrammar.CfgHeader header, State?[] apStateTable, SortedDictionary<int, Rule> ruleFirstArcs, int previousCfgLastRules)
        {
            for (int ruleIndex = 0; ruleIndex < header.rules.Length; ruleIndex++)
            {
                CfgRule serializedRule = header.rules[ruleIndex];
                int firstArcIndex = (int)serializedRule.FirstArcIndex;

                // Resolve the rule name read from the loaded symbols to its offset in _symbols
                serializedRule._nameOffset = _symbols.OffsetFromId(_symbols.Find(header.pszSymbols!.FromOffset(serializedRule._nameOffset)));

                string? ruleName = _symbols.FromOffset(serializedRule._nameOffset);
                System.Diagnostics.Debug.Assert(ruleName is not null);

                var rule = new Rule(this, ruleName, serializedRule, ruleIndex + previousCfgLastRules, _grammarOptions & GrammarOptions.TagFormat, ref _cImportedRules);

                rule._firstState = _states.CreateNewState(rule);
                _rules.Add(rule);

                // Remember which rule starts at this arc
                if (firstArcIndex > 0)
                {
                    ruleFirstArcs.Add(firstArcIndex, rule);
                }

                // A rule is static unless it is flagged as dynamic
                rule._fStaticRule = !serializedRule.Dynamic;
                rule._cfgRule.DirtyRule = false;

                // Loaded static rules are assumed to have an exit path,
                // or they wouldn't be there in the first place
                rule._fHasExitPath = rule._fStaticRule;

                if (firstArcIndex != 0)
                {
                    // The first state of the rule is serialized at the rule's first arc
                    System.Diagnostics.Debug.Assert(apStateTable[firstArcIndex] == null);
                    rule._firstState.SerializeId = firstArcIndex;
                    apStateTable[firstArcIndex] = rule._firstState;
                }

                if (rule._cfgRule.HasResources)
                {
                    throw new NotImplementedException();
                }

                if (header.ulRootRuleIndex == ruleIndex)
                {
                    _rootRule = rule;
                }

                // Named rules are also indexed by the offset of their name
                if (rule._cfgRule._nameOffset != 0)
                {
                    _nameOffsetRules.Add(rule._cfgRule._nameOffset, rule);
                }
            }

            return header;
        }

        /// <summary>
        /// Prepares the clone of a state coming from another grammar: finds (or clones) the rule
        /// that owns the state, creates the destination state in that rule, records the
        /// source-to-destination mapping and queues the source state for further cloning.
        /// </summary>
        /// <param name="srcToState">Source state to clone.</param>
        /// <param name="CloneStack">List of source states still to be processed; the source state is appended.</param>
        /// <param name="srcToDestHash">Map from source states to their clones; must not contain <paramref name="srcToState"/> yet.</param>
        /// <returns>The rule of this backend that owns the newly created state.</returns>
        private Rule CloneState(State srcToState, List<State> CloneStack, Dictionary<State, State> srcToDestHash)
        {
            const string DynamicPrefix = "URL:DYNAMIC#";

            // Rules are looked up by their name without the dynamic prefix.
            // StartsWith only inspects the beginning of the name, and the prefix length is taken
            // from the constant rather than hard-coded.
            string srcRuleName = srcToState.Rule.Name;
            string ruleName = srcRuleName.StartsWith(DynamicPrefix, StringComparison.Ordinal)
                ? srcRuleName.Substring(DynamicPrefix.Length)
                : srcRuleName;

            bool newRule = false;
            Rule? dstRule = FindInRules(ruleName);

            // Clone this rule into this GrammarBuilder if it does not exist yet
            if (dstRule == null)
            {
                dstRule = srcToState.Rule.Clone(_symbols, ruleName);
                _rules.Add(dstRule);
                newRule = true;
            }

            // Should not exist yet
            System.Diagnostics.Debug.Assert(!srcToDestHash.ContainsKey(srcToState));

            // Create the destination state and remember the mapping; the source state is queued
            // so that the caller can go on cloning from it.
            State newState = CreateNewState(dstRule);
            srcToDestHash.Add(srcToState, newState);
            CloneStack.Add(srcToState);

            // The first state created for a freshly cloned rule is its entry state
            if (newRule)
            {
                dstRule._firstState = newState;
            }

            return dstRule;
        }

        /// <summary>
        /// Returns the first rule whose name equals <paramref name="ruleName"/>, or null when
        /// no such rule exists.
        /// </summary>
        private Rule? FindInRules(string ruleName)
        {
            return _rules.Find(candidate => candidate.Name == ruleName);
        }

        /// <summary>
        /// Throws a <see cref="FormatException"/> whose message is the rule name followed by the
        /// resource string identified by <paramref name="srid"/>. Never returns.
        /// </summary>
        [DoesNotReturn]
        private static void LogError(string rule, SRID srid, params object[] args)
        {
            string detail = SR.Get(srid, args);
            string prefix = string.Format(CultureInfo.InvariantCulture, "Rule=\"{0}\" - ", rule);

            throw new FormatException(prefix + detail);
        }

        /// <summary>
        /// Called for every newly created arc. Debug builds record this backend on the arc;
        /// release builds do nothing, which is why the method is static there.
        /// </summary>
#if DEBUG
        private void AddArc(Arc arc)
        {
            arc.Backend = this;
        }
#else
        private static void AddArc(Arc arc)
        {
        }
#endif

        /// <summary>
        /// Prepares the rules for serialization: assigns each rule its serialization index,
        /// validates it, checks (debug builds only) that every rule has an exit path, and
        /// propagates the "has dynamic reference" flag starting from every dynamic rule.
        /// </summary>
        private void ValidateAndTagRules()
        {
            int ulIndex = 0;

            foreach (Rule rule in _rules)
            {
                // Dynamic and imported rules are always considered to have an exit path
                // (this covers empty dynamic grammars and imported rules).
                rule._fHasExitPath |= rule._cfgRule.Dynamic | rule._cfgRule.Import;
                rule._iSerialize = ulIndex++;
                rule.Validate();
            }
#if DEBUG
            //
            //  Now make sure that all rules have an exit path.
            //
            foreach (Rule rule in _rules)
            {
                _ulRecursiveDepth = 0;

                //The following function will use recursive function that might change _ulRecursiveDepth
                rule.CheckForExitPath(ref _ulRecursiveDepth);
            }
#endif
            //
            //  Flag every dynamic rule as having a dynamic reference and let the rule
            //  populate that information, restarting the recursion depth for each rule.
            //
            foreach (Rule rule in _rules)
            {
                if (rule._cfgRule.Dynamic)
                {
                    rule._cfgRule.HasDynamicRef = true;
                    _ulRecursiveDepth = 0;
                    rule.PopulateDynamicRef(ref _ulRecursiveDepth);
                }
            }
        }

        /// <summary>
        /// Runs the left-recursion check on every state of <paramref name="states"/>.
        /// The "reached end state" result of each check is not used here.
        /// </summary>
        private void CheckLeftRecursion(List<State> states)
        {
            foreach (State candidate in states)
            {
                candidate.CheckLeftRecursion(out bool _);
            }
        }

        /// <summary>
        /// Creates an arc for the word <paramref name="s"/> (null for no word) with the given
        /// weight and required confidence, passes it to AddArc and returns it.
        /// </summary>
        private Arc AddSingleWordTransition(string? s, float flWeight, int requiredConfidence)
        {
            var transition = new Arc(s, null, _words, flWeight, requiredConfidence, null, MatchMode.AllWords, ref _fNeedWeightTable);

            AddArc(transition);

            return transition;
        }

        /// <summary>
        /// Adds <paramref name="state"/> to the states of this backend.
        /// </summary>
        internal void AddState(State state) => _states.Add(state);

        #endregion

        #region Internal Properties

        /// <summary>
        /// Gets or sets the language id of the grammar.
        /// </summary>
        internal int LangId
        {
            get => _langId;
            set => _langId = value;
        }

        /// <summary>
        /// Gets or sets the option flags of the grammar.
        /// </summary>
        internal GrammarOptions GrammarOptions
        {
            get => _grammarOptions;
            set => _grammarOptions = value;
        }

        /// <summary>
        /// Sets the grammar mode (write-only).
        /// </summary>
        internal GrammarType GrammarMode
        {
            set => _grammarMode = value;
        }

        /// <summary>
        /// Gets or sets the alphabet used for pronunciation information.
        /// </summary>
        internal AlphabetType Alphabet
        {
            get => _alphabet;
            set => _alphabet = value;
        }

        /// <summary>
        /// Gets or sets the collection of global semantic interpretation tags.
        /// </summary>
        internal Collection<string> GlobalTags
        {
            get => _globalTags;
            set => _globalTags = value;
        }

        /// <summary>
        /// Sets the collection of script references of the grammar (write-only).
        /// </summary>
        internal Collection<ScriptRef> ScriptRefs
        {
            set => _scriptRefs = value;
        }

        #endregion

        #region Private Fields

        // Language id of the grammar; defaults to the LCID of the current UI culture.
        private int _langId = CultureInfo.CurrentUICulture.LCID;

        // String blob holding the words of the grammar.
        private StringBlob _words;

        // String blob holding the symbols of the grammar (rule names are resolved through it).
        private StringBlob _symbols;

        //private int _cResources;

        // Grammar GUID: taken from the header when a binary grammar is loaded and
        // regenerated by BuildHeader.
        private Guid _guid;

        // When true, BuildHeader reserves a weight table in the serialized layout.
        private bool _fNeedWeightTable;

        // States of the grammar.
        private Graph _states = new();

        // Rules of the grammar; ValidateAndTagRules numbers them in list order.
        private List<Rule> _rules = new();

        // NOTE(review): not referenced in this part of the partial class - presumably a running
        // rule index used elsewhere; confirm before relying on it.
        private int _ruleIndex;

        // Rules keyed by the offset of their name in _symbols.
        private Dictionary<int, Rule> _nameOffsetRules = new();

        // Root rule of the grammar, or null when there is none.
        private Rule? _rootRule;

        // Option flags of the grammar.
        private GrammarOptions _grammarOptions = GrammarOptions.KeyValuePairs;

        // It is used sequentially. So there is no thread issue
        private int _ulRecursiveDepth;

        // Path from which relative grammar imports are calculated. As specified by xml:base
        private string? _basePath;

        // Collection of all SemanticTags in the grammar (sorted by StartArc)
        private List<Tag> _tags = new();

        // Voice or DTMF
        private GrammarType _grammarMode = GrammarType.VoiceGrammar;

        // Pron information is either IPA or SAPI
        private AlphabetType _alphabet = AlphabetType.Sapi;

        // Global value for the semantic interpretation tags
        private Collection<string> _globalTags = new();

        // Three zero bytes and a single NUL character.
        // NOTE(review): not referenced in this part of the partial class - presumably used as
        // padding / terminators when serializing; confirm.
        private static byte[] s_abZero3 = new byte[] { 0, 0, 0 };

        private static char[] s_achZero = new char[] { '\0' };

        // Counter passed by reference to the Rule constructor.
        // NOTE(review): presumably the number of imported rules - confirm.
        private int _cImportedRules;

        // Script references of the grammar (rule 'on' method -> .NET method)
        private Collection<ScriptRef> _scriptRefs = new();

        // Set once the backend has been initialized from a binary grammar.
        private bool _fLoadedFromBinary;

        #endregion
    }
}