File: System\Text\RegularExpressions\Symbolic\BDD.cs
Web Access
Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
 
namespace System.Text.RegularExpressions.Symbolic
{
    /// <summary>
    /// Represents nodes in a Binary Decision Diagram (BDD), which compactly represent sets of integers and allow for fast
    /// querying of whether an integer is in the set (and if so, what value it maps to, typically True or False).
    /// </summary>
    /// <remarks>
    /// All non-leaf nodes have an Ordinal, which indicates the position of the bit the node relates to (0 for the least significant
    /// bit), and two children, One and Zero, for the cases of the current bit being 1 or 0, respectively. An integer
    /// belongs to the set represented by the BDD if the path from the root following the branches that correspond to
    /// the bits of the integer leads to the True leaf. This class also supports multi-terminal BDDs (MTBDD), i.e. ones where
    /// the leaves are something other than True or False, which are used for representing classifiers.
    /// </remarks>
    internal sealed class BDD : IComparable<BDD>, IEquatable<BDD>
    {
        /// <summary>
        /// The ordinal for the True special value.
        /// </summary>
        private const int TrueOrdinal = -2;
 
        /// <summary>
        /// The ordinal for the False special value.
        /// </summary>
        private const int FalseOrdinal = -1;
 
        /// <summary>
        /// The unique BDD leaf that represents the full set or true.
        /// </summary>
        public static readonly BDD True = new BDD(TrueOrdinal, null, null);
 
        /// <summary>
        /// The unique BDD leaf that represents the empty set or false.
        /// </summary>
        public static readonly BDD False = new BDD(FalseOrdinal, null, null);
 
        /// <summary>
        /// The encoding of the set for lower ordinals for the case when the current bit is 1.
        /// The value is null iff IsLeaf is true.
        /// </summary>
        public readonly BDD? One;
 
        /// <summary>
        /// The encoding of the set for lower ordinals for the case when the current bit is 0.
        /// The value is null iff IsLeaf is true.
        /// </summary>
        public readonly BDD? Zero;
 
        /// <summary>
        /// Ordinal of this bit if nonleaf else MTBDD terminal value when nonnegative
        /// </summary>
        public readonly int Ordinal;
 
        /// <summary>
        /// Preassigned hashcode value that respects equivalence: equivalent BDDs have equal hashcodes
        /// </summary>
        private readonly int _hashcode;
 
        internal BDD(int ordinal, BDD? one, BDD? zero)
        {
            Debug.Assert((one is null) == (zero is null), "Neither or both children should be null.");
 
            One = one;
            Zero = zero;
            Ordinal = ordinal;
 
            // Precompute a hashchode value that respects BDD equivalence.
            // Two equivalent BDDs will always have the same hashcode
            // that is independent of object id values of the BDD objects.
            _hashcode = HashCode.Combine(ordinal, one, zero);
        }
 
        /// <summary>
        /// True iff the node is a terminal (One and Zero are both null).
        /// True and False are terminals.
        /// </summary>
        [MemberNotNullWhen(false, nameof(One))]
        [MemberNotNullWhen(false, nameof(Zero))]
        public bool IsLeaf
        {
            get
            {
                Debug.Assert((One is null) == (Zero is null));
                return One is null;
            }
        }
 
        /// <summary>
        /// True iff the BDD is True.
        /// </summary>
        public bool IsFull => this == True;
 
        /// <summary>
        /// True iff the BDD is False.
        /// </summary>
        public bool IsEmpty => this == False;
 
        /// <summary>
        /// Gets the lexicographically minimum bitvector in this BDD as a ulong.
        /// The BDD must be nonempty.
        /// </summary>
        public ulong GetMin()
        {
            BDD set = this;
            Debug.Assert(!set.IsEmpty);
 
            if (set.IsFull)
                return 0;
 
            // starting from all 0, bits will be flipped to 1 as necessary
            ulong result = 0;
 
            // follow the minimum path through the branches to a True leaf
            while (!set.IsLeaf)
            {
                if (set.Zero.IsEmpty) //the bit must be set to 1
                {
                    // the bit must be set to 1 when the zero branch is False
                    result |= (ulong)1 << set.Ordinal;
                    // if zero is empty then by the way BDDs are constructed one is not
                    set = set.One;
                }
                else
                {
                    // otherwise, leaving the bit as 0 gives the smaller bitvector
                    set = set.Zero;
                }
            }
 
            return result;
        }
 
        /// <summary>Gets a hashcode for the BDD.</summary>
        public override int GetHashCode() => _hashcode;
 
        /// <summary>A shallow equality check that holds if ordinals are identical and one's are identical and zero's are identical.</summary>
        public override bool Equals(object? obj) => Equals(obj as BDD);
 
        /// <summary>A shallow equality check that holds if ordinals are identical and one's are identical and zero's are identical.</summary>
        public bool Equals(BDD? bdd) =>
            bdd is not null &&
            (this == bdd || (Ordinal == bdd.Ordinal && One == bdd.One && Zero == bdd.Zero));
 
        #region Serialization
#if DEBUG // currently used only from the debug-only code that regenerates the embedded serialized BDD data
        /// <summary>
        /// Serialize this BDD in a flat ulong array. The BDD may have at most 2^k ordinals and 2^n nodes, such that k+2n &lt; 64
        /// BDD.False is represented by return value ulong[]{0}.
        /// BDD.True is represented by return value ulong[]{1}.
        /// Serializer uses more compacted representations when fewer bits are needed, which is reflected in the first
        /// two numbers of the return value. MTBDD terminals are represented by negated numbers as -id.
        /// </summary>
        [ExcludeFromCodeCoverage(Justification = "Used only to generate src data files")]
        public long[] Serialize()
        {
            if (IsEmpty)
                return [0];
 
            if (IsFull)
                return [1];
 
            if (IsLeaf)
                return [0, 0, -Ordinal];
 
            BDD[] nodes = TopologicalSort();
 
            Debug.Assert(nodes[nodes.Length - 1] == this);
            Debug.Assert(nodes.Length <= (1 << 24));
 
            // As few bits as possible are used to for ordinals and node identifiers for compact serialization.
            // Use at least a nibble (4 bits) to represent the ordinal and count how many are needed.
            int ordinal_bits = 4;
            while (Ordinal >= (1 << ordinal_bits))
            {
                ordinal_bits += 1;
            }
 
            // Use at least 2 bits to represent the node identifier and count how many are needed
            int node_bits = 2;
            while (nodes.Length >= (1 << node_bits))
            {
                node_bits += 1;
            }
 
            // Reserve space for all nodes plus 2 extra: index 0 and 1 are reserved for False and True
            long[] result = new long[nodes.Length + 2];
            result[0] = ordinal_bits;
            result[1] = node_bits;
 
            //use the following bit layout
            BitLayout(ordinal_bits, node_bits, out int zero_node_shift, out int one_node_shift, out int ordinal_shift);
 
            //here we know that bdd is neither False nor True
            //but it could still be a MTBDD leaf if both children are null
            var idmap = new Dictionary<BDD, long>
            {
                [True] = 1,
                [False] = 0
            };
 
            // Give all nodes ascending identifiers and produce their serializations into the result
            for (int i = 0; i < nodes.Length; i++)
            {
                BDD node = nodes[i];
                idmap[node] = i + 2;
 
                if (node.IsLeaf)
                {
                    // This is MTBDD leaf. Negating it should make it less than or equal to zero, as True and False are
                    // excluded here and MTBDD Ordinals are required to be non-negative.
                    result[i + 2] = -node.Ordinal;
                }
                else
                {
                    // Combine ordinal and child identifiers according to the bit layout
                    long v = (((long)node.Ordinal) << ordinal_shift) | (idmap[node.One] << one_node_shift) | (idmap[node.Zero] << zero_node_shift);
                    Debug.Assert(v >= 0);
                    result[i + 2] = v; // children ids are well-defined due to the topological order of nodes
                }
            }
            return result;
        }
 
        /// <summary>
        /// Returns a topologically sorted array of all the nodes (other than True or False) in this BDD
        /// such that, all MTBDD leaves (other than True or False) appear first in the array
        /// and all nonterminals with smaller ordinal appear before nodes with larger ordinal.
        /// So this BDD itself (if different from True or False) appears last.
        /// In the case of True or False returns the empty array.
        /// </summary>
        [ExcludeFromCodeCoverage(Justification = "Used only to generate src data files")]
        private BDD[] TopologicalSort()
        {
            if (IsFull || IsEmpty)
                return [];
 
            if (IsLeaf)
                return [this];
 
            // Order the nodes according to their ordinals into the nonterminals array
            var nonterminals = new List<BDD>[Ordinal + 1];
            var sorted = new List<BDD>();
            var toVisit = new Stack<BDD>();
            var visited = new HashSet<BDD>();
 
            toVisit.Push(this);
 
            while (toVisit.Count > 0)
            {
                BDD node = toVisit.Pop();
                // True and False are not included in the result
                if (node.IsFull || node.IsEmpty)
                    continue;
 
                if (node.IsLeaf)
                {
                    // MTBDD terminals can be directly added to the sorted nodes, since they have no children that
                    // would come first in the topological ordering.
                    sorted.Add(node);
                }
                else
                {
                    // Non-terminals are grouped by their ordinal so that they can be sorted into a topological order.
                    (nonterminals[node.Ordinal] ??= new List<BDD>()).Add(node);
 
                    if (visited.Add(node.Zero))
                        toVisit.Push(node.Zero);
 
                    if (visited.Add(node.One))
                        toVisit.Push(node.One);
                }
            }
 
            // Flush the grouped non-terminals into the sorted nodes from smallest to highest ordinal. The highest
            // ordinal is guaranteed to have only one node, which places the root of the BDD at the end.
            for (int i = 0; i < nonterminals.Length; i++)
            {
                if (nonterminals[i] != null)
                {
                    sorted.AddRange(nonterminals[i]);
                }
            }
 
            return sorted.ToArray();
        }
 
        /// <summary>
        /// Serialize this BDD into a byte array.
        /// This method is not valid for MTBDDs where some elements may be negative.
        /// </summary>
        [ExcludeFromCodeCoverage(Justification = "Used only to generate src data files")]
        public byte[] SerializeToBytes()
        {
            if (IsEmpty)
                return [0];
 
            if (IsFull)
                return [1];
 
            // in other cases make use of the general serializer to long[]
            long[] serialized = Serialize();
 
            // get the maximal element from the array
            long m = 0;
            for (int i = 0; i < serialized.Length; i++)
            {
                // make sure this serialization is not applied to MTBDDs
                Debug.Assert(serialized[i] > 0);
                m = Math.Max(serialized[i], m);
            }
 
            // k is the number of bytes needed to represent the maximal element
            int k = m <= 0xFFFF ? 2 : (m <= 0xFF_FFFF ? 3 : (m <= 0xFFFF_FFFF ? 4 : (m <= 0xFF_FFFF_FFFF ? 5 : (m <= 0xFFFF_FFFF_FFFF ? 6 : (m <= 0xFF_FFFF_FFFF_FFFF ? 7 : 8)))));
 
            // the result will contain k as the first element and the number of serialized elements times k
            byte[] result = new byte[(k * serialized.Length) + 1];
            result[0] = (byte)k;
            for (int i = 0; i < serialized.Length; i += 1)
            {
                // add the serialized longs as k-byte subsequences
                long serialized_i = serialized[i];
                for (int j = 1; j <= k; j++)
                {
                    result[(i * k) + j] = (byte)serialized_i;
                    serialized_i >>= 8;
                }
            }
            return result;
        }
#endif
 
        /// <summary>
        /// Recreates a BDD from a byte array that has been created using SerializeToBytes.
        /// </summary>
        public static BDD Deserialize(ReadOnlySpan<byte> bytes)
        {
            Debug.Assert(bytes.Length > 1, "All inputs are expected to be larger than a single byte, which would map to False or True.");
 
            // here bytes represents an array of longs with k = the number of bytes used per long
            int bytesPerLong = bytes[0];
 
            // n is the total nr of longs that corresponds also to the total number of BDD nodes needed
            int n = (bytes.Length - 1) / bytesPerLong;
 
            // make sure the represented nr of longs divides precisely without remainder
            Debug.Assert((bytes.Length - 1) % bytesPerLong == 0);
 
            // the number of bits used for ordinals and node identifiers are stored in the first two longs
            int ordinal_bits = (int)Get(bytesPerLong, bytes, 0);
            int node_bits = (int)Get(bytesPerLong, bytes, 1);
 
            // create bit masks for the sizes of ordinals and node identifiers
            long ordinal_mask = (1 << ordinal_bits) - 1;
            long node_mask = (1 << node_bits) - 1;
            BitLayout(ordinal_bits, node_bits, out int zero_node_shift, out int one_node_shift, out int ordinal_shift);
 
            // store BDD nodes by their id when they are created
            BDD[] nodes = new BDD[n];
            nodes[0] = False;
            nodes[1] = True;
 
            for (int i = 2; i < n; i++)
            {
                // represents the triple (ordinal, one, zero)
                long arc = Get(bytesPerLong, bytes, i);
 
                // reconstruct the ordinal and child identifiers for a non-terminal
                int ord = (int)((arc >> ordinal_shift) & ordinal_mask);
                int oneId = (int)((arc >> one_node_shift) & node_mask);
                int zeroId = (int)((arc >> zero_node_shift) & node_mask);
 
                // the BDD nodes for the children are guaranteed to exist already
                // because of the topological order they were serialized by
                Debug.Assert(oneId < i && zeroId < i);
                Debug.Assert(nodes[oneId] is not null);
                Debug.Assert(nodes[zeroId] is not null);
                nodes[i] = new BDD(ord, nodes[oneId], nodes[zeroId]);
            }
 
            //the result is the last BDD in the nodes array
            return nodes[n - 1];
 
            // Gets the i'th element from the underlying array of longs represented by bytes
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            static long Get(int bytesPerLong, ReadOnlySpan<byte> bytes, int i)
            {
                ulong value = 0;
                for (int j = bytesPerLong; j > 0; j--)
                {
                    value = (value << 8) | bytes[(bytesPerLong * i) + j];
                }
 
                return (long)value;
            }
        }
 
        /// <summary>
        /// Use this bit layout in the serialization
        /// </summary>
        private static void BitLayout(int ordinal_bits, int node_bits, out int zero_node_shift, out int one_node_shift, out int ordinal_shift)
        {
            //this bit layout seems to work best: zero,one,ord
            zero_node_shift = ordinal_bits + node_bits;
            one_node_shift = ordinal_bits;
            ordinal_shift = 0;
        }
        #endregion
 
        /// <summary>
        /// Finds the terminal for the input in a Multi-Terminal-BDD.
        /// Bits of the input are used to determine the path in the BDD.
        /// Returns -1 if False is reached and -2 if True is reached,
        /// else returns the MTBDD terminal number that is reached.
        /// If this is a nonterminal, Find does not care about input bits &gt; Ordinal.
        /// </summary>
        public int Find(int input)
        {
            BDD bdd = this;
            while (!bdd.IsLeaf)
            {
                bdd = (input & (1 << bdd.Ordinal)) == 0 ? bdd.Zero : bdd.One;
            }
            return bdd.Ordinal;
        }
 
        /// <summary>
        /// Assumes BDD is not MTBDD and returns true iff it contains the input.
        /// (Otherwise use BDD.Find if this is if fact a MTBDD.)
        /// </summary>
        public bool Contains(int input) => Find(input) == TrueOrdinal; //-2 is the Ordinal of BDD.True
 
        /// <summary>
        /// Returns true if the only other terminal besides False is a MTBDD terminal that is different from True.
        /// If this is the case, outputs that terminal.
        /// </summary>
        public bool IsEssentiallyBoolean([NotNullWhen(true)] out BDD? terminalActingAsTrue)
        {
            if (IsFull || IsEmpty)
            {
                terminalActingAsTrue = null;
                return false;
            }
 
            if (IsLeaf)
            {
                terminalActingAsTrue = this;
                return true;
            }
 
            var toVisit = new Stack<BDD>();
            var visited = new HashSet<BDD>();
 
            toVisit.Push(this);
 
            // this will hold the unique MTBDD leaf
            BDD? leaf = null;
 
            while (toVisit.Count > 0)
            {
                BDD node = toVisit.Pop();
                if (node.IsEmpty)
                    continue;
 
                if (node.IsFull)
                {
                    //contains the True leaf
                    terminalActingAsTrue = null;
                    return false;
                }
 
                if (node.IsLeaf)
                {
                    if (leaf is null)
                    {
                        // remember the first MTBDD leaf seen
                        leaf = node;
                    }
                    else if (leaf != node)
                    {
                        // found two different MTBDD leaves
                        terminalActingAsTrue = null;
                        return false;
                    }
                }
                else
                {
                    if (visited.Add(node.Zero))
                        toVisit.Push(node.Zero);
 
                    if (visited.Add(node.One))
                        toVisit.Push(node.One);
                }
            }
 
            Debug.Assert(leaf is not null, "this should never happen because there must exist another leaf besides False");
            // found an MTBDD leaf and didn't find any other (non-False) leaves
            terminalActingAsTrue = leaf;
            return true;
        }
 
        /// <summary>
        /// All terminals precede all nonterminals. Compares Ordinals for terminals.
        /// Compare non-terminals by comparing their minimal elements.
        /// If minimal elements are the same, compare Ordinals.
        /// This provides a total order for terminals.
        /// </summary>
        public int CompareTo(BDD? other)
        {
            if (other is null)
            {
                return -1;
            }
 
            if (IsLeaf)
            {
                return
                    !other.IsLeaf || Ordinal < other.Ordinal ? -1 :
                    Ordinal == other.Ordinal ? 0 :
                    1;
            }
 
            if (other.IsLeaf)
            {
                return 1;
            }
 
            ulong min = GetMin();
            ulong bdd_min = other.GetMin();
            return
                min < bdd_min ? -1 :
                bdd_min < min ? 1 :
                Ordinal.CompareTo(other.Ordinal);
        }
    }
}