File: Utils\Globbing\StaticWebAssetGlobMatcher.cs
Web Access
Project: ..\..\..\src\StaticWebAssetsSdk\Tasks\Microsoft.NET.Sdk.StaticWebAssets.Tasks.csproj (Microsoft.NET.Sdk.StaticWebAssets.Tasks)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
#nullable disable
 
using System.Diagnostics;
 
namespace Microsoft.AspNetCore.StaticWebAssets.Tasks;
 
public class StaticWebAssetGlobMatcher(GlobNode includes, GlobNode excludes)
{
    // For testing only
    internal GlobMatch Match(string path)
    {
        var context = CreateMatchContext();
        context.SetPathAndReinitialize(path);
        return Match(context);
    }
 
    public GlobMatch Match(MatchContext context)
    {
        var stateStack = context.MatchStates;
        var tokenizer = new PathTokenizer(context.Path);
        var segments = tokenizer.Fill(context.Segments);
        if (segments.Count == 0)
        {
            return new(false, string.Empty);
        }
 
        if (excludes != null)
        {
            var excluded = MatchCore(excludes, segments, stateStack);
            if (excluded.IsMatch)
            {
                return new(false, null);
            }
        }
 
        return MatchCore(includes, segments, stateStack);
    }
 
    private static GlobMatch MatchCore(GlobNode includes, PathTokenizer.SegmentCollection segments, Stack<MatchState> stateStack)
    {
        stateStack.Push(new(includes));
        while (stateStack.Count > 0)
        {
            var state = stateStack.Pop();
            var stage = state.Stage;
            var currentIndex = state.SegmentIndex;
            var node = state.Node;
 
            switch (stage)
            {
                case MatchStage.Done:
                    if (currentIndex == segments.Count)
                    {
                        if (node.Match != null)
                        {
                            var stem = ComputeStem(segments, state.StemStartIndex);
                            return new(true, node.Match, stem);
                        }
 
                        // We got to the end with no matches, pop the next element on the stack.
                        continue;
                    }
                    break;
                case MatchStage.Literal:
                    if (currentIndex == segments.Count)
                    {
                        // We ran out of segments to match
                        continue;
                    }
                    PushNextStageIfAvailable(stateStack, state);
                    MatchLiteral(segments, stateStack, state);
                    break;
                case MatchStage.Extension:
                    if (currentIndex == segments.Count)
                    {
                        // We ran out of segments to match
                        continue;
                    }
                    PushNextStageIfAvailable(stateStack, state);
                    MatchExtension(segments, stateStack, state);
                    break;
                case MatchStage.Complex:
                    if (currentIndex == segments.Count)
                    {
                        // We ran out of segments to match
                        continue;
                    }
                    PushNextStageIfAvailable(stateStack, state);
                    MatchComplex(segments, stateStack, state);
                    break;
                case MatchStage.WildCard:
                    if (currentIndex == segments.Count)
                    {
                        // We ran out of segments to match
                        continue;
                    }
                    PushNextStageIfAvailable(stateStack, state);
                    MatchWildCard(stateStack, state);
                    break;
                case MatchStage.RecursiveWildCard:
                    MatchRecursiveWildCard(segments, stateStack, state);
                    break;
            }
        }
 
        return new(false, null);
    }
 
    private static string ComputeStem(PathTokenizer.SegmentCollection segments, int stemStartIndex)
    {
        if (stemStartIndex == -1)
        {
            return segments[segments.Count - 1].ToString();
        }
#if NET9_0_OR_GREATER
        var stemLength = 0;
        for (var i = stemStartIndex; i < segments.Count; i++)
        {
            stemLength += segments[i].Length;
        }
        // Separators
        stemLength += segments.Count - stemStartIndex - 1;
 
        return string.Create(stemLength, segments.Slice(stemStartIndex), (span, segments) =>
        {
            var index = 0;
            for (var i = 0; i < segments.Count; i++)
            {
                var segment = segments[i];
                segment.CopyTo(span.Slice(index));
                index += segment.Length;
                if (i < segments.Count - 1)
                {
                    span[index++] = '/';
                }
            }
        });
#else
        var stem = new StringBuilder();
        for (var i = stemStartIndex; i < segments.Count; i++)
        {
            stem.Append(segments[i].ToString());
            if (i < segments.Count - 1)
            {
                stem.Append('/');
            }
        }
        return stem.ToString();
#endif
    }
 
    private static void MatchComplex(PathTokenizer.SegmentCollection segments, Stack<MatchState> stateStack, MatchState state)
    {
        // We need to try all the complex segments until we find one that matches or we run out of segments to try.
        // If we find a match for the current segment, we need to make sure that the rest of the segments match the remainder of the pattern.
        // For that reason, if we find a match, we need to push a state that will try the next complex segment in the list (if any) and one
        // state that will try the next segment in the current match, so that if for some reason the rest of the pattern doesn't match, we can
        // continue trying the rest of the complex segments.
        var complexSegmentIndex = state.ComplexSegmentIndex;
        var currentIndex = state.SegmentIndex;
        var node = state.Node;
        var segment = segments[currentIndex];
        var complexSegment = node.ComplexGlobSegments[complexSegmentIndex];
        var parts = complexSegment.Parts;
 
        if (TryMatchParts(segment, parts))
        {
            // We have a match for the current segment
            if (complexSegmentIndex + 1 < node.ComplexGlobSegments.Count)
            {
                // Push a state that will try the next complex segment
                stateStack.Push(state.NextComplex());
            }
 
            // Push a state to try the remainder of the segments
            stateStack.Push(state.NextSegment(complexSegment.Node));
        }
    }
 
    private static bool TryMatchParts(ReadOnlySpan<char> span, List<GlobSegmentPart> parts, int index = 0, int partIndex = 0)
    {
        for (var i = partIndex; i < parts.Count; i++)
        {
            if (index > span.Length)
            {
                // No more characters to consume but we still have parts to process
                return false;
            }
 
            var part = parts[i];
            switch (part.Kind)
            {
                case GlobSegmentPartKind.Literal:
                    if (!span.Slice(index).StartsWith(part.Value.Span, StringComparison.OrdinalIgnoreCase))
                    {
                        // Literal didn't match
                        return false;
                    }
                    index += part.Value.Length;
                    break;
                case GlobSegmentPartKind.QuestionMark:
                    index++;
                    break;
                case GlobSegmentPartKind.WildCard:
                    // Wildcards require trying to match 0 or more characters, so we need to try matching the rest of the parts after
                    // having consumed 0, 1, 2, ... characters and so on.
                    // Instead of jumping 0, 1, 2, etc, we are going to calculate the next step by finding the next literal on the list.
                    // If we find another * we can discard the current one.
                    // If we find one or moe '?' we can require that at least as many characters as '?' are consumed.
                    // When we find a literal, we can try to find the index of the literal in the remaining string, and if we find it, we can
                    // try to match the rest of the parts, jumping ahead after the literal.
                    // If we happen to not find a literal, we have a match (trailing *) or at most we can require that there are N characters
                    // left in the string, where N is the number of '?' in the remaining parts.
                    var minimumCharactersToConsume = 0;
                    for (var j = i + 1; j < parts.Count; j++)
                    {
                        var nextPart = parts[j];
                        switch (nextPart.Kind)
                        {
                            case GlobSegmentPartKind.Literal:
                                // Start searching after the current index + the minimum characters to consume
                                var remainingSpan = span.Slice(index + minimumCharactersToConsume);
                                var nextLiteralIndex = remainingSpan.IndexOf(nextPart.Value.Span, StringComparison.OrdinalIgnoreCase);
                                while (nextLiteralIndex != -1)
                                {
                                    // Consume the characters before the literal and the literal itself before we try
                                    // to match the rest of the parts.
                                    remainingSpan = remainingSpan.Slice(nextLiteralIndex + nextPart.Value.Length);
 
                                    if (remainingSpan.Length == 0 && j == parts.Count - 1)
                                    {
                                        // We were looking at the last literal, so we have a match
                                        return true;
                                    }
 
                                    if (!TryMatchParts(remainingSpan, parts, 0, j + 1))
                                    {
                                        // If we couldn't match the rest of the parts, try the next literal
                                        nextLiteralIndex = remainingSpan.IndexOf(nextPart.Value.Span, StringComparison.OrdinalIgnoreCase);
                                    }
                                    else
                                    {
                                        return true;
                                    }
                                }
                                // At this point we couldn't match the next literal, in the list, so this pattern is not a match
                                return false;
                            case GlobSegmentPartKind.QuestionMark:
                                minimumCharactersToConsume++;
                                break;
                            case GlobSegmentPartKind.WildCard:
                                // Ignore any wildcard that comes right after the original one
                                break;
                        }
                    }
 
                    // There were no trailing literals, so we have a match if there are at least as many characters as '?' in the remaining parts
                    return index + minimumCharactersToConsume <= span.Length;
            }
        }
 
        return index == span.Length;
    }
 
    private static void MatchRecursiveWildCard(PathTokenizer.SegmentCollection segments, Stack<MatchState> stateStack, MatchState state)
    {
        var node = state.Node;
        for (var i = segments.Count - state.SegmentIndex; i >= 0; i--)
        {
            var nextSegment = state.NextSegment(node.RecursiveWildCard, i);
            // The stem is calculated as the first time the /**/ pattern is matched til the remainder of the path, otherwise, the stem is
            // the file name.
            if (nextSegment.StemStartIndex == -1)
            {
                nextSegment.StemStartIndex = state.SegmentIndex;
            }
 
            stateStack.Push(nextSegment);
        }
    }
 
    private static void MatchWildCard(Stack<MatchState> stateStack, MatchState state)
    {
        // A wildcard matches any segment, so we can continue with the next
        stateStack.Push(state.NextSegment(state.Node.WildCard));
    }
 
    private static void MatchExtension(PathTokenizer.SegmentCollection segments, Stack<MatchState> stateStack, MatchState state)
    {
        var node = state.Node;
        var currentIndex = state.SegmentIndex;
        var extensionIndex = state.ExtensionSegmentIndex;
        var segment = segments[currentIndex];
        if (extensionIndex >= segment.Length)
        {
            // We couldn't find any path that matched the extensions we have
            return;
        }
 
        // We start from something.else.txt matching.else.txt and then .txt
        var remaining = segment.Slice(extensionIndex);
        var indexOfDot = remaining.IndexOf('.');
        if (indexOfDot != -1)
        {
            if (TryMatchExtension(node, remaining.Slice(indexOfDot), out var extensionCandidate))
            {
                stateStack.Push(state.NextSegment(extensionCandidate));
            }
            else
            {
                // If we fail to match, try and match the next extension.
                stateStack.Push(state.NextExtension(extensionIndex + indexOfDot + 1));
            }
        }
    }
 
    private static void MatchLiteral(PathTokenizer.SegmentCollection segments, Stack<MatchState> stateStack, MatchState state)
    {
        var currentIndex = state.SegmentIndex;
        var node = state.Node;
        // Push the next stage to the stack so we can continue searching in case we don't match the entire path
        PushNextStageIfAvailable(stateStack, state);
        if (TryMatchLiteral(node, segments[currentIndex], out var literalCandidate))
        {
            // Push the found node to the stack to match the remaining path segments
            stateStack.Push(state.NextSegment(literalCandidate));
        }
    }
 
    private static void PushNextStageIfAvailable(Stack<MatchState> stateStack, MatchState state)
    {
        if (state.ExtensionSegmentIndex == 0 && state.ComplexSegmentIndex == 0)
        {
            var nextStage = state.NextStage();
            if (nextStage.HasValue)
            {
                stateStack.Push(nextStage);
            }
        }
    }
 
    [DebuggerDisplay($"{{{nameof(GetDebuggerDisplay)}(),nq}}")]
    internal struct MatchState(GlobNode node, MatchStage stage, int segmentIndex, int extensionSegmentIndex, int complexSegmentIndex)
    {
        public MatchState(GlobNode node) : this(node, GetInitialStage(node), 0, 0, 0) { }
 
        public GlobNode Node { get; set; } = node;
 
        public MatchStage Stage { get; set; } = stage;
 
        // Index on the list of segments for the path
        public int SegmentIndex { get; set; } = segmentIndex;
 
        public int ExtensionSegmentIndex { get; set; } = extensionSegmentIndex;
 
        public int ComplexSegmentIndex { get; set; } = complexSegmentIndex;
 
        public int StemStartIndex { get; set; } = -1;
 
        internal readonly bool HasValue => Node != null;
 
        public readonly void Deconstruct(out GlobNode node, out MatchStage stage, out int segmentIndex, out int extensionIndex, out int complexIndex)
        {
            node = Node;
            stage = Stage;
            segmentIndex = SegmentIndex;
            extensionIndex = ExtensionSegmentIndex;
            complexIndex = ComplexSegmentIndex;
        }
 
        internal MatchState NextSegment(GlobNode candidate, int elements = 1, int complexIndex = 0) =>
            new(candidate, GetInitialStage(candidate), SegmentIndex + elements, 0, complexIndex) { StemStartIndex = StemStartIndex };
 
        internal MatchState NextStage()
        {
            switch (Stage)
            {
                case MatchStage.Literal:
                    if (Node.HasExtensions())
                    {
                        return new(Node, MatchStage.Extension, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
 
                    if (Node.ComplexGlobSegments != null && Node.ComplexGlobSegments.Count > 0)
                    {
                        return new(Node, MatchStage.Complex, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
 
                    if (Node.WildCard != null)
                    {
                        return new(Node, MatchStage.WildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
 
                    if (Node.RecursiveWildCard != null)
                    {
                        return new(Node, MatchStage.RecursiveWildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
                    break;
                case MatchStage.Extension:
                    if (Node.ComplexGlobSegments != null && Node.ComplexGlobSegments.Count > 0)
                    {
                        return new(Node, MatchStage.Complex, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
 
                    if (Node.WildCard != null)
                    {
                        return new(Node, MatchStage.WildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
 
                    if (Node.RecursiveWildCard != null)
                    {
                        return new(Node, MatchStage.RecursiveWildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
                    break;
                case MatchStage.Complex:
                    if (Node.WildCard != null)
                    {
                        return new(Node, MatchStage.WildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
                    if (Node.RecursiveWildCard != null)
                    {
                        return new(Node, MatchStage.RecursiveWildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
                    break;
                case MatchStage.WildCard:
                    if (Node.RecursiveWildCard != null)
                    {
                        return new(Node, MatchStage.RecursiveWildCard, SegmentIndex, 0, 0)
                        { StemStartIndex = StemStartIndex };
                    }
                    break;
                case MatchStage.RecursiveWildCard:
                    return new(Node, MatchStage.Done, SegmentIndex, 0, 0)
                    { StemStartIndex = StemStartIndex };
            }
 
            return default;
        }
 
        private static MatchStage GetInitialStage(GlobNode node)
        {
            if (node.HasLiterals())
            {
                return MatchStage.Literal;
            }
 
            if (node.HasExtensions())
            {
                return MatchStage.Extension;
            }
 
            if (node.ComplexGlobSegments != null && node.ComplexGlobSegments.Count > 0)
            {
                return MatchStage.Complex;
            }
 
            if (node.WildCard != null)
            {
                return MatchStage.WildCard;
            }
 
            if (node.RecursiveWildCard != null)
            {
                return MatchStage.RecursiveWildCard;
            }
 
            return MatchStage.Done;
        }
 
        internal readonly MatchState NextExtension(int extensionIndex) => new(Node, MatchStage.Extension, SegmentIndex, extensionIndex, ComplexSegmentIndex);
 
        internal readonly MatchState NextComplex() => new(Node, MatchStage.Complex, SegmentIndex, ExtensionSegmentIndex, ComplexSegmentIndex + 1);
 
        private readonly string GetDebuggerDisplay()
        {
            return $"Node: {Node}, Stage: {Stage}, SegmentIndex: {SegmentIndex}, ExtensionIndex: {ExtensionSegmentIndex}, ComplexSegmentIndex: {ComplexSegmentIndex}";
        }
 
    }
 
    internal enum MatchStage
    {
        Done,
        Literal,
        Extension,
        Complex,
        WildCard,
        RecursiveWildCard
    }
 
    private static bool TryMatchExtension(GlobNode node, ReadOnlySpan<char> extension, out GlobNode extensionCandidate) =>
#if NET9_0_OR_GREATER
        node.Extensions.TryGetValue(extension, out extensionCandidate);
#else
        node.Extensions.TryGetValue(extension.ToString(), out extensionCandidate);
#endif
 
    private static bool TryMatchLiteral(GlobNode node, ReadOnlySpan<char> current, out GlobNode nextNode) =>
#if NET9_0_OR_GREATER
        node.Literals.TryGetValue(current, out nextNode);
#else
        node.Literals.TryGetValue(current.ToString(), out nextNode);
#endif
 
    // The matchContext holds all the state for the underlying matching algorithm.
    // It is reused so that we avoid allocating memory for each match.
    // It is not thread-safe and should not be shared across threads.
    public static MatchContext CreateMatchContext() => new();
 
    public ref struct MatchContext()
    {
        public ReadOnlySpan<char> Path;
        public string PathString;
 
        internal List<PathTokenizer.Segment> Segments { get; set; } = [];
        internal Stack<MatchState> MatchStates { get; set; } = [];
 
        public void SetPathAndReinitialize(string path)
        {
            PathString = path;
            Path = path.AsSpan();
            Segments.Clear();
            MatchStates.Clear();
        }
 
        public void SetPathAndReinitialize(ReadOnlySpan<char> path)
        {
            Path = path;
            Segments.Clear();
            MatchStates.Clear();
        }
 
        public void SetPathAndReinitialize(ReadOnlyMemory<char> path)
        {
            Path = path.Span;
            Segments.Clear();
            MatchStates.Clear();
        }
 
    }
}