File: GitDataReader\Glob.cs
Web Access
Project: src\src\sourcelink\src\Microsoft.Build.Tasks.Git\Microsoft.Build.Tasks.Git.csproj (Microsoft.Build.Tasks.Git)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the License.txt file in the project root for more information.

// Implementation based on documentation:
// - https://git-scm.com/docs/gitignore
// - http://man7.org/linux/man-pages/man7/glob.7.html
// - https://research.swtch.com/glob

using System;
using System.Diagnostics;

namespace Microsoft.Build.Tasks.Git
{
    // https://github.com/dotnet/corefx/issues/18922
    // https://github.com/dotnet/corefx/issues/25873

    internal static class Glob
    {
        internal static bool IsMatch(string pattern, string path, bool ignoreCase, bool matchWildCardWithDirectorySeparator)
        {
            var patternIndex = 0;
            var pathIndex = 0;

            // true if the next matching character must be the first character of a directory name
            var matchDirectoryNameStart = false;

            var stopAtPathSlash = false;

            var nextSinglePatternIndex = -1;
            var nextSinglePathIndex = -1;
            var nextDoublePatternIndex = -1;
            var nextDoublePathIndex = -1;

            bool equal(char x, char y)
                => x == y || ignoreCase && char.ToLowerInvariant(x) == char.ToLowerInvariant(y);

            while (patternIndex < pattern.Length)
            {
                var c = pattern[patternIndex++];
                if (c == '*')
                {
                    // "a/**/*" does not match "a", although it might appear from the spec that it should.

                    var isDoubleAsterisk = patternIndex < pattern.Length && pattern[patternIndex] == '*' && 
                                            (patternIndex == pattern.Length - 1 || pattern[patternIndex + 1] == '/') && 
                                            (patternIndex == 1 || pattern[patternIndex - 2] == '/');

                    if (isDoubleAsterisk)
                    {
                        // trailing "/**"
                        if (patternIndex == pattern.Length - 1)
                        {
                            // remaining path definitely matches
                            return true;
                        }

                        // At this point the initial '/' (if any) is already matched.
                        Debug.Assert(pattern[patternIndex] == '*' && pattern[patternIndex + 1] == '/');

                        // Continue matching remainder of the pattern following "**/" with the remainder of the path.
                        // The next path character only matches if it is preceded by '/'.
                        // Consider the following cases
                        //   "**/a*" ~ "abc"          
                        //   "**/b" ~ "x/yb/b"       (do not match the first 'b', only the second one)
                        //   "**/?" ~ "x/yz/u"       (do not match 'y', 'z'; match 'u')
                        //   "a/**/b*" ~ "a/bcd"
                        //   "a/**/b" ~ "a/x/yb/b"   (do not match the first 'b', only the second one)
                        patternIndex += 2;

                        stopAtPathSlash = false;
                        matchDirectoryNameStart = true;
                    }
                    else
                    {
                        // trailing "*"
                        if (patternIndex == pattern.Length)
                        {
                            return matchWildCardWithDirectorySeparator || path.IndexOf('/', pathIndex) == -1;
                        }

                        stopAtPathSlash = !matchWildCardWithDirectorySeparator;
                        matchDirectoryNameStart = false;
                    }

                    // If the rest of the pattern fails to match the rest of the path, we restart matching at the following indices.
                    // A sequence of consecutive resets is effectively searching the path for a substring that matches the span of the pattern
                    // in between the current wildcard and the next one.
                    //
                    // For example, consider matching pattern "A/**/B/**/C/*.D" to path "A/z/u/B/q/r/C/z.D".
                    // Processing the first ** wildcard keeps resetting until the pattern is alligned with "/B/" in the path (wildcard matches "z/u").
                    // Processing the next ** wildcard keeps resetting until the pattern is alligned with "/C/" in the path (wildcard matches "q/r").
                    // Finally, processing the * wildcard aligns on ".D" and the wildcard matches "z".
                    //
                    // If ** and * differ in matching '/' (matchWildCardWithDirectorySeparator is false) we need to reset them independently. 
                    // Consider pattern "A/**/B*C*D/**/E" matching to "A/u/v/BaaCaaX/u/BoCoD/u/E".
                    // If we aligned on substring "/B" in between the first ** and the next * we would not match the path correctly.
                    // Instead, we need to align on the sub-pattern "/B*C*D/" in between the first and the second **. 
                    if (stopAtPathSlash)
                    {
                        nextSinglePatternIndex = patternIndex;
                        nextSinglePathIndex = pathIndex + 1;
                    }
                    else
                    {
                        nextDoublePatternIndex = patternIndex;
                        nextDoublePathIndex = pathIndex + 1;
                    }

                    continue;
                }

                bool matching;

                if (c == '?')
                {
                    // "?" matches any character except for "/" (when matchWildCardWithDirectorySeparator is false)
                    matching = pathIndex < path.Length && (matchWildCardWithDirectorySeparator || path[pathIndex] != '/');
                }
                else if (c == '[')
                {
                    // "[]" matches a single character in the range
                    matching = pathIndex < path.Length && IsRangeMatch(pattern, ref patternIndex, path[pathIndex], ignoreCase, matchWildCardWithDirectorySeparator);
                }
                else 
                {
                    if (c == '\\' && patternIndex < pattern.Length)
                    {
                        c = pattern[patternIndex++];
                    }

                    // match specific character:
                    matching = pathIndex < path.Length && equal(c, path[pathIndex]);
                }

                if (matching && (!matchDirectoryNameStart || pathIndex == 0 || path[pathIndex - 1] == '/'))
                {
                    matchDirectoryNameStart = false;
                    pathIndex++;
                }
                else if (nextDoublePatternIndex >= 0 || nextSinglePatternIndex >= 0)
                {
                    // mismatch while matching pattern following a wildcard ** or *

                    // "*" matches anything but "/" (when matchWildCardWithDirectorySeparator is false)
                    if (!stopAtPathSlash || pathIndex < path.Length && path[pathIndex] == '/')
                    {
                        // Reset to the last saved ** position, if any. 
                        // Also handles reset of * when matchWildCardWithDirectorySeparator is true.

                        if (nextDoublePatternIndex < 0)
                        {
                            return false;
                        }

                        patternIndex = nextDoublePatternIndex;
                        pathIndex = nextDoublePathIndex;

                        nextDoublePathIndex++;
                    }
                    else
                    {
                        // Reset to the last saved * position.

                        patternIndex = nextSinglePatternIndex;
                        pathIndex = nextSinglePathIndex;

                        nextSinglePathIndex++;
                    }

                    Debug.Assert(patternIndex >= 0);
                    Debug.Assert(pathIndex >= 0);

                    if (pathIndex >= path.Length)
                    {
                        return false;
                    }
                }
                else
                {
                    // character mismatch
                    return false;
                }
            }

            return pathIndex == path.Length;
        }

        private static bool IsRangeMatch(string pattern, ref int patternIndex, char pathChar, bool ignoreCase, bool matchWildCardWithDirectorySeparator)
        {
            Debug.Assert(pattern[patternIndex - 1] == '[');

            if (patternIndex == pattern.Length)
            {
                return false;
            }

            if (ignoreCase)
            {
                pathChar = char.ToLowerInvariant(pathChar);
            }

            var negate = false;
            var isEmpty = true;
            var isMatching = false;

            var c = pattern[patternIndex];
            if (c is '!' or '^')
            {
                negate = true;
                patternIndex++;
            }

            while (patternIndex < pattern.Length)
            {
                c = pattern[patternIndex++];
                if (c == ']' && !isEmpty)
                {
                    // Range does not match '/', but [^a] matches '/' if matchWildCardWithDirectorySeparator=true.
                    return (pathChar != '/' || matchWildCardWithDirectorySeparator) && (negate ? !isMatching : isMatching);
                }

                if (ignoreCase)
                {
                    c = char.ToLowerInvariant(c);
                }

                char d;

                if (patternIndex + 1 < pattern.Length && pattern[patternIndex] == '-' && (d = pattern[patternIndex + 1]) != ']')
                {
                    if (ignoreCase)
                    {
                        d = char.ToLowerInvariant(d);
                    }

                    isMatching |= pathChar == c || pathChar > c && pathChar <= d;
                    patternIndex += 2;
                }
                else
                {
                    // continue parsing to validate the range is well-formed
                    isMatching |= pathChar == c;
                }

                isEmpty = false;
            }

            // malformed range
            return false;
        }
    }
}