File: Utilities\EscapingUtilities.cs
Web Access
Project: ..\..\..\src\MSBuildTaskHost\MSBuildTaskHost.csproj (MSBuildTaskHost)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.NET.StringTools;
 
namespace Microsoft.Build.TaskHost.Utilities;
 
/// <summary>
/// This class implements static methods to assist with unescaping of %XX codes
/// in the MSBuild file format.
/// </summary>
/// <remarks>
/// PERF: since we escape and unescape relatively frequently, it may be worth caching
/// the last N strings that were (un)escaped
/// </remarks>
internal static class EscapingUtilities
{
    /// <summary>
    /// Special characters that need escaping.
    /// It's VERY important that the percent character is the FIRST on the list - since it's both a character
    /// we escape and use in escape sequences, we can unintentionally escape other escape sequences if we
    /// don't process it first. Of course we'll have a similar problem if we ever decide to escape hex digits
    /// (that would require rewriting the algorithm) but since it seems unlikely that we ever do, this should
    /// be good enough to avoid complicating the algorithm at this point.
    /// </summary>
    private static readonly char[] s_charsToEscape = ['%', '*', '?', '@', '$', '(', ')', ';', '\''];
 
    /// <summary>
    /// Optional cache of escaped strings for use when needing to escape in performance-critical scenarios with significant
    /// expected string reuse.
    /// </summary>
    private static readonly Dictionary<string, string> s_unescapedToEscapedStrings = new(StringComparer.Ordinal);
 
    private static bool TryDecodeHexDigit(char character, out int value)
    {
        switch (character)
        {
            case >= '0' and <= '9':
                value = character - '0';
                return true;
            case >= 'A' and <= 'F':
                value = character - 'A' + 10;
                return true;
            case >= 'a' and <= 'f':
                value = character - 'a' + 10;
                return true;
        }
 
        value = default;
        return false;
    }
 
    /// <summary>
    /// Replaces all instances of %XX in the input string with the character represented
    /// by the hexadecimal number XX.
    /// </summary>
    /// <param name="escapedString">The string to unescape.</param>
    /// <param name="trim">If the string should be trimmed before being unescaped.</param>
    /// <returns>unescaped string</returns>
    internal static string UnescapeAll(string escapedString, bool trim = false)
    {
        // If the string doesn't contain anything, then by definition it doesn't
        // need unescaping.
        if (string.IsNullOrEmpty(escapedString))
        {
            return escapedString;
        }
 
        // If there are no percent signs, just return the original string immediately.
        // Don't even instantiate the StringBuilder.
        int indexOfPercent = escapedString.IndexOf('%');
        if (indexOfPercent == -1)
        {
            return trim ? escapedString.Trim() : escapedString;
        }
 
        // This is where we're going to build up the final string to return to the caller.
        StringBuilder unescapedString = StringBuilderCache.Acquire(escapedString.Length);
 
        int currentPosition = 0;
        int escapedStringLength = escapedString.Length;
        if (trim)
        {
            while (currentPosition < escapedString.Length && char.IsWhiteSpace(escapedString[currentPosition]))
            {
                currentPosition++;
            }
 
            if (currentPosition == escapedString.Length)
            {
                return string.Empty;
            }
 
            while (char.IsWhiteSpace(escapedString[escapedStringLength - 1]))
            {
                escapedStringLength--;
            }
        }
 
        // Loop until there are no more percent signs in the input string.
        while (indexOfPercent != -1)
        {
            // There must be two hex characters following the percent sign
            // for us to even consider doing anything with this.
            if ((indexOfPercent <= (escapedStringLength - 3)) &&
                TryDecodeHexDigit(escapedString[indexOfPercent + 1], out int digit1) &&
                TryDecodeHexDigit(escapedString[indexOfPercent + 2], out int digit2))
            {
                // First copy all the characters up to the current percent sign into
                // the destination.
                unescapedString.Append(escapedString, currentPosition, indexOfPercent - currentPosition);
 
                // Convert the %XX to an actual real character.
                char unescapedCharacter = (char)((digit1 << 4) + digit2);
 
                // if the unescaped character is not on the exception list, append it
                unescapedString.Append(unescapedCharacter);
 
                // Advance the current pointer to reflect the fact that the destination string
                // is up to date with everything up to and including this escape code we just found.
                currentPosition = indexOfPercent + 3;
            }
 
            // Find the next percent sign.
            indexOfPercent = escapedString.IndexOf('%', indexOfPercent + 1);
        }
 
        // Okay, there are no more percent signs in the input string, so just copy the remaining
        // characters into the destination.
        unescapedString.Append(escapedString, currentPosition, escapedStringLength - currentPosition);
 
        return StringBuilderCache.GetStringAndRelease(unescapedString);
    }
 
    /// <summary>
    /// Adds instances of %XX in the input string where the char to be escaped appears
    /// XX is the hex value of the ASCII code for the char.  Interns and caches the result.
    /// </summary>
    /// <comment>
    /// NOTE:  Only recommended for use in scenarios where there's expected to be significant
    /// repetition of the escaped string.  Cache currently grows unbounded.
    /// </comment>
    internal static string EscapeWithCaching(string unescapedString)
        => EscapeWithOptionalCaching(unescapedString, cache: true);
 
    /// <summary>
    /// Adds instances of %XX in the input string where the char to be escaped appears
    /// XX is the hex value of the ASCII code for the char.
    /// </summary>
    /// <param name="unescapedString">The string to escape.</param>
    /// <returns>escaped string</returns>
    internal static string Escape(string unescapedString)
        => EscapeWithOptionalCaching(unescapedString, cache: false);
 
    /// <summary>
    /// Adds instances of %XX in the input string where the char to be escaped appears
    /// XX is the hex value of the ASCII code for the char.  Caches if requested.
    /// </summary>
    /// <param name="unescapedString">The string to escape.</param>
    /// <param name="cache">
    /// True if the cache should be checked, and if the resultant string
    /// should be cached.
    /// </param>
    private static string EscapeWithOptionalCaching(string unescapedString, bool cache)
    {
        // If there are no special chars, just return the original string immediately.
        // Don't even instantiate the StringBuilder.
        if (string.IsNullOrEmpty(unescapedString) || !ContainsReservedCharacters(unescapedString))
        {
            return unescapedString;
        }
 
        // next, if we're caching, check to see if it's already there.
        if (cache)
        {
            lock (s_unescapedToEscapedStrings)
            {
                if (s_unescapedToEscapedStrings.TryGetValue(unescapedString, out string? cachedEscapedString))
                {
                    return cachedEscapedString;
                }
            }
        }
 
        // This is where we're going to build up the final string to return to the caller.
        StringBuilder escapedStringBuilder = StringBuilderCache.Acquire(unescapedString.Length * 2);
 
        AppendEscapedString(escapedStringBuilder, unescapedString);
 
        if (!cache)
        {
            return StringBuilderCache.GetStringAndRelease(escapedStringBuilder);
        }
 
        string escapedString = Strings.WeakIntern(escapedStringBuilder.ToString());
        StringBuilderCache.Release(escapedStringBuilder);
 
        lock (s_unescapedToEscapedStrings)
        {
            s_unescapedToEscapedStrings[unescapedString] = escapedString;
        }
 
        return escapedString;
    }
 
    /// <summary>
    /// Before trying to actually escape the string, it can be useful to call this method to determine
    /// if escaping is necessary at all.  This can save lots of calls to copy around item metadata
    /// that is really the same whether escaped or not.
    /// </summary>
    /// <param name="unescapedString"></param>
    /// <returns></returns>
    private static bool ContainsReservedCharacters(string unescapedString)
        => unescapedString.IndexOfAny(s_charsToEscape) >= 0;
 
    /// <summary>
    /// Convert the given integer into its hexadecimal representation.
    /// </summary>
    /// <param name="x">The number to convert, which must be non-negative and less than 16</param>
    /// <returns>The character which is the hexadecimal representation of <paramref name="x"/>.</returns>
    private static char HexDigitChar(int x)
        => (char)(x + (x < 10 ? '0' : ('a' - 10)));
 
    /// <summary>
    /// Append the escaped version of the given character to a <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="sb">The <see cref="StringBuilder"/> to which to append.</param>
    /// <param name="ch">The character to escape.</param>
    private static void AppendEscapedChar(StringBuilder sb, char ch)
    {
        // Append the escaped version which is a percent sign followed by two hexadecimal digits
        sb.Append('%');
        sb.Append(HexDigitChar(ch / 0x10));
        sb.Append(HexDigitChar(ch & 0x0F));
    }
 
    /// <summary>
    /// Append the escaped version of the given string to a <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="sb">The <see cref="StringBuilder"/> to which to append.</param>
    /// <param name="unescapedString">The unescaped string.</param>
    private static void AppendEscapedString(StringBuilder sb, string unescapedString)
    {
        // Replace each unescaped special character with an escape sequence one
        for (int idx = 0; ;)
        {
            int nextIdx = unescapedString.IndexOfAny(s_charsToEscape, idx);
            if (nextIdx == -1)
            {
                sb.Append(unescapedString, idx, unescapedString.Length - idx);
                break;
            }
 
            sb.Append(unescapedString, idx, nextIdx - idx);
            AppendEscapedChar(sb, unescapedString[nextIdx]);
            idx = nextIdx + 1;
        }
    }
}