File: QuotingUtilities.cs
Web Access
Project: ..\..\..\src\MSBuild\MSBuild.csproj (MSBuild)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
 
#nullable disable
 
namespace Microsoft.Build.Shared
{
    /// <summary>
    /// This class implements some static methods to assist with command-line parsing of
    /// parameters that could be quoted, and thus could contain nested escaped quotes.
    /// </summary>
    internal static class QuotingUtilities
    {
        /*
         * Quoting Rules:
         *
         * A string is considered quoted if it is enclosed in double-quotes. A double-quote can be escaped with a backslash, or it
         * is automatically escaped if it is the last character in an explicitly terminated quoted string. A backslash itself can
         * be escaped with another backslash IFF it precedes a double-quote, otherwise it is interpreted literally.
         *
         * e.g.
         *      abc"cde"xyz         --> "cde" is quoted
         *      abc"xyz             --> "xyz" is quoted (the terminal double-quote is assumed)
         *      abc"xyz"            --> "xyz" is quoted (the terminal double-quote is explicit)
         *
         *      abc\"cde"xyz        --> "xyz" is quoted (the terminal double-quote is assumed)
         *      abc\\"cde"xyz       --> "cde" is quoted
         *      abc\\\"cde"xyz      --> "xyz" is quoted (the terminal double-quote is assumed)
         *
         *      abc"""xyz           --> """ is quoted
         *      abc""""xyz          --> """ and "xyz" are quoted (the terminal double-quote is assumed)
         *      abc"""""xyz         --> """ is quoted
         *      abc""""""xyz        --> """ and """ are quoted
         *      abc"cde""xyz        --> "cde"" is quoted
         *      abc"xyz""           --> "xyz"" is quoted (the terminal double-quote is explicit)
         *
         *      abc""xyz            --> nothing is quoted
         *      abc""cde""xyz       --> nothing is quoted
         */
 
        // The null character is used to mark a string for splitting: SplitUnquoted() writes one into its working
        // buffer at every unquoted separator, then hands this array to string.Split() to cut the buffer into pieces.
        private static readonly char[] s_splitMarker = { '\0' };
 
        /// <summary>
        /// Breaks the given string apart at every separator character that lies outside a quoted region (per the
        /// quoting rules documented at the top of this class), optionally unquoting each resulting piece.
        /// </summary>
        /// <remarks>
        /// 1) Empty pieces are discarded (and counted) unless the caller asks to keep them, so a run of adjacent
        ///    unquoted separators behaves like a single separator.
        /// 2) When no separator characters are supplied, any whitespace character acts as a separator.
        /// 3) Null characters met while scanning raise a debug assertion and are left out of the pieces.
        /// </remarks>
        /// <param name="input">The string to split.</param>
        /// <param name="maxSplits">
        /// The maximum number of pieces to cut the string into; must be at least 2. Every unquoted separator counts
        /// towards this limit, and once it is reached the rest of the string is taken as the final piece without
        /// looking for further separators.
        /// </param>
        /// <param name="keepEmptySplits">When true, empty pieces are returned instead of being discarded.</param>
        /// <param name="unquote">When true, each piece is passed through Unquote() before being examined and returned.</param>
        /// <param name="emptySplits">[out] the number of pieces that were empty (after any unquoting) and were therefore discarded</param>
        /// <param name="separator">The separator characters; the double-quote character is not allowed.</param>
        /// <returns>List of all the pieces the string was split into, in order of appearance.</returns>
        internal static List<string> SplitUnquoted(
            string input,
            int maxSplits,
            bool keepEmptySplits,
            bool unquote,
            out int emptySplits,
            params char[] separator)
        {
            ErrorUtilities.VerifyThrow(maxSplits >= 2, "There is no point calling this method for less than two splits.");

            // A null or empty separator array yields an empty string here, which selects whitespace splitting below.
            string separators = new string(separator);

            ErrorUtilities.VerifyThrow(separators.IndexOf('"') == -1, "The double-quote character is not supported as a separator.");

            // Working copy of the input in which every unquoted separator is replaced by the null split marker.
            var marked = new StringBuilder(input.Length);

            bool insideQuotes = false;
            int backslashRun = 0;
            int pieceCount = 1;
            int position = 0;

            while ((position < input.Length) && (pieceCount < maxSplits))
            {
                char current = input[position];

                if (current == '\0')
                {
                    // Null is reserved as the split marker, so a null in the input is simply dropped.
                    Debug.Assert(false, "Null character in parameter");
                }
                else if (current == '\\')
                {
                    marked.Append(current);
                    backslashRun++;
                }
                else if (current == '"')
                {
                    marked.Append(current);

                    // An even number of backslashes in front means the quote itself is not escaped.
                    if ((backslashRun % 2) == 0)
                    {
                        bool followedByQuote = (position + 1 < input.Length) && (input[position + 1] == '"');
                        if (insideQuotes && followedByQuote)
                        {
                            // A doubled quote terminating a quoted region: the second quote is consumed here so
                            // that it does not start a new quoted region on the next iteration.
                            marked.Append('"');
                            position++;
                        }

                        insideQuotes = !insideQuotes;
                    }

                    backslashRun = 0;
                }
                else
                {
                    bool splitHere = !insideQuotes &&
                        ((separators.Length == 0)
                            ? char.IsWhiteSpace(current)
                            : (separators.IndexOf(current) != -1));

                    if (splitHere)
                    {
                        marked.Append('\0');
                        pieceCount++;

                        if (pieceCount == maxSplits)
                        {
                            // Split budget exhausted: copy everything after this separator as the last piece.
                            int restStart = position + 1;
                            marked.Append(input, restStart, input.Length - restStart);
                        }
                    }
                    else
                    {
                        marked.Append(current);
                    }

                    backslashRun = 0;
                }

                position++;
            }

            var result = new List<string>();
            emptySplits = 0;

            string[] rawPieces = marked.ToString().Split(s_splitMarker, maxSplits);
            for (int k = 0; k < rawPieces.Length; k++)
            {
                string piece = rawPieces[k];
                if (unquote)
                {
                    piece = Unquote(piece);
                }

                if (!keepEmptySplits && (piece.Length == 0))
                {
                    emptySplits++;
                    continue;
                }

                result.Add(piece);
            }

            return result;
        }
 
        /// <summary>
        /// Breaks the given string apart at every separator character that lies outside a quoted region. The pieces
        /// are returned as they appear in the input, i.e. they are not unquoted, and there is no practical limit on
        /// the number of pieces.
        /// </summary>
        /// <remarks>
        /// 1) Empty pieces are discarded, so a block of contiguous (unquoted) separators acts as one separator.
        /// 2) If no separators are given, the string is split on blocks of contiguous (unquoted) whitespace.
        /// </remarks>
        /// <param name="input">The string to split.</param>
        /// <param name="separator">The separator characters to split on.</param>
        /// <returns>List of all the non-empty pieces the string was split into.</returns>
        internal static List<string> SplitUnquoted(string input, params char[] separator)
        {
            // The number of discarded empty pieces is of no interest to callers of this overload.
            int discardedCount;

            List<string> pieces = SplitUnquoted(
                input,
                maxSplits: int.MaxValue,
                keepEmptySplits: false,
                unquote: false,
                emptySplits: out discardedCount,
                separator: separator);

            return pieces;
        }
 
        /// <summary>
        /// Strips the double-quotes that act as quoting syntax (i.e. those not escaped into literals), together with
        /// the backslashes that escape a double-quote or a backslash in front of a double-quote.
        /// </summary>
        /// <remarks>
        /// Backslashes that are not followed by a double-quote are kept as they are. A doubled double-quote that
        /// ends a quoted region leaves one literal double-quote in the result.
        /// </remarks>
        /// <param name="input">The string to unquote.</param>
        /// <param name="doubleQuotesRemoved">[out] the number of double-quotes removed from the string</param>
        /// <returns>The given string in unquoted form.</returns>
        internal static string Unquote(string input, out int doubleQuotesRemoved)
        {
            var result = new StringBuilder(input.Length);

            doubleQuotesRemoved = 0;
            bool insideQuotes = false;
            int pendingBackslashes = 0;

            for (int index = 0; index < input.Length; index++)
            {
                char current = input[index];

                if (current == '\\')
                {
                    // Hold backslashes back: how many get emitted depends on whether a double-quote follows.
                    pendingBackslashes++;
                    continue;
                }

                if (current != '"')
                {
                    // Backslashes in front of an ordinary character are literal.
                    result.Append('\\', pendingBackslashes).Append(current);
                    pendingBackslashes = 0;
                    continue;
                }

                // In front of a double-quote every pair of backslashes collapses into one literal backslash.
                result.Append('\\', pendingBackslashes / 2);
                bool quoteIsEscaped = (pendingBackslashes % 2) != 0;
                pendingBackslashes = 0;

                if (quoteIsEscaped)
                {
                    // The left-over odd backslash escaped this quote, so the quote stays as a literal.
                    result.Append('"');
                    continue;
                }

                bool followedByQuote = (index + 1 < input.Length) && (input[index + 1] == '"');
                if (insideQuotes && followedByQuote)
                {
                    // Doubled quote terminating a quoted region: emit one literal quote and consume both characters.
                    result.Append('"');
                    index++;
                }

                insideQuotes = !insideQuotes;
                doubleQuotesRemoved++;
            }

            // Backslashes at the very end of the string precede nothing and are therefore literal.
            result.Append('\\', pendingBackslashes);
            return result.ToString();
        }
 
        /// <summary>
        /// Strips the double-quotes that act as quoting syntax (i.e. those not escaped into literals), together with
        /// the backslashes that escape them, for callers that do not need to know how many double-quotes were removed.
        /// </summary>
        /// <param name="input">The string to unquote.</param>
        /// <returns>The given string in unquoted form.</returns>
        internal static string Unquote(string input)
        {
            // The removal count is computed by the full overload but is not reported by this one.
            int ignoredCount;
            string unquoted = Unquote(input, out ignoredCount);
            return unquoted;
        }
    }
}