File: Licenses\LicenseExpressionTokenizer.cs
Web Access
Project: src\src\nuget-client\src\NuGet.Core\NuGet.Packaging\NuGet.Packaging.csproj (NuGet.Packaging)
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text.RegularExpressions;

namespace NuGet.Packaging.Licenses
{
    /// <summary>
    /// Breaks a license expression string into a sequence of <see cref="LicenseExpressionToken"/>s
    /// (brackets, operators and identifiers) in the order they appear in the input.
    /// </summary>
    internal class LicenseExpressionTokenizer
    {
        private static readonly Regex ValidCharactersRegex = new Regex("^[a-zA-Z0-9\\.\\-\\s\\+\\(\\)]+$", RegexOptions.CultureInvariant);

        // Tokens are separated by the space character only; hoisted so the array is not re-allocated per call.
        private static readonly char[] TokenSeparators = { ' ' };

        private readonly string _value;

        /// <summary>
        /// A tokenizer for a license expression.
        /// This implementation assumes that the input has been sanitized and that there are no invalid characters;
        /// use <see cref="HasValidCharacters"/> to check. The value is stored with leading and trailing whitespace removed.
        /// </summary>
        /// <param name="value">value to be tokenized</param>
        /// <exception cref="ArgumentException">If the string is null or whitespace.</exception>
        internal LicenseExpressionTokenizer(string value)
        {
            if (string.IsNullOrWhiteSpace(value))
            {
                throw new ArgumentException(string.Format(CultureInfo.CurrentCulture, Strings.ArgumentCannotBeNullOrEmpty, nameof(value)));
            }
            _value = value.Trim();
        }

        /// <summary>
        /// The valid characters for a license identifier are a-zA-Z0-9.-+
        /// The valid characters for a license expression are the above, plus whitespace and the brackets ( and ).
        /// </summary>
        /// <returns>Whether the value consists only of valid license expression characters.</returns>
        internal bool HasValidCharacters()
        {
            return ValidCharactersRegex.IsMatch(_value);
        }

        /// <summary>
        /// Tokenizes the value by splitting on spaces, then separating any brackets that are attached to the
        /// start or end of each space-delimited piece. The considered operators are AND, OR, WITH, ( and ).
        /// Tokens are returned in the same left-to-right order in which they appear in the value.
        /// </summary>
        /// <returns>The tokens, as <see cref="LicenseExpressionToken"/>s.</returns>
        /// <remarks>
        /// Only brackets at the very start or very end of a space-delimited piece are split off;
        /// a bracket surrounded by other characters stays part of the value token.
        /// </remarks>
        internal IEnumerable<LicenseExpressionToken> Tokenize()
        {
            foreach (var token in _value.Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries))
            {
                // Emit every bracket attached to the front of the piece.
                var valueStart = 0;
                while (valueStart < token.Length && IsBracket(token[valueStart]))
                {
                    yield return ParseBracket(token[valueStart]);
                    valueStart++;
                }

                // Find where the brackets attached to the back of the piece begin.
                // They are emitted only after the value, so just locate the boundary here.
                var valueEnd = token.Length;
                while (valueEnd > valueStart && IsBracket(token[valueEnd - 1]))
                {
                    valueEnd--;
                }

                // Whatever remains between the two bracket runs is an operator or an identifier.
                if (valueEnd > valueStart)
                {
                    yield return ParseTokenType(token.Substring(valueStart, valueEnd - valueStart));
                }

                // Emit the trailing brackets left to right so the token stream preserves source order.
                for (var i = valueEnd; i < token.Length; i++)
                {
                    yield return ParseBracket(token[i]);
                }
            }
        }

        /// <summary>
        /// Tells whether a character is one of the two bracket characters recognized by the tokenizer.
        /// </summary>
        /// <param name="candidate">The character to inspect.</param>
        /// <returns><c>true</c> for '(' or ')', otherwise <c>false</c>.</returns>
        private static bool IsBracket(char candidate)
        {
            return candidate == '(' || candidate == ')';
        }

        /// <summary>
        /// Creates the bracket token for a bracket character.
        /// </summary>
        /// <param name="bracket">Either '(' or ')'.</param>
        /// <returns>An opening or closing bracket token whose value is the bracket character.</returns>
        /// <exception cref="ArgumentException">If <paramref name="bracket"/> is not a bracket character.</exception>
        private static LicenseExpressionToken ParseBracket(char bracket)
        {
            switch (bracket)
            {
                case '(':
                    return new LicenseExpressionToken(bracket.ToString(CultureInfo.CurrentCulture), LicenseTokenType.OPENING_BRACKET);
                case ')':
                    return new LicenseExpressionToken(bracket.ToString(CultureInfo.CurrentCulture), LicenseTokenType.CLOSING_BRACKET);
                default:
                    throw new ArgumentException("Invalid bracket character: " + bracket);
            }
        }

        /// <summary>
        /// Classifies a token as an operator or an identifier.
        /// </summary>
        /// <param name="token">The token</param>
        /// <returns>A parsed token, operator or value.</returns>
        /// <remarks>This method assumes the brackets have already been parsed out.</remarks>
        private static LicenseExpressionToken ParseTokenType(string token)
        {
            // Enum.TryParse also accepts numeric strings (e.g. "1") and comma-separated name lists,
            // which would let a token that is not literally an operator keyword be classified as
            // whichever operator has the resulting underlying value. Requiring the text to be the
            // name of an enum member rules those out. The Trim mirrors the whitespace trimming
            // that Enum.TryParse applies, so tokens carrying stray non-space whitespace are
            // classified the same way as before.
            var isOperator = Enum.TryParse(value: token, result: out LicenseTokenType parsedType)
                && parsedType.IsOperator()
                && Enum.IsDefined(typeof(LicenseTokenType), token.Trim());

            if (isOperator)
            {
                return new LicenseExpressionToken(token, parsedType);
            }

            // We already covered the brackets earlier, so it has to be a value.
            return new LicenseExpressionToken(token, LicenseTokenType.IDENTIFIER);
        }
    }
}