// File: EmbeddedLanguages\Json\JsonParser.StrictSyntaxChecker.cs
// Web Access
// Project: src\src\Features\Core\Portable\Microsoft.CodeAnalysis.Features.csproj (Microsoft.CodeAnalysis.Features)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.Features.EmbeddedLanguages.Json;
 
using static EmbeddedSyntaxHelpers;
 
using JsonToken = EmbeddedSyntaxToken<JsonKind>;
using JsonTrivia = EmbeddedSyntaxTrivia<JsonKind>;
 
internal partial struct JsonParser
{
    /// <summary>
    /// Checks the superset-tree for constructs that aren't allowed in strict rfc8259
    /// (https://tools.ietf.org/html/rfc8259) mode.
    /// </summary>
    private static class StrictSyntaxChecker
    {
        /// <summary>
        /// Entry point for strict-mode validation.  Extracts the <see cref="JsonOptions.Comments"/> and
        /// <see cref="JsonOptions.TrailingCommas"/> flags from <paramref name="options"/> and forwards
        /// them, together with the root <paramref name="node"/>, to the recursive checker.
        /// </summary>
        /// <param name="node">Root of the parsed tree to validate.</param>
        /// <param name="options">Options controlling which non-rfc8259 constructs are tolerated.</param>
        /// <returns>Whatever the recursive check reports for the tree; <see langword="null"/> when nothing was reported.</returns>
        public static EmbeddedDiagnostic? CheckRootSyntax(JsonCompilationUnit node, JsonOptions options)
            => CheckSyntax(
                node,
                allowComments: options.HasFlag(JsonOptions.Comments),
                allowTrailingCommas: options.HasFlag(JsonOptions.TrailingCommas));
 
        /// <summary>
        /// Validates <paramref name="node"/> itself (based on its <see cref="JsonKind"/>) and then every
        /// child node and token beneath it.
        /// </summary>
        /// <param name="node">The node to validate.</param>
        /// <param name="allowComments">When <see langword="true"/>, comment trivia is not reported.</param>
        /// <param name="allowTrailingCommas">When <see langword="true"/>, trailing commas in arrays/objects are not reported.</param>
        /// <returns>
        /// The result of combining this node's own diagnostic with the first diagnostic found among its
        /// children, or <see langword="null"/> if neither exists.
        /// </returns>
        private static EmbeddedDiagnostic? CheckSyntax(
            JsonNode node, bool allowComments, bool allowTrailingCommas)
        {
            // Step 1: the kind-specific check for this node alone.
            EmbeddedDiagnostic? ownDiagnostic;
            switch (node.Kind)
            {
                case JsonKind.Constructor:
                    ownDiagnostic = CheckConstructor((JsonConstructorNode)node);
                    break;
                case JsonKind.Literal:
                    ownDiagnostic = CheckLiteral((JsonLiteralNode)node, allowComments);
                    break;
                case JsonKind.NegativeLiteral:
                    ownDiagnostic = CheckNegativeLiteral((JsonNegativeLiteralNode)node);
                    break;
                case JsonKind.Property:
                    ownDiagnostic = CheckProperty((JsonPropertyNode)node, allowComments);
                    break;
                case JsonKind.Array:
                    ownDiagnostic = CheckArray((JsonArrayNode)node, allowTrailingCommas);
                    break;
                case JsonKind.Object:
                    ownDiagnostic = CheckObject((JsonObjectNode)node, allowTrailingCommas);
                    break;
                default:
                    ownDiagnostic = null;
                    break;
            }

            // Step 2: walk the children in order and stop at the first one that reports something.
            // This always runs, even when step 1 already produced a diagnostic, because both
            // results are handed to Earliest below.
            EmbeddedDiagnostic? childDiagnostic = null;
            foreach (var child in node)
            {
                childDiagnostic = child.IsNode
                    ? CheckSyntax(child.Node, allowComments, allowTrailingCommas)
                    : CheckToken(child.Token, allowComments);
                if (childDiagnostic != null)
                    break;
            }

            // NOTE(review): Earliest is defined outside this file; presumably it picks whichever
            // diagnostic starts first in the text - confirm against its definition.
            return Earliest(ownDiagnostic, childDiagnostic);
        }
 
        /// <summary>
        /// Validates the trivia attached to <paramref name="token"/>: leading trivia first, and trailing
        /// trivia only if the leading trivia produced nothing.
        /// </summary>
        private static EmbeddedDiagnostic? CheckToken(JsonToken token, bool allowComments)
        {
            var leadingDiagnostic = CheckTrivia(token.LeadingTrivia, allowComments);
            if (leadingDiagnostic != null)
                return leadingDiagnostic;

            return CheckTrivia(token.TrailingTrivia, allowComments);
        }
 
        /// <summary>
        /// Validates each trivia in <paramref name="triviaList"/> in order and returns the first
        /// diagnostic encountered, or <see langword="null"/> if every piece of trivia is acceptable.
        /// </summary>
        private static EmbeddedDiagnostic? CheckTrivia(
            ImmutableArray<JsonTrivia> triviaList, bool allowComments)
        {
            for (var index = 0; index < triviaList.Length; index++)
            {
                if (CheckTrivia(triviaList[index], allowComments) is { } found)
                    return found;
            }

            return null;
        }
 
        /// <summary>
        /// Validates a single piece of trivia.  Comment trivia is reported unless
        /// <paramref name="allowComments"/> is set; whitespace trivia is handed to
        /// <see cref="CheckWhitespace"/>; every other trivia kind is accepted.
        /// </summary>
        private static EmbeddedDiagnostic? CheckTrivia(JsonTrivia trivia, bool allowComments)
        {
            switch (trivia.Kind)
            {
                case JsonKind.MultiLineCommentTrivia:
                case JsonKind.SingleLineCommentTrivia:
                    // Comments are only tolerated when the caller explicitly opted in.
                    if (allowComments)
                        return null;

                    return new EmbeddedDiagnostic(FeaturesResources.Comments_not_allowed, GetSpan(trivia.VirtualChars));

                case JsonKind.WhitespaceTrivia:
                    return CheckWhitespace(trivia);

                default:
                    return null;
            }
        }
 
        /// <summary>
        /// Scans the characters of a whitespace trivia and reports the first one that is neither a
        /// space nor a horizontal tab.
        /// </summary>
        /// <returns>
        /// An "illegal whitespace character" diagnostic located on the offending character, or
        /// <see langword="null"/> if every character is a space or tab.
        /// </returns>
        private static EmbeddedDiagnostic? CheckWhitespace(JsonTrivia trivia)
        {
            foreach (var ch in trivia.VirtualChars)
            {
                var isSpaceOrTab = ch.Value == ' ' || ch.Value == '\t';
                if (!isSpaceOrTab)
                {
                    // Within whitespace trivia, strict mode accepts nothing but spaces and
                    // horizontal tabs.
                    return new EmbeddedDiagnostic(FeaturesResources.Illegal_whitespace_character, ch.Span);
                }
            }

            return null;
        }
 
        /// <summary>
        /// Validates the members of an object node: every member must be a property, and (unless
        /// <paramref name="allowTrailingComma"/> is set) the member list must not end with a comma.
        /// </summary>
        /// <returns>
        /// A diagnostic on the first token of the first non-property member, otherwise a diagnostic on
        /// the trailing comma if one is present and disallowed, otherwise <see langword="null"/>.
        /// </returns>
        private static EmbeddedDiagnostic? CheckObject(JsonObjectNode node, bool allowTrailingComma)
        {
            var members = node.Sequence;
            foreach (var member in members)
            {
                if (member.Kind == JsonKind.Property)
                    continue;

                return new EmbeddedDiagnostic(FeaturesResources.Only_properties_allowed_in_an_object, GetFirstToken(member).GetSpan());
            }

            if (allowTrailingComma)
                return null;

            // An even, non-zero number of entries in the interleaved nodes-and-tokens list is
            // treated as the list ending in a separator, i.e. a trailing comma.
            var nodesAndTokens = members.NodesAndTokens;
            var entryCount = nodesAndTokens.Length;
            if (entryCount == 0 || entryCount % 2 != 0)
                return null;

            return new EmbeddedDiagnostic(FeaturesResources.Trailing_comma_not_allowed, nodesAndTokens[^1].Token.GetSpan());
        }
 
        /// <summary>
        /// Validates an array node by checking that its element sequence is a properly
        /// comma-separated list.
        /// </summary>
        private static EmbeddedDiagnostic? CheckArray(JsonArrayNode node, bool allowTrailingComma)
        {
            var elements = node.Sequence;
            return CheckProperSeparation(elements, allowTrailingComma);
        }
 
        /// <summary>
        /// Verifies that <paramref name="sequence"/> strictly alternates value, comma, value, comma, ...
        /// starting with a value, and optionally rejects a trailing comma.
        /// </summary>
        /// <param name="sequence">The flat list of values, in which commas appear as <see cref="JsonKind.CommaValue"/> entries.</param>
        /// <param name="allowTrailingComma">When <see langword="true"/>, a sequence ending in a comma is accepted.</param>
        /// <returns>
        /// A "',' unexpected" diagnostic for a comma in a value position, a "',' expected" diagnostic for
        /// a value in a comma position, a trailing-comma diagnostic, or <see langword="null"/>.
        /// </returns>
        private static EmbeddedDiagnostic? CheckProperSeparation(
            ImmutableArray<JsonValueNode> sequence,
            bool allowTrailingComma)
        {
            var count = sequence.Length;

            // Even positions must hold values; odd positions must hold commas.
            for (var index = 0; index < count; index++)
            {
                var element = sequence[index];
                var isComma = element.Kind == JsonKind.CommaValue;
                var commaBelongsHere = index % 2 != 0;

                if (isComma == commaBelongsHere)
                    continue;

                return commaBelongsHere
                    ? new EmbeddedDiagnostic(string.Format(FeaturesResources._0_expected, ","), GetFirstToken(element).GetSpan())
                    : new EmbeddedDiagnostic(string.Format(FeaturesResources._0_unexpected, ","), element.GetSpan());
            }

            // With alternation verified, an even non-zero count means the final entry is a comma.
            var endsWithComma = count != 0 && count % 2 == 0;
            if (endsWithComma && !allowTrailingComma)
                return new EmbeddedDiagnostic(FeaturesResources.Trailing_comma_not_allowed, sequence[^1].GetSpan());

            return null;
        }
 
        /// <summary>
        /// Validates a property node: its name must be a string token, it must have a real value (not
        /// a bare comma), and the name string itself must satisfy <see cref="CheckString"/>.
        /// </summary>
        private static EmbeddedDiagnostic? CheckProperty(JsonPropertyNode node, bool allowComments)
        {
            var nameToken = node.NameToken;
            if (nameToken.Kind != JsonKind.StringToken)
                return new EmbeddedDiagnostic(FeaturesResources.Property_name_must_be_a_string, nameToken.GetSpan());

            if (node.Value.Kind == JsonKind.CommaValue)
            {
                // Report a zero-width span immediately after the colon, where the value belongs.
                var positionAfterColon = node.ColonToken.VirtualChars[0].Span.End;
                return new EmbeddedDiagnostic(FeaturesResources.Value_required, new TextSpan(positionAfterColon, 0));
            }

            return CheckString(nameToken, allowComments);
        }
 
        /// <summary>
        /// Validates a literal node according to the kind of its token: NaN / Infinity / undefined are
        /// rejected outright, numbers and strings get their dedicated checks, and every other literal
        /// kind is accepted.
        /// </summary>
        private static EmbeddedDiagnostic? CheckLiteral(JsonLiteralNode node, bool allowComments)
        {
            var token = node.LiteralToken;
            switch (token.Kind)
            {
                // json.net extensions; none of them are permitted in strict mode.
                case JsonKind.NaNLiteralToken:
                case JsonKind.InfinityLiteralToken:
                case JsonKind.UndefinedLiteralToken:
                    return InvalidLiteral(token);

                case JsonKind.NumberToken:
                    return CheckNumber(token, allowComments);

                case JsonKind.StringToken:
                    return CheckString(token, allowComments);

                default:
                    return null;
            }
        }
 
        /*
           From: https://tools.ietf.org/html/rfc8259
         
           The representation of numbers is similar to that used in most
           programming languages.  A number is represented in base 10 using
           decimal digits.  It contains an integer component that may be
           prefixed with an optional minus sign, which may be followed by a
           fraction part and/or an exponent part.  Leading zeros are not
           allowed.
 
           A fraction part is a decimal point followed by one or more digits.
 
           An exponent part begins with the letter E in uppercase or lowercase,
           which may be followed by a plus or minus sign.  The E and optional
           sign are followed by one or more digits.
 
           Numeric values that cannot be represented in the grammar below (such
           as Infinity and NaN) are not permitted.
 
              number = [ minus ] int [ frac ] [ exp ]
              decimal-point = %x2E       ; .
              digit1-9 = %x31-39         ; 1-9
              e = %x65 / %x45            ; e E
 
              exp = e [ minus / plus ] 1*DIGIT
              frac = decimal-point 1*DIGIT
              int = zero / ( digit1-9 *DIGIT )
              minus = %x2D               ; -
              plus = %x2B                ; +
              zero = %x30                ; 0
        */
 
        // Anchored pattern for the rfc8259 'number' production: optional minus, an integer part with
        // no leading zeros, an optional fraction and an optional exponent.  IgnorePatternWhitespace is
        // what allows the pattern to be laid out one component per line with trailing '#' comments.
        // The instance is created once and shared by CheckNumber.
        private static readonly Regex s_validNumberRegex =
            new(
@"^
-?                 # [ minus ]
(0|([1-9][0-9]*))  # int
(\.[0-9]+)?        # [ frac ]
([eE][-+]?[0-9]+)? # [ exp ]
$",
                RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace);
 
        /// <summary>
        /// Validates a number token: its text must match the strict number grammar, after which the
        /// token's trivia is checked.
        /// </summary>
        /// <returns>
        /// An "invalid number" diagnostic spanning the token when the text does not match; otherwise
        /// the result of <see cref="CheckToken"/> for the token.
        /// </returns>
        private static EmbeddedDiagnostic? CheckNumber(JsonToken literalToken, bool allowComments)
        {
            var numberText = literalToken.VirtualChars.CreateString();
            if (s_validNumberRegex.IsMatch(numberText))
                return CheckToken(literalToken, allowComments);

            return new EmbeddedDiagnostic(FeaturesResources.Invalid_number, literalToken.GetSpan());
        }
 
        /// <summary>
        /// Validates a string token.  In priority order it reports: a single-quote opening delimiter,
        /// the first character below U+0020 between the first and last characters, the first
        /// <c>\'</c> escape, and finally anything wrong with the token's trivia.
        /// </summary>
        private static EmbeddedDiagnostic? CheckString(JsonToken literalToken, bool allowComments)
        {
            var chars = literalToken.VirtualChars;

            var openingQuote = chars[0];
            if (openingQuote == '\'')
                return new EmbeddedDiagnostic(FeaturesResources.Strings_must_start_with_double_quote_not_single_quote, openingQuote.Span);

            // Only the characters strictly between the first and last positions are examined.
            var contentEnd = chars.Length - 1;

            // Pass 1: control characters.  Kept separate from pass 2 so that a control character
            // anywhere in the string is reported in preference to a bad escape.
            for (var index = 1; index < contentEnd; index++)
            {
                var current = chars[index];
                if (current < ' ')
                    return new EmbeddedDiagnostic(FeaturesResources.Illegal_string_character, current.Span);
            }

            // Pass 2: the lexer accepts \' because json.net does; strict mode must reject it.
            var position = 1;
            while (position < contentEnd)
            {
                if (chars[position] != '\\')
                {
                    position++;
                    continue;
                }

                var escaped = chars[position + 1];
                if (escaped == '\'')
                    return new EmbeddedDiagnostic(FeaturesResources.Invalid_escape_sequence, TextSpan.FromBounds(chars[position].Span.Start, escaped.Span.End));

                // Any other escape is legal here.  Skipping the backslash and the character after
                // it is enough for both simple escapes and \uXXXX escapes.
                position += 2;
            }

            return CheckToken(literalToken, allowComments);
        }
 
        /// <summary>
        /// Builds the "'{0}' literal not allowed" diagnostic for <paramref name="literalToken"/>, using
        /// the token's own text in the message and the token's span as the location.
        /// </summary>
        private static EmbeddedDiagnostic? InvalidLiteral(JsonToken literalToken)
        {
            var literalText = literalToken.VirtualChars.CreateString();
            var message = string.Format(FeaturesResources._0_literal_not_allowed, literalText);
            return new EmbeddedDiagnostic(message, literalToken.GetSpan());
        }
 
        /// <summary>
        /// Always reports a "literal not allowed" diagnostic naming <c>-Infinity</c>, located on the
        /// whole <paramref name="node"/>.
        /// </summary>
        private static EmbeddedDiagnostic? CheckNegativeLiteral(JsonNegativeLiteralNode node)
        {
            // NOTE(review): presumably the parser only builds this node kind for -Infinity - confirm.
            var message = string.Format(FeaturesResources._0_literal_not_allowed, "-Infinity");
            return new EmbeddedDiagnostic(message, node.GetSpan());
        }
 
        /// <summary>
        /// Always reports a "constructors not allowed" diagnostic, located on the constructor's
        /// <c>new</c> keyword.
        /// </summary>
        private static EmbeddedDiagnostic? CheckConstructor(JsonConstructorNode node)
        {
            var newKeywordSpan = node.NewKeyword.GetSpan();
            return new EmbeddedDiagnostic(FeaturesResources.Constructors_not_allowed, newKeywordSpan);
        }
    }
}