File: CSharptokenEnumerator.cs
Web Access
Project: ..\..\..\src\Tasks\Microsoft.Build.Tasks.csproj (Microsoft.Build.Tasks.Core)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Diagnostics;
using System.IO;
 
#nullable disable
 
namespace Microsoft.Build.Shared.LanguageParser
{
    /*
    * Class:   CSharpTokenEnumerator
    *
    * Given C# sources, enumerate over all tokens.
    *
    */
    internal sealed class CSharpTokenEnumerator : TokenEnumerator
    {
        // Reader over the sources.
        private CSharpTokenCharReader _reader = null;
 
        /*
        * Method:  TokenEnumerator
        *
        * Construct
        */
        internal CSharpTokenEnumerator(Stream binaryStream, bool forceANSI)
        {
            _reader = new CSharpTokenCharReader(binaryStream, forceANSI);
        }
 
        /*
        * Method:  FindNextToken
        *
        * Find the next token. Return 'true' if one was found. False, otherwise.
        */
        internal override bool FindNextToken()
        {
            int startPosition = _reader.Position;
 
            // Dealing with whitespace?
            if (_reader.SinkMultipleWhiteSpace())
            {
                current = new WhitespaceToken();
                return true;
            }
            // Check for one-line comment
            else if (_reader.Sink("//"))
            {
                // Looks like a one-line comment. Follow it to the End-of-line
                _reader.SinkToEndOfLine();
 
                current = new CommentToken();
                return true;
            }
            // Check for multi-line comment
            else if (_reader.Sink("/*"))
            {
                _reader.SinkUntil("*/");
 
                // Was the ending */ found?
                if (_reader.EndOfLines)
                {
                    // No. There was a /* without a */. Return this a syntax error token.
                    current = new CSharpTokenizer.EndOfFileInsideCommentToken();
                    return true;
                }
 
                current = new CommentToken();
                return true;
            }
            // Handle chars
            else if (_reader.Sink("\'"))
            {
                while (_reader.CurrentCharacter != '\'')
                {
                    if (_reader.Sink("\\"))
                    {
                        /* reader.Skip the escape sequence.
                            This isn't exactly right. We should detect:
 
                            simple-escape-sequence: one of
                            \' \" \\ \0 \a \b \f \n \r \t \v
 
                            hexadecimal-escape-sequence:
                            \x   hex-digit   hex-digit[opt]   hex-digit[opt]  hex-digit[opt]
                        */
                    }
 
                    _reader.SinkCharacter();
                }
 
                if (_reader.SinkCharacter() != '\'')
                {
                    Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
                }
                current = new CSharpTokenizer.CharLiteralToken();
                return true;
            }
            // Check for verbatim string
            else if (_reader.Sink("@\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }
                while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');
 
                // Can't end a file inside a string
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }
 
                // reader.Skip the ending quote.
                current = new StringLiteralToken();
                current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1);
                return true;
            }
            // Check for a quoted string.
            else if (_reader.Sink("\""))
            {
                while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
                {
                    // See if we have an escape sequence.
                    if (_reader.SinkCharacter() == '\\')
                    {
                        // This is probably an escape character.
                        if (_reader.SinkStringEscape())
                        {
                            // This isn't nearly right. We just do barely enough to make a string
                            // with an embedded escape sequence return _some_ string whose start and
                            // end match the real bounds of the string.
                        }
                        else
                        {
                            // This is a compiler error.
                            _reader.SinkCharacter();
                            current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                            return true;
                        }
                    }
                }
 
                // Is it a newline?
                if (TokenChar.IsNewLine(_reader.CurrentCharacter))
                {
                    current = new CSharpTokenizer.NewlineInsideStringToken();
                    return true;
                }
 
                // Create the token.
                if (_reader.SinkCharacter() != '\"')
                {
                    Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
                }
                current = new StringLiteralToken();
                return true;
            }
            // Identifier or keyword?
            else if
            (
                // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier.
                _reader.CurrentCharacter == '@' ||
                _reader.MatchNextIdentifierStart())
            {
                if (_reader.CurrentCharacter == '@')
                {
                    _reader.SinkCharacter();
                }
 
                // Now, the next character must be an identifier start.
                if (!_reader.SinkIdentifierStart())
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }
 
                // Sink the rest of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);
 
                switch (identifierOrKeyword)
                {
                    default:
 
                        if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0)
                        {
                            current = new KeywordToken();
                            return true;
                        }
 
                        // If the identifier starts with '@' then we need to strip it off.
                        // The '@' is for escaping so that we can have an identifier called
                        // the same thing as a reserved keyword (i.e. class, if, foreach, etc)
                        string identifier = _reader.GetCurrentMatchedString(startPosition);
                        if (identifier.StartsWith("@", StringComparison.Ordinal))
                        {
                            identifier = identifier.Substring(1);
                        }
 
                        // Create the token.
                        current = new IdentifierToken();
                        current.InnerText = identifier;
                        return true;
                    case "false":
                    case "true":
                        current = new BooleanLiteralToken();
                        return true;
                    case "null":
                        current = new CSharpTokenizer.NullLiteralToken();
                        return true;
                }
            }
            // Open scope
            else if (_reader.Sink("{"))
            {
                current = new CSharpTokenizer.OpenScopeToken();
                return true;
            }
            // Close scope
            else if (_reader.Sink("}"))
            {
                current = new CSharpTokenizer.CloseScopeToken();
                return true;
            }
            // Hexidecimal integer literal
            else if (_reader.SinkIgnoreCase("0x"))
            {
                // Sink the hex digits.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }
 
                // Skip the L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();
 
                current = new HexIntegerLiteralToken();
                return true;
            }
            // Decimal integer literal
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // reader.Skip the L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();
 
                current = new DecimalIntegerLiteralToken();
                return true;
            }
            // Check for single-digit operators and punctuators
            else if (_reader.SinkOperatorOrPunctuator())
            {
                current = new OperatorOrPunctuatorToken();
                return true;
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.Sink("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.Sink("#endif"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }
 
                _reader.SinkToEndOfLine();
 
                return true;
            }
 
            // We didn't recognize the token, so this is a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }
 
        private static readonly string[] s_keywordList =
                                              { "abstract",  "as",  "base",  "bool",  "break",
                                                "byte",  "case",  "catch",  "char",  "checked",
                                                "class",  "const",  "continue",  "decimal",  "default",
                                                "delegate",  "do",  "double",  "else",  "enum",
                                                "event",  "explicit",  "extern",  "finally",  "fixed",
                                                "float",  "for",  "foreach",  "goto",  "if",
                                                "implicit",  "in",  "int",  "interface",  "internal",
                                                "is",  "lock",  "long",  "namespace",  "new",
                                                "object",  "operator",  "out",  "override",  "params",
                                                "private",  "protected",  "public",  "readonly",
                                                "ref",  "return",  "sbyte",  "sealed",  "short",
                                                "sizeof",  "stackalloc",  "static",  "string",
                                                "struct",  "switch",  "this",  "throw",  "try",
                                                "typeof",  "uint",  "ulong",  "unchecked",  "unsafe",
                                                "ushort",  "using",  "virtual",  "void",  "volatile",
                                                "while" };
 
 
        /*
        * Method:  Reader
        *
        * Return the token char reader.
        */
        internal override TokenCharReader Reader
        {
            get
            {
                return _reader;
            }
        }
    }
}