// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System; using System.Collections.Generic; using System.Diagnostics; using System.Threading; using Microsoft.CodeAnalysis.Collections; using Microsoft.CodeAnalysis.Internal.Log; using Microsoft.CodeAnalysis.Shared.Extensions; using Microsoft.CodeAnalysis.Text; namespace Microsoft.CodeAnalysis.Formatting; /// <summary> /// This class takes care of tokens consumed in the formatting engine. /// /// It will maintain information changed compared to original token information. and answers /// information about tokens. /// </summary> internal sealed partial class TokenStream { // number to guess number of tokens in a formatting span private const int MagicTextLengthToTokensRatio = 10; // caches token information within given formatting span to improve perf private readonly SegmentedList<SyntaxToken> _tokens; // caches original trivia info to improve perf private readonly SegmentedArray<TriviaData> _cachedOriginalTriviaInfo; // formatting engine can be used either with syntax tree or without // this will reconstruct information that reside in syntax tree from root node // if syntax tree is not given private readonly TreeData _treeData; private readonly SyntaxFormattingOptions _options; // hold onto information that are made to original trivia info private Changes _changes; // factory that will cache trivia info private readonly AbstractTriviaDataFactory _factory; // func caches private readonly Func<TokenData, TokenData, TriviaData> _getTriviaData; private readonly Func<TokenData, TokenData, TriviaData> _getOriginalTriviaData; public TokenStream(TreeData treeData, SyntaxFormattingOptions options, TextSpan spanToFormat, AbstractTriviaDataFactory factory) { using (Logger.LogBlock(FunctionId.Formatting_TokenStreamConstruction, CancellationToken.None)) { // initialize basic info _factory = factory; _treeData = treeData; _options = options; // use some heuristics to get initial size of list rather than blindly start from default size == 4 var sizeOfList = spanToFormat.Length / MagicTextLengthToTokensRatio; _tokens = new SegmentedList<SyntaxToken>(sizeOfList); _tokens.AddRange(_treeData.GetApplicableTokens(spanToFormat)); Debug.Assert(this.TokenCount > 0); // initialize trivia related info _cachedOriginalTriviaInfo = new SegmentedArray<TriviaData>(this.TokenCount - 1); // Func Cache _getTriviaData = this.GetTriviaData; _getOriginalTriviaData = this.GetOriginalTriviaData; } DebugCheckTokenOrder(); } [Conditional("DEBUG")] private void DebugCheckTokenOrder() { // things should be already in sorted manner, but just to make sure // run sort var previousToken = _tokens[0]; for (var i = 1; i < _tokens.Count; i++) { var currentToken = _tokens[i]; Debug.Assert(previousToken.FullSpan.End <= currentToken.FullSpan.Start); previousToken = currentToken; } } public bool FormatBeginningOfTree { get { return _treeData.IsFirstToken(this.FirstTokenInStream.Token); } } public bool FormatEndOfTree { get { // last token except end of file token return _treeData.IsLastToken(this.LastTokenInStream.Token); } } public bool IsFormattingWholeDocument { get { return this.FormatBeginningOfTree && this.FormatEndOfTree; } } public TokenData FirstTokenInStream { get { return new TokenData(this, 0, _tokens[0]); } } public TokenData LastTokenInStream { get { return new TokenData(this, this.TokenCount - 1, _tokens[this.TokenCount - 1]); } } public int TokenCount => _tokens.Count; public SyntaxToken GetToken(int index) { Contract.ThrowIfFalse(0 <= index && index < this.TokenCount); return _tokens[index]; } public TokenData GetTokenData(SyntaxToken token) { var indexInStream = GetTokenIndexInStream(token); return new TokenData(this, indexInStream, token); } public TokenData GetPreviousTokenData(TokenData tokenData) { if (tokenData.IndexInStream > 0 && tokenData.IndexInStream < this.TokenCount) { return new TokenData(this, tokenData.IndexInStream - 1, _tokens[tokenData.IndexInStream - 1]); } // get previous token and check whether it is the last token in the stream var previousToken = tokenData.Token.GetPreviousToken(includeZeroWidth: true); var lastIndex = this.TokenCount - 1; if (_tokens[lastIndex].Equals(previousToken)) { return new TokenData(this, lastIndex, _tokens[lastIndex]); } return new TokenData(this, -1, previousToken); } public TokenData GetNextTokenData(TokenData tokenData) { if (tokenData.IndexInStream >= 0 && tokenData.IndexInStream < this.TokenCount - 1) { return new TokenData(this, tokenData.IndexInStream + 1, _tokens[tokenData.IndexInStream + 1]); } // get next token and check whether it is the first token in the stream var nextToken = tokenData.Token.GetNextToken(includeZeroWidth: true); if (_tokens[0].Equals(nextToken)) { return new TokenData(this, 0, _tokens[0]); } return new TokenData(this, -1, nextToken); } internal SyntaxToken FirstTokenOfBaseTokenLine(SyntaxToken token) { var currentTokenData = this.GetTokenData(token); while (!this.IsFirstTokenOnLine(token)) { var previousTokenData = this.GetPreviousTokenData(currentTokenData); token = previousTokenData.Token; currentTokenData = previousTokenData; } return token; } public bool TwoTokensOriginallyOnSameLine(SyntaxToken token1, SyntaxToken token2) => TwoTokensOnSameLineWorker(token1, token2, _getOriginalTriviaData); public bool TwoTokensOnSameLine(SyntaxToken token1, SyntaxToken token2) => TwoTokensOnSameLineWorker(token1, token2, _getTriviaData); private bool TwoTokensOnSameLineWorker(SyntaxToken token1, SyntaxToken token2, Func<TokenData, TokenData, TriviaData> triviaDataGetter) { // check easy case if (token1 == token2) { return true; } // this can happen on invalid code. if (token1.Span.End > token2.SpanStart) { return false; } // this has potential to be very expansive if everything is on same line in a big file. but chances to that happen are very low var tokenData1 = GetTokenData(token1); var tokenData2 = GetTokenData(token2); var previousToken = tokenData1; for (var current = tokenData1.GetNextTokenData(); current < tokenData2; current = current.GetNextTokenData()) { if (triviaDataGetter(previousToken, current).SecondTokenIsFirstTokenOnLine) { return false; } previousToken = current; } // check last one. return !triviaDataGetter(previousToken, tokenData2).SecondTokenIsFirstTokenOnLine; } public void ApplyBeginningOfTreeChange(TriviaData data) { Contract.ThrowIfNull(data); _changes.AddOrReplace(Changes.BeginningOfTreeKey, data); } public void ApplyEndOfTreeChange(TriviaData data) { Contract.ThrowIfNull(data); _changes.AddOrReplace(Changes.EndOfTreeKey, data); } public void ApplyChange(int pairIndex, TriviaData data) { Contract.ThrowIfNull(data); Contract.ThrowIfFalse(0 <= pairIndex && pairIndex < this.TokenCount - 1); // do reference equality check var sameAsOriginal = GetOriginalTriviaData(pairIndex) == data; if (sameAsOriginal) { _changes.TryRemove(pairIndex); } else { _changes.AddOrReplace(pairIndex, data); } } public int GetCurrentColumn(SyntaxToken token) { var tokenWithIndex = this.GetTokenData(token); return GetCurrentColumn(tokenWithIndex); } public int GetCurrentColumn(TokenData tokenData) => GetColumn(tokenData, _getTriviaData); public int GetOriginalColumn(SyntaxToken token) { var tokenWithIndex = this.GetTokenData(token); return GetColumn(tokenWithIndex, _getOriginalTriviaData); } /// <summary> /// Get column of the token /// * column means text position on a line where all tabs are converted to spaces that first position on a line becomes 0 /// </summary> private int GetColumn(TokenData tokenData, Func<TokenData, TokenData, TriviaData> triviaDataGetter) { // at the beginning of a file. var previousToken = tokenData.GetPreviousTokenData(); var spaces = 0; for (; previousToken.Token.RawKind != 0; previousToken = previousToken.GetPreviousTokenData()) { var triviaInfo = triviaDataGetter(previousToken, tokenData); if (triviaInfo.SecondTokenIsFirstTokenOnLine) { // current token is the first token on line. // add up spaces so far and triviaInfo.Space which means indentation in this case return spaces + triviaInfo.Spaces; } // add spaces so far spaces += triviaInfo.Spaces; // here, we can't just add token's length since there is token that span multiple lines. GetTokenLength(previousToken.Token, out var tokenLength, out var multipleLines); if (multipleLines) { return spaces + tokenLength; } spaces += tokenLength; tokenData = previousToken; } // we reached beginning of the tree, add spaces at the beginning of the tree return spaces + triviaDataGetter(previousToken, tokenData).Spaces; } public void GetTokenLength(SyntaxToken token, out int length, out bool onMultipleLines) { // here, we can't just add token's length since there is token that span multiple lines. var text = token.ToString(); // multiple lines if (text.ContainsLineBreak()) { // get indentation from last line of the text onMultipleLines = true; length = text.GetTextColumn(_options.TabSize, initialColumn: 0); return; } onMultipleLines = false; // add spaces so far if (text.ContainsTab()) { // do expansive calculation var initialColumn = _treeData.GetOriginalColumn(_options.TabSize, token); length = text.ConvertTabToSpace(_options.TabSize, initialColumn, text.Length); return; } length = text.Length; } public IEnumerable<ValueTuple<ValueTuple<SyntaxToken, SyntaxToken>, TriviaData>> GetTriviaDataWithTokenPair(CancellationToken cancellationToken) { // the very first trivia in the file case if (this.FormatBeginningOfTree) { var token = this.FirstTokenInStream.Token; var trivia = this.GetTriviaDataAtBeginningOfTree(); yield return ValueTuple.Create(ValueTuple.Create(default(SyntaxToken), token), trivia); } // regular trivia cases for (var pairIndex = 0; pairIndex < this.TokenCount - 1; pairIndex++) { cancellationToken.ThrowIfCancellationRequested(); var trivia = this.GetTriviaData(pairIndex); yield return ValueTuple.Create(ValueTuple.Create(_tokens[pairIndex], _tokens[pairIndex + 1]), trivia); } // the very last trivia in the file case if (this.FormatEndOfTree) { var token = this.LastTokenInStream.Token; var trivia = this.GetTriviaDataAtEndOfTree(); yield return ValueTuple.Create(ValueTuple.Create(token, default(SyntaxToken)), trivia); } } public TriviaData GetTriviaData(TokenData token1, TokenData token2) { // special cases (beginning of a file, end of a file) if (_treeData.IsFirstToken(token2.Token)) { return this.FormatBeginningOfTree ? GetTriviaDataAtBeginningOfTree() : GetOriginalTriviaData(token1, token2); } if (_treeData.IsLastToken(token1.Token)) { return this.FormatEndOfTree ? GetTriviaDataAtEndOfTree() : GetOriginalTriviaData(token1, token2); } // normal cases Debug.Assert(token1.Token.Span.End <= token2.Token.SpanStart); Debug.Assert(token1.IndexInStream < 0 || token2.IndexInStream < 0 || (token1.IndexInStream + 1 == token2.IndexInStream)); Debug.Assert((token1.IndexInStream >= 0 && token2.IndexInStream >= 0) || token1.Token.Equals(token2.Token.GetPreviousToken(includeZeroWidth: true)) || token2.Token.LeadingTrivia.Span.Contains(token1.Token.Span)); // one of token is out side of cached token stream if (token1.IndexInStream < 0 || token2.IndexInStream < 0) { return GetOriginalTriviaData(token1, token2); } return GetTriviaData(token1.IndexInStream); } private TriviaData GetOriginalTriviaData(TokenData token1, TokenData token2) { // special cases (beginning of a file, end of a file) if (_treeData.IsFirstToken(token2.Token)) { return _factory.CreateLeadingTrivia(token2.Token); } else if (_treeData.IsLastToken(token1.Token)) { return _factory.CreateTrailingTrivia(token1.Token); } Debug.Assert(token1.Token.Span.End <= token2.Token.SpanStart); Debug.Assert(token1.IndexInStream < 0 || token2.IndexInStream < 0 || (token1.IndexInStream + 1 == token2.IndexInStream)); Debug.Assert((token1.IndexInStream >= 0 && token2.IndexInStream >= 0) || token1.Token.Equals(token2.Token.GetPreviousToken(includeZeroWidth: true)) || token2.Token.LeadingTrivia.Span.Contains(token1.Token.Span)); if (token1.IndexInStream < 0 || token2.IndexInStream < 0) { return _factory.Create(token1.Token, token2.Token); } return GetOriginalTriviaData(token1.IndexInStream); } public TriviaData GetTriviaDataAtBeginningOfTree() { Contract.ThrowIfFalse(this.FormatBeginningOfTree); if (_changes.TryGet(Changes.BeginningOfTreeKey, out var data)) { return data; } Debug.Assert(_treeData.IsFirstToken(this.FirstTokenInStream.Token)); return GetOriginalTriviaData(default, this.FirstTokenInStream); } public TriviaData GetTriviaDataAtEndOfTree() { Contract.ThrowIfFalse(this.FormatEndOfTree); if (_changes.TryGet(Changes.EndOfTreeKey, out var data)) { return data; } Debug.Assert(_treeData.IsLastToken(this.LastTokenInStream.Token)); return GetOriginalTriviaData(this.LastTokenInStream, default); } public TriviaData GetTriviaData(int pairIndex) { Contract.ThrowIfFalse(0 <= pairIndex && pairIndex < this.TokenCount - 1); if (_changes.TryGet(pairIndex, out var data)) { return data; } // no change between two tokens, return trivia info from original code return GetOriginalTriviaData(pairIndex); } private TriviaData GetOriginalTriviaData(int pairIndex) { Contract.ThrowIfFalse(0 <= pairIndex && pairIndex < this.TokenCount - 1); if (_cachedOriginalTriviaInfo[pairIndex] == null) { var info = _factory.Create(_tokens[pairIndex], _tokens[pairIndex + 1]); _cachedOriginalTriviaInfo[pairIndex] = info; } return _cachedOriginalTriviaInfo[pairIndex]; } public bool IsFirstTokenOnLine(SyntaxToken token) { Contract.ThrowIfTrue(token.RawKind == 0); var tokenWithIndex = this.GetTokenData(token); var previousTokenWithIndex = tokenWithIndex.GetPreviousTokenData(); return IsFirstTokenOnLine(previousTokenWithIndex, tokenWithIndex); } // this can be called with tokens that are outside of token stream private bool IsFirstTokenOnLine(TokenData tokenData1, TokenData tokenData2) { if (tokenData1.Token.RawKind == 0) { // reached first line inside of tree return true; } Debug.Assert(tokenData2 == tokenData1.GetNextTokenData()); // see if there are changes for a given token pair return this.GetTriviaData(tokenData1, tokenData2).SecondTokenIsFirstTokenOnLine; } private int GetTokenIndexInStream(SyntaxToken token) { var tokenIndex = _tokens.BinarySearch(token, TokenOrderComparer.Instance); if (tokenIndex < 0) { return -1; } // Source characters cannot be assigned to multiple tokens. If the token has non-zero width, it will be an // exact match for at most one token. if (!token.FullSpan.IsEmpty) { return _tokens[tokenIndex] == token ? tokenIndex : -1; } // Multiple tokens can have empty spans. The binary search operation will return one of them; look forward // and then backward to locate the desired token within the set of one or more zero-width tokens located at // the same position. Debug.Assert(token.FullSpan.IsEmpty); for (var i = tokenIndex; i < _tokens.Count; i++) { if (!_tokens[i].FullSpan.IsEmpty) { // Current token can't match because the span is different, and future tokens won't match because // they are lexicographically after the token we are interested in. break; } if (_tokens[i] == token) { return i; } } for (var i = tokenIndex - 1; i >= 0; i--) { if (!_tokens[i].FullSpan.IsEmpty) { // Current token can't match because the span is different, and future tokens won't match because // they are lexicographically before the token we are interested in. break; } if (_tokens[i] == token) { return i; } } return -1; } public Iterator TokenIterator => new(_tokens); private sealed class TokenOrderComparer : IComparer<SyntaxToken> { public static readonly TokenOrderComparer Instance = new(); private TokenOrderComparer() { } public int Compare(SyntaxToken x, SyntaxToken y) => x.FullSpan.CompareTo(y.FullSpan); } } |