// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System; using System.Text; using Microsoft.CodeAnalysis.Text; namespace Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars; /// <summary> /// <see cref="VirtualCharGreen"/> provides a uniform view of a language's string token characters regardless if they /// were written raw in source, or are the production of a language escape sequence. For example, in C#, in a normal /// <c>""</c> string a <c>Tab</c> character can be written either as the raw tab character (value <c>9</c> in ASCII), /// or as <c>\t</c>. The format is a single character in the source, while the latter is two characters (<c>\</c> and /// <c>t</c>). <see cref="VirtualCharGreen"/> will represent both, providing the raw <see cref="char"/> value of /// <c>9</c> as well as what offset and width within original <see cref="SyntaxToken"/> the character was found at. /// </summary> internal readonly record struct VirtualCharGreen { private const int MaxWidth = 12; private const int WidthMask = 0b1111; // 4 bits for width (max 10) private const int OffsetShift = 4; // remaining bits for offset public readonly char Char; /// <summary> /// The offset and width combined into a single integer. Because the width of a VirtualChar can't be more than /// 10 (for <c>\UXXXXXXX</c>), we can store the width in the lower 4 bits, and the offset in the upper 28. /// </summary> private readonly int _offsetAndWidth; /// <summary> /// Offset in the original token that this character was found at. /// </summary> public int Offset => _offsetAndWidth >> OffsetShift; /// <summary> /// The width of characters in the original <see cref="SourceText"/> that represent this <see cref="VirtualCharGreen"/>. /// This can be as low as 1 (for normal characters) or up to 12 (for escape sequences like <c>\u1234\uABCD</c>). /// </summary> public int Width => _offsetAndWidth & WidthMask; public VirtualCharGreen(char ch, int offset, int width) { Contract.ThrowIfTrue(width > MaxWidth); if (offset < 0) throw new ArgumentException("Offset cannot be negative", nameof(offset)); if (width <= 0) throw new ArgumentException("Width must be greater than zero.", nameof(width)); Char = ch; _offsetAndWidth = (offset << OffsetShift) | width; } public VirtualCharGreen WithOffset(int offset) => new(this.Char, offset, this.Width); } /// <summary> /// <see cref="VirtualChar"/> provides a uniform view of a language's string token characters regardless if they /// were written raw in source, or are the production of a language escape sequence. For example, in C#, in a /// normal <c>""</c> string a <c>Tab</c> character can be written either as the raw tab character (value <c>9</c> in /// ASCII), or as <c>\t</c>. The format is a single character in the source, while the latter is two characters /// (<c>\</c> and <c>t</c>). <see cref="VirtualChar"/> will represent both, providing the raw <see cref="char"/> /// value of <c>9</c> as well as what <see cref="TextSpan"/> in the original <see cref="SourceText"/> they occupied. /// </summary> /// <remarks> /// A core consumer of this system is the Regex parser. That parser wants to work over an array of characters, /// however this array of characters is not the same as the array of characters a user types into a string in C# or /// VB. For example In C# someone may write: @"\z". This should appear to the user the same as if they wrote "\\z" /// and the same as "\\\u007a". However, as these all have wildly different presentations for the user, there needs /// to be a way to map back the characters it sees ( '\' and 'z' ) back to the ranges of characters the user wrote. /// </remarks> internal readonly record struct VirtualChar { public VirtualChar(VirtualCharGreen green, int tokenStart) { if (tokenStart < 0) throw new ArgumentException("Token start must be non-negative", nameof(tokenStart)); Green = green; TokenStart = tokenStart; } internal VirtualCharGreen Green { get; } internal int TokenStart { get; } public static implicit operator char(VirtualChar ch) => ch.Value; /// <inheritdoc cref="VirtualCharGreen.Char"/> public char Value => Green.Char; public TextSpan Span => new(TokenStart + Green.Offset, Green.Width); /// <inheritdoc/> public override string ToString() => Value.ToString(); #region equality public static bool operator ==(VirtualChar ch1, char ch2) => ch1.Green.Char == ch2; public static bool operator !=(VirtualChar ch1, char ch2) => !(ch1 == ch2); #endregion } |