|
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
namespace System.Text
{
/// <summary>Represents a parsed composite format string.</summary>
[DebuggerDisplay("{Format}")]
public sealed class CompositeFormat
{
    /// <summary>The parsed segments that make up the composite format string.</summary>
    /// <remarks>
    /// Every segment represents either a literal or a format hole, based on whether Literal
    /// is non-null or ArgIndex is non-negative.
    /// </remarks>
    internal readonly (string? Literal, int ArgIndex, int Alignment, string? Format)[] _segments;

    /// <summary>The sum of the lengths of all of the literals in <see cref="_segments"/>.</summary>
    internal readonly int _literalLength;

    /// <summary>The number of segments in <see cref="_segments"/> that represent format holes.</summary>
    internal readonly int _formattedCount;

    /// <summary>The number of args required to satisfy the format holes.</summary>
    /// <remarks>This is equal to one more than the largest index required by any format hole.</remarks>
    internal readonly int _argsRequired;

    /// <summary>Initializes the instance.</summary>
    /// <param name="format">The composite format string that was parsed.</param>
    /// <param name="segments">The parsed segments.</param>
    private CompositeFormat(string format, (string? Literal, int ArgIndex, int Alignment, string? Format)[] segments)
    {
        // Store the format.
        Debug.Assert(format is not null);
        Format = format;

        // Store the segments.
        Debug.Assert(segments is not null);
        _segments = segments;

        // Compute derivative information from the segments.
        int literalLength = 0, formattedCount = 0, argsRequired = 0;
        foreach ((string? Literal, int ArgIndex, int Alignment, string? Format) segment in segments)
        {
            Debug.Assert((segment.Literal is not null) ^ (segment.ArgIndex >= 0), "The segment should represent a literal or a format hole, but not both.");

            if (segment.Literal is string literal)
            {
                literalLength += literal.Length; // no concern about overflow as these were parsed out of a single string
            }
            else if (segment.ArgIndex >= 0)
            {
                formattedCount++;
                argsRequired = Math.Max(argsRequired, segment.ArgIndex + 1); // ArgIndex is bounded by the parser, so + 1 cannot overflow
            }
        }

        // Store the derivative information.
        Debug.Assert(literalLength >= 0);
        Debug.Assert(formattedCount >= 0);
        Debug.Assert(formattedCount == 0 || argsRequired > 0);
        _literalLength = literalLength;
        _formattedCount = formattedCount;
        _argsRequired = argsRequired;
    }

    /// <summary>Parse the composite format string <paramref name="format"/>.</summary>
    /// <param name="format">The string to parse.</param>
    /// <returns>The parsed <see cref="CompositeFormat"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="format"/> is null.</exception>
    /// <exception cref="FormatException">A format item in <paramref name="format"/> is invalid.</exception>
    public static CompositeFormat Parse([StringSyntax(StringSyntaxAttribute.CompositeFormat)] string format)
    {
        ArgumentNullException.ThrowIfNull(format);

        var segments = new List<(string? Literal, int ArgIndex, int Alignment, string? Format)>();
        int failureOffset = default;
        ExceptionResource failureReason = default;
        if (!TryParseLiterals(format, segments, ref failureOffset, ref failureReason))
        {
            ThrowHelper.ThrowFormatInvalidString(failureOffset, failureReason);
        }

        return new CompositeFormat(format, segments.ToArray());
    }

    /// <summary>Gets the original composite format string used to create this <see cref="CompositeFormat"/> instance.</summary>
    public string Format { get; }

    /// <summary>Gets the minimum number of arguments that must be passed to a formatting operation using this <see cref="CompositeFormat"/>.</summary>
    /// <remarks>It's permissible to supply more arguments than this value, but it's an error to pass fewer.</remarks>
    public int MinimumArgumentCount => _argsRequired;

    /// <summary>Throws an exception if the specified number of arguments is fewer than the number required.</summary>
    /// <param name="numArgs">The number of arguments provided by the caller.</param>
    /// <exception cref="FormatException">An insufficient number of arguments were provided.</exception>
    internal void ValidateNumberOfArgs(int numArgs)
    {
        if (numArgs < _argsRequired)
        {
            ThrowHelper.ThrowFormatIndexOutOfRange();
        }
    }

    /// <summary>Parse the composite format string into segments.</summary>
    /// <param name="format">The format string.</param>
    /// <param name="segments">The list into which to store the segments.</param>
    /// <param name="failureOffset">The offset at which a parsing error occurred if <see langword="false"/> is returned.</param>
    /// <param name="failureReason">The reason for a parsing failure if <see langword="false"/> is returned.</param>
    /// <returns>true if the format string can be parsed successfully; otherwise, false.</returns>
    private static bool TryParseLiterals(ReadOnlySpan<char> format, List<(string? Literal, int ArgIndex, int Alignment, string? Format)> segments, ref int failureOffset, ref ExceptionResource failureReason)
    {
        // This parsing logic is copied from string.Format. It's the same code modified to not format
        // as part of parsing and instead store the parsed literals and argument specifiers (alignment
        // and format) for later use.

        // Rather than parsing directly into the segments list, literals are parsed into a reusable builder.
        // Due to the nature of the parsing logic copied from string.Format, and our desire not to veer from
        // it significantly in order to maintain compatibility and avoid accidental regressions, multiple literals
        // next to each other might be parsed separately due to braces in between them. This builder then
        // allows us to merge those segments back together easily prior to their being appended to the list.
        var vsb = new ValueStringBuilder(stackalloc char[string.StackallocCharBufferSizeLimit]);

        // Upper bounds for the accumulated argument index and alignment width. Digits stop being consumed
        // once the running value reaches the bound, so the int accumulators below can never overflow; a
        // further digit is then treated as an unexpected character and the format item is rejected.
        // NOTE(review): 1,000,000 is intended to match the limit used by string.Format's parser - confirm.
        const int IndexLimit = 1_000_000;
        const int WidthLimit = 1_000_000;

        // Repeatedly find the next hole and process it.
        int pos = 0;
        char ch;
        while (true)
        {
            // Skip until either the end of the input or the first unescaped opening brace, whichever comes first.
            // Along the way we need to also unescape escaped closing braces.
            while (true)
            {
                // Find the next brace. If there isn't one, the remainder of the input is text to be appended, and we're done.
                ReadOnlySpan<char> remainder = format.Slice(pos);
                int countUntilNextBrace = remainder.IndexOfAny('{', '}');
                if (countUntilNextBrace < 0)
                {
                    vsb.Append(remainder);
                    segments.Add((vsb.ToString(), -1, 0, null));
                    return true;
                }

                // Append the text until the brace.
                vsb.Append(remainder.Slice(0, countUntilNextBrace));
                pos += countUntilNextBrace;

                // Get the brace. It must be followed by another character, either a copy of itself in the case of being
                // escaped, or an arbitrary character that's part of the hole in the case of an opening brace.
                char brace = format[pos];
                if (!TryMoveNext(format, ref pos, out ch))
                {
                    goto FailureUnclosedFormatItem;
                }
                if (brace == ch)
                {
                    // Escaped brace ("{{" or "}}"): emit a single brace into the current literal and keep scanning.
                    vsb.Append(ch);
                    pos++;
                    continue;
                }

                // This wasn't an escape, so it must be an opening brace.
                if (brace != '{')
                {
                    goto FailureUnexpectedClosingBrace;
                }

                // Proceed to parse the hole.
                segments.Add((vsb.ToString(), -1, 0, null));
                vsb.Length = 0;
                break;
            }

            // We're now positioned just after the opening brace of an argument hole, which consists of
            // an opening brace, an index, an optional width preceded by a comma, and an optional format
            // preceded by a colon, with arbitrary amounts of spaces throughout.
            int width = 0;
            string? itemFormat = null; // remains null when the hole has no ":format" part

            // First up is the index parameter, which is of the form:
            //     at least one digit
            //     optional any number of spaces
            // We've already read the first digit into ch.
            Debug.Assert(format[pos - 1] == '{');
            Debug.Assert(ch != '{');
            int index = ch - '0';
            if ((uint)index >= 10u)
            {
                goto FailureExpectedAsciiDigit;
            }

            // Common case is a single digit index followed by a closing brace. If it's not a closing brace,
            // proceed to finish parsing the full hole format.
            if (!TryMoveNext(format, ref pos, out ch))
            {
                goto FailureUnclosedFormatItem;
            }
            if (ch != '}')
            {
                // Continue consuming optional additional digits, stopping at the limit so that
                // index * 10 can never overflow.
                while (char.IsAsciiDigit(ch) && index < IndexLimit)
                {
                    index = index * 10 + ch - '0';
                    if (!TryMoveNext(format, ref pos, out ch))
                    {
                        goto FailureUnclosedFormatItem;
                    }
                }

                // Consume optional whitespace.
                while (ch == ' ')
                {
                    if (!TryMoveNext(format, ref pos, out ch))
                    {
                        goto FailureUnclosedFormatItem;
                    }
                }

                // Parse the optional alignment, which is of the form:
                //     comma
                //     optional any number of spaces
                //     optional -
                //     at least one digit
                //     optional any number of spaces
                if (ch == ',')
                {
                    // Consume optional whitespace.
                    do
                    {
                        if (!TryMoveNext(format, ref pos, out ch))
                        {
                            goto FailureUnclosedFormatItem;
                        }
                    }
                    while (ch == ' ');

                    // Consume an optional minus sign indicating left alignment.
                    int leftJustify = 1;
                    if (ch == '-')
                    {
                        leftJustify = -1;
                        if (!TryMoveNext(format, ref pos, out ch))
                        {
                            goto FailureUnclosedFormatItem;
                        }
                    }

                    // Parse alignment digits. The read character must be a digit.
                    width = ch - '0';
                    if ((uint)width >= 10u)
                    {
                        goto FailureExpectedAsciiDigit;
                    }
                    if (!TryMoveNext(format, ref pos, out ch))
                    {
                        goto FailureUnclosedFormatItem;
                    }

                    // Stop at the limit so that width * 10 can never overflow.
                    while (char.IsAsciiDigit(ch) && width < WidthLimit)
                    {
                        width = width * 10 + ch - '0';
                        if (!TryMoveNext(format, ref pos, out ch))
                        {
                            goto FailureUnclosedFormatItem;
                        }
                    }
                    width *= leftJustify;

                    // Consume optional whitespace
                    while (ch == ' ')
                    {
                        if (!TryMoveNext(format, ref pos, out ch))
                        {
                            goto FailureUnclosedFormatItem;
                        }
                    }
                }

                // The next character needs to either be a closing brace for the end of the hole,
                // or a colon indicating the start of the format.
                if (ch != '}')
                {
                    if (ch != ':')
                    {
                        // Unexpected character (this is also where digits beyond the index/width limit end up)
                        goto FailureUnclosedFormatItem;
                    }

                    // Search for the closing brace; everything in between is the format,
                    // but opening braces aren't allowed.
                    int startingPos = pos;
                    while (true)
                    {
                        if (!TryMoveNext(format, ref pos, out ch))
                        {
                            goto FailureUnclosedFormatItem;
                        }

                        if (ch == '}')
                        {
                            // Argument hole closed
                            break;
                        }

                        if (ch == '{')
                        {
                            // Braces inside the argument hole are not supported
                            goto FailureUnclosedFormatItem;
                        }
                    }

                    // Skip the colon itself; the format is everything between it and the closing brace.
                    startingPos++;
                    itemFormat = format.Slice(startingPos, pos - startingPos).ToString();
                }
            }

            Debug.Assert(format[pos] == '}');
            pos++;
            segments.Add((null, index, width, itemFormat));

            // Continue parsing the rest of the format string.
        }

        FailureUnexpectedClosingBrace:
        failureReason = ExceptionResource.Format_UnexpectedClosingBrace;
        failureOffset = pos;
        return false;

        FailureUnclosedFormatItem:
        failureReason = ExceptionResource.Format_UnclosedFormatItem;
        failureOffset = pos;
        return false;

        FailureExpectedAsciiDigit:
        failureReason = ExceptionResource.Format_ExpectedAsciiDigit;
        failureOffset = pos;
        return false;

        // Advances pos by one and reads the character at the new position; returns false
        // (with nextChar set to '\0') when that position is past the end of the input.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static bool TryMoveNext(ReadOnlySpan<char> format, ref int pos, out char nextChar)
        {
            pos++;
            if ((uint)pos >= (uint)format.Length)
            {
                nextChar = '\0';
                return false;
            }

            nextChar = format[pos];
            return true;
        }
    }
}
}
|