|
// <auto-generated/>
#nullable enable
#pragma warning disable CS0162 // Unreachable code
#pragma warning disable CS0164 // Unreferenced label
#pragma warning disable CS0219 // Variable assigned but never used
namespace Microsoft.ML.Tokenizers
{
    partial class TiktokenTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below, exposed through the
        /// static <c>Instance</c> member of the generated <c>Cl100kBaseRegex_0</c> type.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>'(?i:[sdmt]|ll|ve|re)|(?>[^\\r\\n\\p{L}\\p{N}]?)(?>\\p{L}+)|(?>\\p{N}{1,3})| ?(?>[^\\s\\p{L}\\p{N}]+)(?>[\\r\\n]*)|(?>\\s+)$|\\s*[\\r\\n]|\\s+(?!\\S)|\\s</code><br/>
        /// Explanation (indentation shows which expression each step belongs to):<br/>
        /// <code>
        /// ○ Match with 8 alternative expressions, atomically.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match '\''.<br/>
        ///         ○ Match with 4 alternative expressions, atomically.<br/>
        ///             ○ Match a character in the set [DMSTdmst].<br/>
        ///             ○ Match a character in the set [Ll] exactly 2 times.<br/>
        ///             ○ Match a sequence of expressions.<br/>
        ///                 ○ Match a character in the set [Vv].<br/>
        ///                 ○ Match a character in the set [Ee].<br/>
        ///             ○ Match a sequence of expressions.<br/>
        ///                 ○ Match a character in the set [Rr].<br/>
        ///                 ○ Match a character in the set [Ee].<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a character in the set [^\n\r\p{L}\p{N}] atomically, optionally.<br/>
        ///         ○ Match a character in the set [\p{L}] atomically at least once.<br/>
        ///     ○ Match a character in the set [\p{N}] atomically at least 1 and at most 3 times.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match ' ' atomically, optionally.<br/>
        ///         ○ Match a character in the set [^\s\p{L}\p{N}] atomically at least once.<br/>
        ///         ○ Match a character in the set [\n\r] atomically any number of times.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character atomically at least once.<br/>
        ///         ○ Match if at the end of the string or if before an ending newline.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character greedily any number of times.<br/>
        ///         ○ Match a character in the set [\n\r].<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character greedily at least once.<br/>
        ///         ○ Zero-width negative lookahead.<br/>
        ///             ○ Match any character other than a whitespace character.<br/>
        ///     ○ Match a whitespace character.<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        private static partial global::System.Text.RegularExpressions.Regex Cl100kBaseRegex()
        {
            return global::System.Text.RegularExpressions.Generated.Cl100kBaseRegex_0.Instance;
        }
    }
}
namespace Microsoft.ML.Tokenizers
{
    partial class TiktokenTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below, exposed through the
        /// static <c>Instance</c> member of the generated <c>P50kBaseRegex_1</c> type.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>'(?:[sdmt]|ll|ve|re)| ?(?>\\p{L}+)| ?(?>\\p{N}+)| ?(?>[^\\s\\p{L}\\p{N}]+)|(?>\\s+)$|\\s+(?!\\S)|\\s</code><br/>
        /// Explanation (indentation shows which expression each step belongs to):<br/>
        /// <code>
        /// ○ Match with 7 alternative expressions, atomically.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match '\''.<br/>
        ///         ○ Match with 4 alternative expressions, atomically.<br/>
        ///             ○ Match a character in the set [dmst].<br/>
        ///             ○ Match the string "ll".<br/>
        ///             ○ Match the string "ve".<br/>
        ///             ○ Match the string "re".<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match ' ' atomically, optionally.<br/>
        ///         ○ Match a character in the set [\p{L}] atomically at least once.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match ' ' atomically, optionally.<br/>
        ///         ○ Match a character in the set [\p{N}] atomically at least once.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match ' ' atomically, optionally.<br/>
        ///         ○ Match a character in the set [^\s\p{L}\p{N}] atomically at least once.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character atomically at least once.<br/>
        ///         ○ Match if at the end of the string or if before an ending newline.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character greedily at least once.<br/>
        ///         ○ Zero-width negative lookahead.<br/>
        ///             ○ Match any character other than a whitespace character.<br/>
        ///     ○ Match a whitespace character.<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        internal static partial global::System.Text.RegularExpressions.Regex P50kBaseRegex()
        {
            return global::System.Text.RegularExpressions.Generated.P50kBaseRegex_1.Instance;
        }
    }
}
namespace Microsoft.ML.Tokenizers
{
    partial class TiktokenTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below, exposed through the
        /// static <c>Instance</c> member of the generated <c>O200kBaseRegex_2</c> type.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+</code><br/>
        /// Explanation (indentation shows which expression each step belongs to):<br/>
        /// <code>
        /// ○ Match with 7 alternative expressions, atomically.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a character in the set [^\n\r\p{L}\p{N}] greedily, optionally.<br/>
        ///         ○ Match a character in the set [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}] greedily any number of times.<br/>
        ///         ○ Match a character in the set [\p{Ll}\p{Lm}\p{Lo}\p{M}] greedily at least once.<br/>
        ///         ○ Optional (greedy).<br/>
        ///             ○ Match '\''.<br/>
        ///             ○ Match with 6 alternative expressions, atomically.<br/>
        ///                 ○ Match a character in the set [STst].<br/>
        ///                 ○ Match a sequence of expressions.<br/>
        ///                     ○ Match a character in the set [Rr].<br/>
        ///                     ○ Match a character in the set [Ee].<br/>
        ///                 ○ Match a sequence of expressions.<br/>
        ///                     ○ Match a character in the set [Vv].<br/>
        ///                     ○ Match a character in the set [Ee].<br/>
        ///                 ○ Match a character in the set [Mm].<br/>
        ///                 ○ Match a character in the set [Ll] exactly 2 times.<br/>
        ///                 ○ Match a character in the set [Dd].<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a character in the set [^\n\r\p{L}\p{N}] greedily, optionally.<br/>
        ///         ○ Match a character in the set [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}] greedily at least once.<br/>
        ///         ○ Match a character in the set [\p{Ll}\p{Lm}\p{Lo}\p{M}] greedily any number of times.<br/>
        ///         ○ Optional (greedy).<br/>
        ///             ○ Match '\''.<br/>
        ///             ○ Match with 6 alternative expressions, atomically.<br/>
        ///                 ○ Match a character in the set [STst].<br/>
        ///                 ○ Match a sequence of expressions.<br/>
        ///                     ○ Match a character in the set [Rr].<br/>
        ///                     ○ Match a character in the set [Ee].<br/>
        ///                 ○ Match a sequence of expressions.<br/>
        ///                     ○ Match a character in the set [Vv].<br/>
        ///                     ○ Match a character in the set [Ee].<br/>
        ///                 ○ Match a character in the set [Mm].<br/>
        ///                 ○ Match a character in the set [Ll] exactly 2 times.<br/>
        ///                 ○ Match a character in the set [Dd].<br/>
        ///     ○ Match a character in the set [\p{N}] atomically at least 1 and at most 3 times.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match ' ' atomically, optionally.<br/>
        ///         ○ Match a character in the set [^\s\p{L}\p{N}] greedily at least once.<br/>
        ///         ○ Match a character in the set [\n\r/] atomically any number of times.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character greedily any number of times.<br/>
        ///         ○ Match a character in the set [\n\r] atomically at least once.<br/>
        ///     ○ Match a sequence of expressions.<br/>
        ///         ○ Match a whitespace character greedily at least once.<br/>
        ///         ○ Zero-width negative lookahead.<br/>
        ///             ○ Match any character other than a whitespace character.<br/>
        ///     ○ Match a whitespace character atomically at least once.<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        internal static partial global::System.Text.RegularExpressions.Regex O200kBaseRegex()
        {
            return global::System.Text.RegularExpressions.Generated.O200kBaseRegex_2.Instance;
        }
    }
}
namespace Microsoft.ML.Tokenizers
{
    partial class PreTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below. Note that the pattern
        /// matches either a run of word characters or a single punctuation character; it does not
        /// match whitespace itself.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>\\w+|[\\p{P}]</code><br/>
        /// Explanation (indentation shows which expression each step belongs to):<br/>
        /// <code>
        /// ○ Match with 2 alternative expressions, atomically.<br/>
        ///     ○ Match a word character atomically at least once.<br/>
        ///     ○ Match a character in the set [\p{P}].<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        private static partial global::System.Text.RegularExpressions.Regex WhiteSpaceOrPunctuationRegex()
        {
            return global::System.Text.RegularExpressions.Generated.WhiteSpaceOrPunctuationRegex_3.Instance;
        }
    }
}
namespace Microsoft.ML.Tokenizers
{
    partial class PreTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below: a run of word
        /// characters, or a run of characters that are neither word characters nor whitespace.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>\\w+|[^\\w\\s]+</code><br/>
        /// Explanation (indentation shows which expression each step belongs to):<br/>
        /// <code>
        /// ○ Match with 2 alternative expressions, atomically.<br/>
        ///     ○ Match a word character atomically at least once.<br/>
        ///     ○ Match a character in the set [^\w\s] atomically at least once.<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        private static partial global::System.Text.RegularExpressions.Regex WordOrNonWordRegex()
        {
            return global::System.Text.RegularExpressions.Generated.WordOrNonWordRegex_4.Instance;
        }
    }
}
namespace Microsoft.ML.Tokenizers
{
    partial class PreTokenizer
    {
        /// <summary>
        /// Returns the source-generated regex for the pattern documented below. Note that the pattern
        /// matches runs of non-whitespace characters, i.e. the text between whitespace.
        /// </summary>
        /// <remarks>
        /// Pattern (backslashes are shown doubled, as they would be written in a regular C# string literal):<br/>
        /// <code>\\S+</code><br/>
        /// Explanation:<br/>
        /// <code>
        /// ○ Match any character other than a whitespace character atomically at least once.<br/>
        /// </code>
        /// </remarks>
        [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
        private static partial global::System.Text.RegularExpressions.Regex WhiteSpaceRegex()
        {
            return global::System.Text.RegularExpressions.Generated.WhiteSpaceRegex_5.Instance;
        }
    }
}
namespace System.Text.RegularExpressions.Generated
{
using System;
using System.Buffers;
using System.CodeDom.Compiler;
using System.Collections;
using System.ComponentModel;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using System.Threading;
/// <summary>Custom <see cref="Regex"/>-derived type for the Cl100kBaseRegex method.</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class Cl100kBaseRegex_0 : Regex
{
/// <summary>Cached, thread-safe singleton instance.</summary>
internal static readonly Cl100kBaseRegex_0 Instance = new();
/// <summary>Populates the base <see cref="Regex"/> state for the cl100k_base pattern.</summary>
private Cl100kBaseRegex_0()
{
    // Pattern text and options reported by this instance.
    base.pattern = "'(?i:[sdmt]|ll|ve|re)|(?>[^\\r\\n\\p{L}\\p{N}]?)(?>\\p{L}+)|(?>\\p{N}{1,3})| ?(?>[^\\s\\p{L}\\p{N}]+)(?>[\\r\\n]*)|(?>\\s+)$|\\s*[\\r\\n]|\\s+(?!\\S)|\\s";
    base.roptions = RegexOptions.None;

    // One capture slot: the pattern has no explicit groups, only the implicit group 0.
    base.capsize = 1;

    // Matching is delegated to the generated runner; each match attempt is limited to 30 seconds.
    base.factory = new RunnerFactory();
    base.internalMatchTimeout = TimeSpan.FromMilliseconds(30_000);
}
/// <summary>Provides a factory for creating <see cref="RegexRunner"/> instances to be used by methods on <see cref="Regex"/>.</summary>
private sealed class RunnerFactory : RegexRunnerFactory
{
/// <summary>Builds a new <see cref="RegexRunner"/> that carries the generated matching logic for this regex.</summary>
protected override RegexRunner CreateInstance()
{
    return new Runner();
}
/// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>
private sealed class Runner : RegexRunner
{
/// <summary>Looks for the next match in <paramref name="inputSpan"/>, beginning at base.runtextpos.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
protected override void Scan(ReadOnlySpan<char> inputSpan)
{
    while (true)
    {
        // Stop when no further starting position exists.
        if (!TryFindNextPossibleStartingPosition(inputSpan))
        {
            return;
        }

        // Stop as soon as a match is found at the current position.
        if (TryMatchAtCurrentPosition(inputSpan))
        {
            return;
        }

        // Stop once the end of the input has been reached.
        if (base.runtextpos == inputSpan.Length)
        {
            return;
        }

        // No match here: step one character forward and try again.
        base.runtextpos++;
        base.CheckTimeout();
    }
}
/// <summary>Reports whether a match could still start at base.runtextpos within <paramref name="inputSpan"/>.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if at least one character remains at base.runtextpos; otherwise false, after moving base.runtextpos to the end of the input.</returns>
private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
{
    // The pattern cannot match the empty string, so a start is only possible while input remains.
    // The unsigned comparison also rejects a negative position.
    bool inputRemains = (uint)base.runtextpos < (uint)inputSpan.Length;
    if (!inputRemains)
    {
        // No match found.
        base.runtextpos = inputSpan.Length;
    }

    return inputRemains;
}
/// <summary>Determine whether <paramref name="inputSpan"/> at base.runtextpos is a match for the regular expression.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
/// <remarks>
/// The eight top-level alternatives are tried in order from base.runtextpos. The first branch that succeeds jumps to
/// the AlternationMatch label, after which base.runtextpos is moved to the end of the match and capture 0 is recorded.
/// A failing branch resets pos and slice to the alternation's starting position before the next branch runs. If the
/// last branch fails the method returns false and base.runtextpos is left unchanged.
/// </remarks>
private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
{
    int pos = base.runtextpos;
    int matchStart = pos;
    char ch;
    // Backtracking bounds for the two greedy whitespace loops (branches 5 and 6).
    int charloop_starting_pos = 0, charloop_ending_pos = 0;
    int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
    // Only saved and restored around the atomic group; nothing in this method pushes onto a backtracking stack.
    int stackpos = 0;
    // Scratch value for the bitmap set test in branch 0.
    ulong charMinusLowUInt64;
    // slice always views the input from pos onward; the two are updated together.
    ReadOnlySpan<char> slice = inputSpan.Slice(pos);

    // Atomic group.
    {
        int atomic_stackpos = stackpos;

        // Match with 8 alternative expressions, atomically.
        //{
        int alternation_starting_pos = pos;

        // Branch 0: an apostrophe followed by one of s/d/m/t, "ll", "ve" or "re", in either case.
        {
            // Match '\''.
            if (slice.IsEmpty || slice[0] != '\'')
            {
                goto AlternationBranch;
            }

            // Match with 4 alternative expressions, atomically.
            {
                int alternation_starting_pos1 = pos;

                // Branch 0
                {
                    // Match a character in the set [DMSTdmst].
                    // Bitmap test: the constant holds one bit per set member, counted from the top bit as an offset
                    // from 'D'. Shifting left by (ch - 'D') moves the bit for ch into the sign position, and
                    // (ch - 'D') - 64 has its sign bit set only when that offset is below 64. The combined value is
                    // therefore negative exactly when ch is in the set; a non-negative value means "no match".
                    if ((uint)slice.Length < 2 || ((long)((0x8041800080418000UL << (int)(charMinusLowUInt64 = (uint)slice[1] - 'D')) & (charMinusLowUInt64 - 64)) >= 0))
                    {
                        goto AlternationBranch1;
                    }

                    // Consumed the apostrophe plus one suffix character.
                    pos += 2;
                    slice = inputSpan.Slice(pos);
                    goto AlternationMatch1;

                    AlternationBranch1:
                    pos = alternation_starting_pos1;
                    slice = inputSpan.Slice(pos);
                }

                // Branch 1
                {
                    // Match a character in the set [Ll] exactly 2 times.
                    // OR-ing with 0x20 folds an ASCII uppercase letter onto its lowercase form.
                    {
                        if ((uint)slice.Length < 3 ||
                            ((slice[1] | 0x20) != 'l') ||
                            ((slice[2] | 0x20) != 'l'))
                        {
                            goto AlternationBranch2;
                        }
                    }

                    pos += 3;
                    slice = inputSpan.Slice(pos);
                    goto AlternationMatch1;

                    AlternationBranch2:
                    pos = alternation_starting_pos1;
                    slice = inputSpan.Slice(pos);
                }

                // Branch 2
                {
                    if ((uint)slice.Length < 3 ||
                        !slice.Slice(1).StartsWith("ve", StringComparison.OrdinalIgnoreCase)) // Match the string "ve" (ordinal case-insensitive)
                    {
                        goto AlternationBranch3;
                    }

                    pos += 3;
                    slice = inputSpan.Slice(pos);
                    goto AlternationMatch1;

                    AlternationBranch3:
                    pos = alternation_starting_pos1;
                    slice = inputSpan.Slice(pos);
                }

                // Branch 3
                {
                    // This is the last inner alternative, so a failure here fails the whole of outer branch 0.
                    if ((uint)slice.Length < 3 ||
                        !slice.Slice(1).StartsWith("re", StringComparison.OrdinalIgnoreCase)) // Match the string "re" (ordinal case-insensitive)
                    {
                        goto AlternationBranch;
                    }

                    pos += 3;
                    slice = inputSpan.Slice(pos);
                }

                AlternationMatch1:;
            }

            goto AlternationMatch;

            AlternationBranch:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 1: an optional non-letter/non-digit/non-newline character followed by one or more letters.
        {
            // Match a character in the set [^\n\r\p{L}\p{N}] atomically, optionally.
            // ASCII fast path: the string is a table of eight 16-bit masks, one per group of 16 code points.
            // It is indexed by ch >> 4 and bit (ch & 0xF) is set when the character belongs to the set.
            // All eight rows must be present because ch >> 4 ranges over 0..7 for ch < 128; the table is
            // written purely with \u escapes so no row depends on how the file's encoding treats unusual
            // characters. Rows: 00-0F all but \n and \r; 10-2F everything; 30-3F ':'..'?'; 40-4F '@';
            // 50-5F '['..'_'; 60-6F '`'; 70-7F '{'..DEL. Non-ASCII characters go through CharInClass.
            {
                if (!slice.IsEmpty && ((ch = slice[0]) < 128 ? ("\udbff\uffff\uffff\ufc00\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\u0004\f\n\v\r\u000e\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }

            // Match a character in the set [\p{L}] atomically at least once.
            {
                int iteration = 0;
                while ((uint)iteration < (uint)slice.Length && char.IsLetter(slice[iteration]))
                {
                    iteration++;
                }

                if (iteration == 0)
                {
                    goto AlternationBranch4;
                }

                slice = slice.Slice(iteration);
                pos += iteration;
            }

            goto AlternationMatch;

            AlternationBranch4:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 2: one to three number characters.
        {
            // Match a character in the set [\p{N}] atomically at least 1 and at most 3 times.
            {
                int iteration1 = 0;
                while (iteration1 < 3 && (uint)iteration1 < (uint)slice.Length && char.IsNumber(slice[iteration1]))
                {
                    iteration1++;
                }

                if (iteration1 == 0)
                {
                    goto AlternationBranch5;
                }

                slice = slice.Slice(iteration1);
                pos += iteration1;
            }

            goto AlternationMatch;

            AlternationBranch5:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 3: an optional space, a run of non-whitespace/non-letter/non-digit characters, then any newlines.
        {
            // Match ' ' atomically, optionally.
            {
                if (!slice.IsEmpty && slice[0] == ' ')
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }

            // Match a character in the set [^\s\p{L}\p{N}] atomically at least once.
            // Same eight-row ASCII mask table layout as in branch 1, here for the set that also excludes
            // whitespace: row 00-0F drops \t..\r and row 20-2F drops the space character.
            {
                int iteration2 = 0;
                while ((uint)iteration2 < (uint)slice.Length && ((ch = slice[iteration2]) < 128 ? ("\uc1ff\uffff\ufffe\ufc00\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\0\rd\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
                {
                    iteration2++;
                }

                if (iteration2 == 0)
                {
                    goto AlternationBranch6;
                }

                slice = slice.Slice(iteration2);
                pos += iteration2;
            }

            // Match a character in the set [\n\r] atomically any number of times.
            {
                // Index of the first character that is neither '\n' nor '\r'; negative when the rest is all newlines.
                int iteration3 = slice.IndexOfAnyExcept('\n', '\r');
                if (iteration3 < 0)
                {
                    iteration3 = slice.Length;
                }

                slice = slice.Slice(iteration3);
                pos += iteration3;
            }

            goto AlternationMatch;

            AlternationBranch6:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 4: a run of whitespace that reaches the end of the input.
        {
            // Match a whitespace character atomically at least once.
            {
                int iteration4 = 0;
                while ((uint)iteration4 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration4]))
                {
                    iteration4++;
                }

                if (iteration4 == 0)
                {
                    goto AlternationBranch7;
                }

                slice = slice.Slice(iteration4);
                pos += iteration4;
            }

            // Match if at the end of the string or if before an ending newline.
            // Passes only when pos is the end of the input, or exactly one character remains and it is '\n'.
            if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
            {
                goto AlternationBranch7;
            }

            goto AlternationMatch;

            AlternationBranch7:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 5: any amount of whitespace followed by a single '\n' or '\r'.
        {
            // Match a whitespace character greedily any number of times.
            //{
            charloop_starting_pos = pos;

            int iteration5 = 0;
            while ((uint)iteration5 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration5]))
            {
                iteration5++;
            }

            slice = slice.Slice(iteration5);
            pos += iteration5;

            charloop_ending_pos = pos;
            goto CharLoopEnd;

            CharLoopBacktrack:
            base.CheckTimeout();

            // Rather than giving back one character at a time, jump straight to the last '\n' or '\r'
            // inside the whitespace consumed so far; if there is none, the branch fails.
            if (charloop_starting_pos >= charloop_ending_pos ||
                (charloop_ending_pos = inputSpan.Slice(charloop_starting_pos, charloop_ending_pos - charloop_starting_pos).LastIndexOfAny('\n', '\r')) < 0)
            {
                goto AlternationBranch8;
            }
            // LastIndexOfAny returned an index relative to the loop start; convert it back to an absolute position.
            charloop_ending_pos += charloop_starting_pos;
            pos = charloop_ending_pos;
            slice = inputSpan.Slice(pos);

            CharLoopEnd:
            //}

            // Match a character in the set [\n\r].
            if (slice.IsEmpty || (((ch = slice[0]) != '\n') & (ch != '\r')))
            {
                goto CharLoopBacktrack;
            }

            pos++;
            slice = inputSpan.Slice(pos);
            goto AlternationMatch;

            AlternationBranch8:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 6: whitespace that is not followed by a non-whitespace character.
        {
            // Match a whitespace character greedily at least once.
            //{
            charloop_starting_pos1 = pos;

            int iteration6 = 0;
            while ((uint)iteration6 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration6]))
            {
                iteration6++;
            }

            if (iteration6 == 0)
            {
                goto AlternationBranch9;
            }

            slice = slice.Slice(iteration6);
            pos += iteration6;

            charloop_ending_pos1 = pos;
            // The loop needs at least one character, so backtracking may never give back the first one.
            charloop_starting_pos1++;
            goto CharLoopEnd1;

            CharLoopBacktrack1:
            base.CheckTimeout();

            if (charloop_starting_pos1 >= charloop_ending_pos1)
            {
                goto AlternationBranch9;
            }
            // Give back one whitespace character and retry the lookahead.
            pos = --charloop_ending_pos1;
            slice = inputSpan.Slice(pos);

            CharLoopEnd1:
            //}

            // Zero-width negative lookahead.
            {
                int negativelookahead_starting_pos = pos;

                base.CheckTimeout();

                // Match any character other than a whitespace character.
                // The lookahead is negative: it succeeds when the input ends here or the next character is whitespace.
                if (slice.IsEmpty || char.IsWhiteSpace(slice[0]))
                {
                    goto NegativeLookaroundMatch;
                }

                goto CharLoopBacktrack1;

                NegativeLookaroundMatch:
                pos = negativelookahead_starting_pos;
                slice = inputSpan.Slice(pos);
            }

            goto AlternationMatch;

            AlternationBranch9:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 7: a single whitespace character. This is the last alternative, so failure ends the attempt.
        {
            // Match a whitespace character.
            if (slice.IsEmpty || !char.IsWhiteSpace(slice[0]))
            {
                return false; // The input didn't match.
            }

            pos++;
            slice = inputSpan.Slice(pos);
        }

        AlternationMatch:;
        //}

        stackpos = atomic_stackpos;
    }

    // The input matched.
    base.runtextpos = pos;
    base.Capture(0, matchStart, pos);
    return true;
}
}
}
}
/// <summary>Custom <see cref="Regex"/>-derived type for the P50kBaseRegex method.</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class P50kBaseRegex_1 : Regex
{
/// <summary>Cached, thread-safe singleton instance.</summary>
internal static readonly P50kBaseRegex_1 Instance = new();
/// <summary>Populates the base <see cref="Regex"/> state for the p50k_base pattern.</summary>
private P50kBaseRegex_1()
{
    // Pattern text and options reported by this instance.
    base.pattern = "'(?:[sdmt]|ll|ve|re)| ?(?>\\p{L}+)| ?(?>\\p{N}+)| ?(?>[^\\s\\p{L}\\p{N}]+)|(?>\\s+)$|\\s+(?!\\S)|\\s";
    base.roptions = RegexOptions.None;

    // One capture slot: the pattern has no explicit groups, only the implicit group 0.
    base.capsize = 1;

    // Matching is delegated to the generated runner; each match attempt is limited to 30 seconds.
    base.factory = new RunnerFactory();
    base.internalMatchTimeout = TimeSpan.FromMilliseconds(30_000);
}
/// <summary>Provides a factory for creating <see cref="RegexRunner"/> instances to be used by methods on <see cref="Regex"/>.</summary>
private sealed class RunnerFactory : RegexRunnerFactory
{
/// <summary>Builds a new <see cref="RegexRunner"/> that carries the generated matching logic for this regex.</summary>
protected override RegexRunner CreateInstance()
{
    return new Runner();
}
/// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>
private sealed class Runner : RegexRunner
{
/// <summary>Looks for the next match in <paramref name="inputSpan"/>, beginning at base.runtextpos.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
protected override void Scan(ReadOnlySpan<char> inputSpan)
{
    while (true)
    {
        // Stop when no further starting position exists.
        if (!TryFindNextPossibleStartingPosition(inputSpan))
        {
            return;
        }

        // Stop as soon as a match is found at the current position.
        if (TryMatchAtCurrentPosition(inputSpan))
        {
            return;
        }

        // Stop once the end of the input has been reached.
        if (base.runtextpos == inputSpan.Length)
        {
            return;
        }

        // No match here: step one character forward and try again.
        base.runtextpos++;
        base.CheckTimeout();
    }
}
/// <summary>Reports whether a match could still start at base.runtextpos within <paramref name="inputSpan"/>.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if at least one character remains at base.runtextpos; otherwise false, after moving base.runtextpos to the end of the input.</returns>
private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
{
    // The pattern cannot match the empty string, so a start is only possible while input remains.
    // The unsigned comparison also rejects a negative position.
    bool inputRemains = (uint)base.runtextpos < (uint)inputSpan.Length;
    if (!inputRemains)
    {
        // No match found.
        base.runtextpos = inputSpan.Length;
    }

    return inputRemains;
}
/// <summary>Determine whether <paramref name="inputSpan"/> at base.runtextpos is a match for the regular expression.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
/// <remarks>
/// The seven top-level alternatives are tried in order from base.runtextpos. The first branch that succeeds jumps to
/// the AlternationMatch label, after which base.runtextpos is moved to the end of the match and capture 0 is recorded.
/// A failing branch resets pos and slice to the alternation's starting position before the next branch runs. If the
/// last branch fails the method returns false and base.runtextpos is left unchanged.
/// </remarks>
private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
{
    int pos = base.runtextpos;
    int matchStart = pos;
    char ch;
    // Backtracking bounds for the greedy whitespace loop in branch 5.
    int charloop_starting_pos = 0, charloop_ending_pos = 0;
    // Only saved and restored around the atomic group; nothing in this method pushes onto a backtracking stack.
    int stackpos = 0;
    // slice always views the input from pos onward; the two are updated together.
    ReadOnlySpan<char> slice = inputSpan.Slice(pos);

    // Atomic group.
    {
        int atomic_stackpos = stackpos;

        // Match with 7 alternative expressions, atomically.
        //{
        int alternation_starting_pos = pos;

        // Branch 0: an apostrophe followed by one of s/d/m/t, "ll", "ve" or "re" (lowercase only).
        {
            // Match '\''.
            if (slice.IsEmpty || slice[0] != '\'')
            {
                goto AlternationBranch;
            }

            // Match with 4 alternative expressions, atomically.
            // The four alternatives start with different characters, so one switch on the character
            // after the apostrophe selects the only alternative that can still match.
            {
                if ((uint)slice.Length < 2)
                {
                    goto AlternationBranch;
                }

                switch (slice[1])
                {
                    case 'd' or 'm' or 's' or 't':
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                        break;

                    case 'l':
                        // Match 'l'.
                        if ((uint)slice.Length < 3 || slice[2] != 'l')
                        {
                            goto AlternationBranch;
                        }

                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        break;

                    case 'v':
                        // Match 'e'.
                        if ((uint)slice.Length < 3 || slice[2] != 'e')
                        {
                            goto AlternationBranch;
                        }

                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        break;

                    case 'r':
                        // Match 'e'.
                        if ((uint)slice.Length < 3 || slice[2] != 'e')
                        {
                            goto AlternationBranch;
                        }

                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        break;

                    default:
                        goto AlternationBranch;
                }
            }

            goto AlternationMatch;

            AlternationBranch:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 1: an optional space followed by one or more letters.
        {
            // Match ' ' atomically, optionally.
            {
                if (!slice.IsEmpty && slice[0] == ' ')
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }

            // Match a character in the set [\p{L}] atomically at least once.
            {
                int iteration = 0;
                while ((uint)iteration < (uint)slice.Length && char.IsLetter(slice[iteration]))
                {
                    iteration++;
                }

                if (iteration == 0)
                {
                    goto AlternationBranch1;
                }

                slice = slice.Slice(iteration);
                pos += iteration;
            }

            goto AlternationMatch;

            AlternationBranch1:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 2: an optional space followed by one or more number characters.
        {
            // Match ' ' atomically, optionally.
            {
                if (!slice.IsEmpty && slice[0] == ' ')
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }

            // Match a character in the set [\p{N}] atomically at least once.
            {
                int iteration1 = 0;
                while ((uint)iteration1 < (uint)slice.Length && char.IsNumber(slice[iteration1]))
                {
                    iteration1++;
                }

                if (iteration1 == 0)
                {
                    goto AlternationBranch2;
                }

                slice = slice.Slice(iteration1);
                pos += iteration1;
            }

            goto AlternationMatch;

            AlternationBranch2:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 3: an optional space followed by a run of non-whitespace/non-letter/non-digit characters.
        {
            // Match ' ' atomically, optionally.
            {
                if (!slice.IsEmpty && slice[0] == ' ')
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }

            // Match a character in the set [^\s\p{L}\p{N}] atomically at least once.
            // ASCII fast path: the string is a table of eight 16-bit masks, one per group of 16 code points.
            // It is indexed by ch >> 4 and bit (ch & 0xF) is set when the character belongs to the set.
            // All eight rows must be present because ch >> 4 ranges over 0..7 for ch < 128; the table is
            // written purely with \u escapes so no row depends on how the file's encoding treats unusual
            // characters. Rows: 00-0F all but \t..\r; 10-1F everything; 20-2F all but space; 30-3F ':'..'?';
            // 40-4F '@'; 50-5F '['..'_'; 60-6F '`'; 70-7F '{'..DEL. Non-ASCII characters go through CharInClass.
            {
                int iteration2 = 0;
                while ((uint)iteration2 < (uint)slice.Length && ((ch = slice[iteration2]) < 128 ? ("\uc1ff\uffff\ufffe\ufc00\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\0\rd\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
                {
                    iteration2++;
                }

                if (iteration2 == 0)
                {
                    goto AlternationBranch3;
                }

                slice = slice.Slice(iteration2);
                pos += iteration2;
            }

            goto AlternationMatch;

            AlternationBranch3:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 4: a run of whitespace that reaches the end of the input.
        {
            // Match a whitespace character atomically at least once.
            {
                int iteration3 = 0;
                while ((uint)iteration3 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration3]))
                {
                    iteration3++;
                }

                if (iteration3 == 0)
                {
                    goto AlternationBranch4;
                }

                slice = slice.Slice(iteration3);
                pos += iteration3;
            }

            // Match if at the end of the string or if before an ending newline.
            // Passes only when pos is the end of the input, or exactly one character remains and it is '\n'.
            if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
            {
                goto AlternationBranch4;
            }

            goto AlternationMatch;

            AlternationBranch4:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 5: whitespace that is not followed by a non-whitespace character.
        {
            // Match a whitespace character greedily at least once.
            //{
            charloop_starting_pos = pos;

            int iteration4 = 0;
            while ((uint)iteration4 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration4]))
            {
                iteration4++;
            }

            if (iteration4 == 0)
            {
                goto AlternationBranch5;
            }

            slice = slice.Slice(iteration4);
            pos += iteration4;

            charloop_ending_pos = pos;
            // The loop needs at least one character, so backtracking may never give back the first one.
            charloop_starting_pos++;
            goto CharLoopEnd;

            CharLoopBacktrack:
            base.CheckTimeout();

            if (charloop_starting_pos >= charloop_ending_pos)
            {
                goto AlternationBranch5;
            }
            // Give back one whitespace character and retry the lookahead.
            pos = --charloop_ending_pos;
            slice = inputSpan.Slice(pos);

            CharLoopEnd:
            //}

            // Zero-width negative lookahead.
            {
                int negativelookahead_starting_pos = pos;

                base.CheckTimeout();

                // Match any character other than a whitespace character.
                // The lookahead is negative: it succeeds when the input ends here or the next character is whitespace.
                if (slice.IsEmpty || char.IsWhiteSpace(slice[0]))
                {
                    goto NegativeLookaroundMatch;
                }

                goto CharLoopBacktrack;

                NegativeLookaroundMatch:
                pos = negativelookahead_starting_pos;
                slice = inputSpan.Slice(pos);
            }

            goto AlternationMatch;

            AlternationBranch5:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }

        // Branch 6: a single whitespace character. This is the last alternative, so failure ends the attempt.
        {
            // Match a whitespace character.
            if (slice.IsEmpty || !char.IsWhiteSpace(slice[0]))
            {
                return false; // The input didn't match.
            }

            pos++;
            slice = inputSpan.Slice(pos);
        }

        AlternationMatch:;
        //}

        stackpos = atomic_stackpos;
    }

    // The input matched.
    base.runtextpos = pos;
    base.Capture(0, matchStart, pos);
    return true;
}
}
}
}
/// <summary>Custom <see cref="Regex"/>-derived type for the O200kBaseRegex method.</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class O200kBaseRegex_2 : Regex
{
/// <summary>Cached, thread-safe singleton instance.</summary>
internal static readonly O200kBaseRegex_2 Instance = new();
/// <summary>Populates the base <see cref="Regex"/> state for the o200k_base pattern.</summary>
private O200kBaseRegex_2()
{
    // Pattern text and options reported by this instance.
    base.pattern = "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+";
    base.roptions = RegexOptions.None;

    // One capture slot: the pattern has no explicit groups, only the implicit group 0.
    base.capsize = 1;

    // Matching is delegated to the generated runner; each match attempt is limited to 30 seconds.
    base.factory = new RunnerFactory();
    base.internalMatchTimeout = TimeSpan.FromMilliseconds(30_000);
}
/// <summary>Provides a factory for creating <see cref="RegexRunner"/> instances to be used by methods on <see cref="Regex"/>.</summary>
private sealed class RunnerFactory : RegexRunnerFactory
{
/// <summary>Builds a new <see cref="RegexRunner"/> that carries the generated matching logic for this regex.</summary>
protected override RegexRunner CreateInstance()
{
    return new Runner();
}
/// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>
private sealed class Runner : RegexRunner
{
/// <summary>Looks for the next match in <paramref name="inputSpan"/>, beginning at base.runtextpos.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
protected override void Scan(ReadOnlySpan<char> inputSpan)
{
    while (true)
    {
        // Stop when no further starting position exists.
        if (!TryFindNextPossibleStartingPosition(inputSpan))
        {
            return;
        }

        // Stop as soon as a match is found at the current position.
        if (TryMatchAtCurrentPosition(inputSpan))
        {
            return;
        }

        // Stop once the end of the input has been reached.
        if (base.runtextpos == inputSpan.Length)
        {
            return;
        }

        // No match here: step one character forward and try again.
        base.runtextpos++;
        base.CheckTimeout();
    }
}
/// <summary>Reports whether a match could still start at base.runtextpos within <paramref name="inputSpan"/>.</summary>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if at least one character remains at base.runtextpos; otherwise false, after moving base.runtextpos to the end of the input.</returns>
private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
{
    // The pattern cannot match the empty string, so a start is only possible while input remains.
    // The unsigned comparison also rejects a negative position.
    bool inputRemains = (uint)base.runtextpos < (uint)inputSpan.Length;
    if (!inputRemains)
    {
        // No match found.
        base.runtextpos = inputSpan.Length;
    }

    return inputRemains;
}
/// <summary>Determine whether <paramref name="inputSpan"/> at base.runtextpos is a match for the regular expression.</summary>
/// <remarks>
/// Seven alternatives are tried in order from the same starting position, inside one atomic group:
/// Branch 0: optional [^\n\r\p{L}\p{N}], then [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*, then [\p{Ll}\p{Lm}\p{Lo}\p{M}]+, then an optional
/// case-insensitive suffix 's, 't, 're, 've, 'm, 'll or 'd.
/// Branch 1: same as branch 0 but with [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+ followed by [\p{Ll}\p{Lm}\p{Lo}\p{M}]*.
/// Branch 2: one to three \p{N} characters.
/// Branch 3: optional ' ', then [^\s\p{L}\p{N}]+, then [\n\r/]*.
/// Branch 4: \s* followed by [\n\r]+.
/// Branch 5: \s+ not followed by a non-whitespace character.
/// Branch 6: \s+.
/// On success base.runtextpos is advanced to the end of the match and capture group 0 is recorded.
/// The 8-entry string literals indexed with <c>ch &gt;&gt; 4</c> are ASCII bitmaps: entry k holds one bit for each of
/// the 16 characters 16*k .. 16*k+15, so all eight entries must be present for every ASCII character to be classified.
/// </remarks>
/// <param name="inputSpan">The text being scanned by the regular expression.</param>
/// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
{
    int pos = base.runtextpos;
    int matchStart = pos;
    char ch;
    // Each backtrackable character loop records where it started and where it currently ends,
    // so a later failure can hand characters back by lowering the end position.
    int charloop_starting_pos = 0, charloop_ending_pos = 0;
    int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
    int charloop_starting_pos2 = 0, charloop_ending_pos2 = 0;
    int charloop_starting_pos3 = 0, charloop_ending_pos3 = 0;
    int charloop_starting_pos4 = 0, charloop_ending_pos4 = 0;
    int charloop_starting_pos5 = 0, charloop_ending_pos5 = 0;
    int charloop_starting_pos6 = 0, charloop_ending_pos6 = 0;
    int charloop_starting_pos7 = 0, charloop_ending_pos7 = 0;
    int charloop_starting_pos8 = 0, charloop_ending_pos8 = 0;
    int loop_iteration = 0;
    int loop_iteration1 = 0;
    int stackpos = 0;
    int startingStackpos = 0;
    int startingStackpos1 = 0;
    // slice always mirrors inputSpan.Slice(pos); the two are updated together throughout.
    ReadOnlySpan<char> slice = inputSpan.Slice(pos);
    // Atomic group.
    {
        int atomic_stackpos = stackpos;
        // Match with 7 alternative expressions, atomically.
        //{
        int alternation_starting_pos = pos;
        // Branch 0
        {
            // Match a character in the set [^\n\r\p{L}\p{N}] greedily, optionally.
            //{
            charloop_starting_pos = pos;
            // ASCII is answered from the 8-entry bitmap; anything else goes through RegexRunner.CharInClass.
            if (!slice.IsEmpty && ((ch = slice[0]) < 128 ? ("\udbff\uffff\uffffﰀ\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\u0004\f\n\v\r\u000e\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
            {
                slice = slice.Slice(1);
                pos++;
            }
            charloop_ending_pos = pos;
            goto CharLoopEnd;
        CharLoopBacktrack:
            base.CheckTimeout();
            // Nothing left to give back: this branch cannot match.
            if (charloop_starting_pos >= charloop_ending_pos)
            {
                goto AlternationBranch;
            }
            pos = --charloop_ending_pos;
            slice = inputSpan.Slice(pos);
        CharLoopEnd:
            //}
            // Match a character in the set [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}] greedily any number of times.
            //{
            charloop_starting_pos1 = pos;
            int iteration = 0;
            // 0xFD is a bit mask over UnicodeCategory values 0..7 with bit 1 cleared.
            while ((uint)iteration < (uint)slice.Length && ((0xFD & (1 << (int)char.GetUnicodeCategory(slice[iteration]))) != 0))
            {
                iteration++;
            }
            slice = slice.Slice(iteration);
            pos += iteration;
            charloop_ending_pos1 = pos;
            goto CharLoopEnd1;
        CharLoopBacktrack1:
            base.CheckTimeout();
            if (charloop_starting_pos1 >= charloop_ending_pos1)
            {
                goto CharLoopBacktrack;
            }
            pos = --charloop_ending_pos1;
            slice = inputSpan.Slice(pos);
        CharLoopEnd1:
            //}
            // Match a character in the set [\p{Ll}\p{Lm}\p{Lo}\p{M}] greedily at least once.
            //{
            charloop_starting_pos2 = pos;
            int iteration1 = 0;
            // 0xFA is a bit mask over UnicodeCategory values 1, 3, 4, 5, 6 and 7.
            while ((uint)iteration1 < (uint)slice.Length && ((0xFA & (1 << (int)char.GetUnicodeCategory(slice[iteration1]))) != 0))
            {
                iteration1++;
            }
            if (iteration1 == 0)
            {
                goto CharLoopBacktrack1;
            }
            slice = slice.Slice(iteration1);
            pos += iteration1;
            charloop_ending_pos2 = pos;
            // The loop must keep at least one character, so backtracking stops one past the start.
            charloop_starting_pos2++;
            goto CharLoopEnd2;
        CharLoopBacktrack2:
            base.CheckTimeout();
            if (charloop_starting_pos2 >= charloop_ending_pos2)
            {
                goto CharLoopBacktrack1;
            }
            pos = --charloop_ending_pos2;
            slice = inputSpan.Slice(pos);
        CharLoopEnd2:
            //}
            // Optional (greedy).
            {
                startingStackpos = stackpos;
                loop_iteration = 0;
            LoopBody:
                // Remember where this iteration began so a failed iteration can be undone.
                Utilities.StackPush(ref base.runstack!, ref stackpos, pos);
                loop_iteration++;
                // Match '\''.
                if (slice.IsEmpty || slice[0] != '\'')
                {
                    goto LoopIterationNoMatch;
                }
                // Match with 6 alternative expressions, atomically.
                {
                    int alternation_starting_pos1 = pos;
                    // Branch 0
                    {
                        // Match a character in the set [STst].
                        if ((uint)slice.Length < 2 || ((uint)((slice[1] | 0x20) - 's') > (uint)('t' - 's')))
                        {
                            goto AlternationBranch1;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch1;
                    AlternationBranch1:
                        pos = alternation_starting_pos1;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 1
                    {
                        if ((uint)slice.Length < 3 ||
                            !slice.Slice(1).StartsWith("re", StringComparison.OrdinalIgnoreCase)) // Match the string "re" (ordinal case-insensitive)
                        {
                            goto AlternationBranch2;
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch1;
                    AlternationBranch2:
                        pos = alternation_starting_pos1;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 2
                    {
                        if ((uint)slice.Length < 3 ||
                            !slice.Slice(1).StartsWith("ve", StringComparison.OrdinalIgnoreCase)) // Match the string "ve" (ordinal case-insensitive)
                        {
                            goto AlternationBranch3;
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch1;
                    AlternationBranch3:
                        pos = alternation_starting_pos1;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 3
                    {
                        // Match a character in the set [Mm].
                        if ((uint)slice.Length < 2 || ((slice[1] | 0x20) != 'm'))
                        {
                            goto AlternationBranch4;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch1;
                    AlternationBranch4:
                        pos = alternation_starting_pos1;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 4
                    {
                        // Match a character in the set [Ll] exactly 2 times.
                        {
                            if ((uint)slice.Length < 3 ||
                                ((slice[1] | 0x20) != 'l') ||
                                ((slice[2] | 0x20) != 'l'))
                            {
                                goto AlternationBranch5;
                            }
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch1;
                    AlternationBranch5:
                        pos = alternation_starting_pos1;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 5
                    {
                        // Match a character in the set [Dd].
                        if ((uint)slice.Length < 2 || ((slice[1] | 0x20) != 'd'))
                        {
                            goto LoopIterationNoMatch;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                    }
                AlternationMatch1:;
                }
                // The loop has an upper bound of 1. Continue iterating greedily if it hasn't yet been reached.
                // (loop_iteration was incremented above, so it is 1 here after a successful pass.)
                if (loop_iteration == 0)
                {
                    goto LoopBody;
                }
                goto LoopEnd;
            // The loop iteration failed. Put state back to the way it was before the iteration.
            LoopIterationNoMatch:
                if (--loop_iteration < 0)
                {
                    // Unable to match the remainder of the expression after exhausting the loop.
                    goto CharLoopBacktrack2;
                }
                // Restore the position pushed at the top of the failed iteration; the optional part matched nothing.
                pos = base.runstack![--stackpos];
                slice = inputSpan.Slice(pos);
            LoopEnd:
                stackpos = startingStackpos; // Ensure any remaining backtracking state is removed.
            }
            goto AlternationMatch;
        AlternationBranch:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 1
        {
            // Match a character in the set [^\n\r\p{L}\p{N}] greedily, optionally.
            //{
            charloop_starting_pos3 = pos;
            // Same 8-entry ASCII bitmap as in branch 0.
            if (!slice.IsEmpty && ((ch = slice[0]) < 128 ? ("\udbff\uffff\uffffﰀ\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\u0004\f\n\v\r\u000e\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
            {
                slice = slice.Slice(1);
                pos++;
            }
            charloop_ending_pos3 = pos;
            goto CharLoopEnd3;
        CharLoopBacktrack3:
            base.CheckTimeout();
            if (charloop_starting_pos3 >= charloop_ending_pos3)
            {
                goto AlternationBranch6;
            }
            pos = --charloop_ending_pos3;
            slice = inputSpan.Slice(pos);
        CharLoopEnd3:
            //}
            // Match a character in the set [\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}] greedily at least once.
            //{
            charloop_starting_pos4 = pos;
            int iteration2 = 0;
            while ((uint)iteration2 < (uint)slice.Length && ((0xFD & (1 << (int)char.GetUnicodeCategory(slice[iteration2]))) != 0))
            {
                iteration2++;
            }
            if (iteration2 == 0)
            {
                goto CharLoopBacktrack3;
            }
            slice = slice.Slice(iteration2);
            pos += iteration2;
            charloop_ending_pos4 = pos;
            // Keep at least one character in this loop when backtracking.
            charloop_starting_pos4++;
            goto CharLoopEnd4;
        CharLoopBacktrack4:
            base.CheckTimeout();
            if (charloop_starting_pos4 >= charloop_ending_pos4)
            {
                goto CharLoopBacktrack3;
            }
            pos = --charloop_ending_pos4;
            slice = inputSpan.Slice(pos);
        CharLoopEnd4:
            //}
            // Match a character in the set [\p{Ll}\p{Lm}\p{Lo}\p{M}] greedily any number of times.
            //{
            charloop_starting_pos5 = pos;
            int iteration3 = 0;
            while ((uint)iteration3 < (uint)slice.Length && ((0xFA & (1 << (int)char.GetUnicodeCategory(slice[iteration3]))) != 0))
            {
                iteration3++;
            }
            slice = slice.Slice(iteration3);
            pos += iteration3;
            charloop_ending_pos5 = pos;
            goto CharLoopEnd5;
        CharLoopBacktrack5:
            base.CheckTimeout();
            if (charloop_starting_pos5 >= charloop_ending_pos5)
            {
                goto CharLoopBacktrack4;
            }
            pos = --charloop_ending_pos5;
            slice = inputSpan.Slice(pos);
        CharLoopEnd5:
            //}
            // Optional (greedy).
            {
                startingStackpos1 = stackpos;
                loop_iteration1 = 0;
            LoopBody1:
                // Remember where this iteration began so a failed iteration can be undone.
                Utilities.StackPush(ref base.runstack!, ref stackpos, pos);
                loop_iteration1++;
                // Match '\''.
                if (slice.IsEmpty || slice[0] != '\'')
                {
                    goto LoopIterationNoMatch1;
                }
                // Match with 6 alternative expressions, atomically.
                {
                    int alternation_starting_pos2 = pos;
                    // Branch 0
                    {
                        // Match a character in the set [STst].
                        if ((uint)slice.Length < 2 || ((uint)((slice[1] | 0x20) - 's') > (uint)('t' - 's')))
                        {
                            goto AlternationBranch7;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch2;
                    AlternationBranch7:
                        pos = alternation_starting_pos2;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 1
                    {
                        if ((uint)slice.Length < 3 ||
                            !slice.Slice(1).StartsWith("re", StringComparison.OrdinalIgnoreCase)) // Match the string "re" (ordinal case-insensitive)
                        {
                            goto AlternationBranch8;
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch2;
                    AlternationBranch8:
                        pos = alternation_starting_pos2;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 2
                    {
                        if ((uint)slice.Length < 3 ||
                            !slice.Slice(1).StartsWith("ve", StringComparison.OrdinalIgnoreCase)) // Match the string "ve" (ordinal case-insensitive)
                        {
                            goto AlternationBranch9;
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch2;
                    AlternationBranch9:
                        pos = alternation_starting_pos2;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 3
                    {
                        // Match a character in the set [Mm].
                        if ((uint)slice.Length < 2 || ((slice[1] | 0x20) != 'm'))
                        {
                            goto AlternationBranch10;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch2;
                    AlternationBranch10:
                        pos = alternation_starting_pos2;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 4
                    {
                        // Match a character in the set [Ll] exactly 2 times.
                        {
                            if ((uint)slice.Length < 3 ||
                                ((slice[1] | 0x20) != 'l') ||
                                ((slice[2] | 0x20) != 'l'))
                            {
                                goto AlternationBranch11;
                            }
                        }
                        pos += 3;
                        slice = inputSpan.Slice(pos);
                        goto AlternationMatch2;
                    AlternationBranch11:
                        pos = alternation_starting_pos2;
                        slice = inputSpan.Slice(pos);
                    }
                    // Branch 5
                    {
                        // Match a character in the set [Dd].
                        if ((uint)slice.Length < 2 || ((slice[1] | 0x20) != 'd'))
                        {
                            goto LoopIterationNoMatch1;
                        }
                        pos += 2;
                        slice = inputSpan.Slice(pos);
                    }
                AlternationMatch2:;
                }
                // The loop has an upper bound of 1. Continue iterating greedily if it hasn't yet been reached.
                // (loop_iteration1 was incremented above, so it is 1 here after a successful pass.)
                if (loop_iteration1 == 0)
                {
                    goto LoopBody1;
                }
                goto LoopEnd1;
            // The loop iteration failed. Put state back to the way it was before the iteration.
            LoopIterationNoMatch1:
                if (--loop_iteration1 < 0)
                {
                    // Unable to match the remainder of the expression after exhausting the loop.
                    goto CharLoopBacktrack5;
                }
                // Restore the position pushed at the top of the failed iteration; the optional part matched nothing.
                pos = base.runstack![--stackpos];
                slice = inputSpan.Slice(pos);
            LoopEnd1:
                stackpos = startingStackpos1; // Ensure any remaining backtracking state is removed.
            }
            goto AlternationMatch;
        AlternationBranch6:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 2
        {
            // Match a character in the set [\p{N}] atomically at least 1 and at most 3 times.
            {
                int iteration4 = 0;
                while (iteration4 < 3 && (uint)iteration4 < (uint)slice.Length && char.IsNumber(slice[iteration4]))
                {
                    iteration4++;
                }
                if (iteration4 == 0)
                {
                    goto AlternationBranch12;
                }
                slice = slice.Slice(iteration4);
                pos += iteration4;
            }
            goto AlternationMatch;
        AlternationBranch12:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 3
        {
            // Match ' ' atomically, optionally.
            {
                if (!slice.IsEmpty && slice[0] == ' ')
                {
                    slice = slice.Slice(1);
                    pos++;
                }
            }
            // Match a character in the set [^\s\p{L}\p{N}] greedily at least once.
            //{
            charloop_starting_pos6 = pos;
            int iteration5 = 0;
            // 8-entry ASCII bitmap for [^\s\p{L}\p{N}]; non-ASCII characters go through RegexRunner.CharInClass.
            while ((uint)iteration5 < (uint)slice.Length && ((ch = slice[iteration5]) < 128 ? ("쇿\uffff\ufffeﰀ\u0001\uf800\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\0\rd\0\u0002\u0004\u0005\u0003\u0001\0\0\t\n\v\0")))
            {
                iteration5++;
            }
            if (iteration5 == 0)
            {
                goto AlternationBranch13;
            }
            slice = slice.Slice(iteration5);
            pos += iteration5;
            charloop_ending_pos6 = pos;
            charloop_starting_pos6++;
            goto CharLoopEnd6;
        CharLoopBacktrack6:
            base.CheckTimeout();
            if (charloop_starting_pos6 >= charloop_ending_pos6)
            {
                goto AlternationBranch13;
            }
            pos = --charloop_ending_pos6;
            slice = inputSpan.Slice(pos);
        CharLoopEnd6:
            //}
            // Match a character in the set [\n\r/] atomically any number of times.
            {
                int iteration6 = slice.IndexOfAnyExcept('\n', '\r', '/');
                // A negative result means every remaining character is in the set.
                if (iteration6 < 0)
                {
                    iteration6 = slice.Length;
                }
                slice = slice.Slice(iteration6);
                pos += iteration6;
            }
            goto AlternationMatch;
        AlternationBranch13:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 4
        {
            // Match a whitespace character greedily any number of times.
            //{
            charloop_starting_pos7 = pos;
            int iteration7 = 0;
            while ((uint)iteration7 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration7]))
            {
                iteration7++;
            }
            slice = slice.Slice(iteration7);
            pos += iteration7;
            charloop_ending_pos7 = pos;
            goto CharLoopEnd7;
        CharLoopBacktrack7:
            base.CheckTimeout();
            // Rather than giving back one character at a time, jump straight to the last '\n' or '\r'
            // inside the consumed whitespace, since that is what must follow this loop.
            if (charloop_starting_pos7 >= charloop_ending_pos7 ||
                (charloop_ending_pos7 = inputSpan.Slice(charloop_starting_pos7, charloop_ending_pos7 - charloop_starting_pos7).LastIndexOfAny('\n', '\r')) < 0)
            {
                goto AlternationBranch14;
            }
            // LastIndexOfAny returned an offset relative to the loop start; convert it to an absolute position.
            charloop_ending_pos7 += charloop_starting_pos7;
            pos = charloop_ending_pos7;
            slice = inputSpan.Slice(pos);
        CharLoopEnd7:
            //}
            // Match a character in the set [\n\r] atomically at least once.
            {
                int iteration8 = slice.IndexOfAnyExcept('\n', '\r');
                if (iteration8 < 0)
                {
                    iteration8 = slice.Length;
                }
                if (iteration8 == 0)
                {
                    goto CharLoopBacktrack7;
                }
                slice = slice.Slice(iteration8);
                pos += iteration8;
            }
            goto AlternationMatch;
        AlternationBranch14:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 5
        {
            // Match a whitespace character greedily at least once.
            //{
            charloop_starting_pos8 = pos;
            int iteration9 = 0;
            while ((uint)iteration9 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration9]))
            {
                iteration9++;
            }
            if (iteration9 == 0)
            {
                goto AlternationBranch15;
            }
            slice = slice.Slice(iteration9);
            pos += iteration9;
            charloop_ending_pos8 = pos;
            charloop_starting_pos8++;
            goto CharLoopEnd8;
        CharLoopBacktrack8:
            base.CheckTimeout();
            if (charloop_starting_pos8 >= charloop_ending_pos8)
            {
                goto AlternationBranch15;
            }
            pos = --charloop_ending_pos8;
            slice = inputSpan.Slice(pos);
        CharLoopEnd8:
            //}
            // Zero-width negative lookahead.
            {
                int negativelookahead_starting_pos = pos;
                base.CheckTimeout();
                // Match any character other than a whitespace character.
                // The lookahead succeeds when that inner match fails (end of input or whitespace next).
                if (slice.IsEmpty || char.IsWhiteSpace(slice[0]))
                {
                    goto NegativeLookaroundMatch;
                }
                // A non-whitespace character follows: give back whitespace and retry.
                goto CharLoopBacktrack8;
            NegativeLookaroundMatch:
                pos = negativelookahead_starting_pos;
                slice = inputSpan.Slice(pos);
            }
            goto AlternationMatch;
        AlternationBranch15:
            pos = alternation_starting_pos;
            slice = inputSpan.Slice(pos);
        }
        // Branch 6
        {
            // Match a whitespace character atomically at least once.
            {
                int iteration10 = 0;
                while ((uint)iteration10 < (uint)slice.Length && char.IsWhiteSpace(slice[iteration10]))
                {
                    iteration10++;
                }
                if (iteration10 == 0)
                {
                    return false; // The input didn't match.
                }
                slice = slice.Slice(iteration10);
                pos += iteration10;
            }
        }
    AlternationMatch:;
        //}
        // Leaving the atomic group: drop any backtracking state pushed inside it.
        stackpos = atomic_stackpos;
    }
    // The input matched.
    base.runtextpos = pos;
    base.Capture(0, matchStart, pos);
    return true;
}
}
}
}
/// <summary><see cref="Regex"/>-derived type implementing the WhiteSpaceOrPunctuationRegex method (pattern <c>\w+|[\p{P}]</c>).</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class WhiteSpaceOrPunctuationRegex_3 : Regex
{
    /// <summary>The single shared instance, created once by the static field initializer.</summary>
    internal static readonly WhiteSpaceOrPunctuationRegex_3 Instance = new();

    /// <summary>Configures the base <see cref="Regex"/> state: pattern text, options, match timeout, runner factory and capture count.</summary>
    private WhiteSpaceOrPunctuationRegex_3()
    {
        base.pattern = "\\w+|[\\p{P}]";
        base.roptions = RegexOptions.None;
        base.internalMatchTimeout = TimeSpan.FromMilliseconds(30000);
        base.factory = new RunnerFactory();
        base.capsize = 1;
    }

    /// <summary>Factory that hands out <see cref="RegexRunner"/> instances for this regex.</summary>
    private sealed class RunnerFactory : RegexRunnerFactory
    {
        /// <summary>Creates a new <see cref="Runner"/>.</summary>
        protected override RegexRunner CreateInstance() => new Runner();

        /// <summary>The runner holding the matching logic for <c>\w+|[\p{P}]</c>.</summary>
        private sealed class Runner : RegexRunner
        {
            /// <summary>Scans <paramref name="inputSpan"/> from the current position for the next match.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            protected override void Scan(ReadOnlySpan<char> inputSpan)
            {
                // Stop when no candidate start remains, when a match is found, or when the end of input is reached.
                while (TryFindNextPossibleStartingPosition(inputSpan))
                {
                    if (TryMatchAtCurrentPosition(inputSpan) || base.runtextpos == inputSpan.Length)
                    {
                        return;
                    }

                    base.runtextpos++;
                    base.CheckTimeout();
                }
            }

            /// <summary>Moves base.runtextpos to the next position in <paramref name="inputSpan"/> where a match could begin.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if a candidate position was found; false if no more matches are possible.</returns>
            private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
            {
                int start = base.runtextpos;
                int offset = -1;

                // Empty matches aren't possible, so only search while input remains.
                if ((uint)start < (uint)inputSpan.Length)
                {
                    // Every match begins with a character in [\w\p{P}]; look for the next one.
                    offset = inputSpan.Slice(start).IndexOfNonAsciiOrAny_A2156A68B3FF2CEFCBABF7078C1AEC356AC590A34A9D31C18E0C21F77ECF6097();
                }

                if (offset < 0)
                {
                    // No candidate: park the position at the end of the input.
                    base.runtextpos = inputSpan.Length;
                    return false;
                }

                base.runtextpos = start + offset;
                return true;
            }

            /// <summary>Attempts to match the pattern in <paramref name="inputSpan"/> starting exactly at base.runtextpos.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
            private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
            {
                int start = base.runtextpos;
                ReadOnlySpan<char> remaining = inputSpan.Slice(start);
                int end;

                // First alternative: one or more word characters.
                int wordLength = 0;
                while ((uint)wordLength < (uint)remaining.Length && Utilities.IsWordChar(remaining[wordLength]))
                {
                    wordLength++;
                }

                if (wordLength != 0)
                {
                    end = start + wordLength;
                }
                else if (!remaining.IsEmpty && char.IsPunctuation(remaining[0]))
                {
                    // Second alternative: a single punctuation character.
                    end = start + 1;
                }
                else
                {
                    return false; // The input didn't match.
                }

                // Record the match as capture group 0 and advance past it.
                base.runtextpos = end;
                base.Capture(0, start, end);
                return true;
            }
        }
    }
}
/// <summary>Custom <see cref="Regex"/>-derived type for the WordOrNonWordRegex method (pattern <c>\w+|[^\w\s]+</c>).</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class WordOrNonWordRegex_4 : Regex
{
    /// <summary>The single shared instance, created once by the static field initializer.</summary>
    internal static readonly WordOrNonWordRegex_4 Instance = new();

    /// <summary>Configures the base <see cref="Regex"/> state: pattern text, options, match timeout, runner factory and capture count.</summary>
    private WordOrNonWordRegex_4()
    {
        base.pattern = "\\w+|[^\\w\\s]+";
        base.roptions = RegexOptions.None;
        base.internalMatchTimeout = TimeSpan.FromMilliseconds(30000);
        base.factory = new RunnerFactory();
        base.capsize = 1;
    }

    /// <summary>Provides a factory for creating <see cref="RegexRunner"/> instances to be used by methods on <see cref="Regex"/>.</summary>
    private sealed class RunnerFactory : RegexRunnerFactory
    {
        /// <summary>Creates an instance of a <see cref="RegexRunner"/> used by methods on <see cref="Regex"/>.</summary>
        protected override RegexRunner CreateInstance() => new Runner();

        /// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>
        private sealed class Runner : RegexRunner
        {
            /// <summary>Scan the <paramref name="inputSpan"/> starting from base.runtextstart for the next match.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            protected override void Scan(ReadOnlySpan<char> inputSpan)
            {
                // Search until we can't find a valid starting position, we find a match, or we reach the end of the input.
                while (TryFindNextPossibleStartingPosition(inputSpan) &&
                       !TryMatchAtCurrentPosition(inputSpan) &&
                       base.runtextpos != inputSpan.Length)
                {
                    base.runtextpos++;
                    base.CheckTimeout();
                }
            }

            /// <summary>Search <paramref name="inputSpan"/> starting from base.runtextpos for the next location a match could possibly start.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if a possible match was found; false if no more matches are possible.</returns>
            private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
            {
                int pos = base.runtextpos;

                // Empty matches aren't possible.
                if ((uint)pos < (uint)inputSpan.Length)
                {
                    return true;
                }

                // No match found.
                base.runtextpos = inputSpan.Length;
                return false;
            }

            /// <summary>Determine whether <paramref name="inputSpan"/> at base.runtextpos is a match for the regular expression.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
            private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
            {
                int pos = base.runtextpos;
                int matchStart = pos;
                char ch;
                ReadOnlySpan<char> slice = inputSpan.Slice(pos);

                // Match with 2 alternative expressions, atomically.
                {
                    int alternation_starting_pos = pos;

                    // Branch 0
                    {
                        // Match a word character atomically at least once.
                        {
                            int iteration = 0;
                            while ((uint)iteration < (uint)slice.Length && Utilities.IsWordChar(slice[iteration]))
                            {
                                iteration++;
                            }

                            if (iteration == 0)
                            {
                                goto AlternationBranch;
                            }

                            slice = slice.Slice(iteration);
                            pos += iteration;
                        }

                        goto AlternationMatch;

                    AlternationBranch:
                        pos = alternation_starting_pos;
                        slice = inputSpan.Slice(pos);
                    }

                    // Branch 1
                    {
                        // Match a character in the set [^\w\s] atomically at least once.
                        {
                            int iteration1 = 0;
                            // ASCII characters are classified with a bitmap: entry (ch >> 4) holds one bit per character
                            // for the 16 characters in that range, so the literal needs all 8 entries to cover 0..127.
                            // The final entry (0xF800) covers 0x70..0x7F and marks '{', '|', '}', '~' and DEL as members.
                            while ((uint)iteration1 < (uint)slice.Length && ((ch = slice[iteration1]) < 128 ? ("쇿\uffff\ufffeﰀ\u0001砀\u0001\uf800"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\0\v\0\u0002\u0004\u0005\u0003\u0001\u0006\t\u0013\0d")))
                            {
                                iteration1++;
                            }

                            if (iteration1 == 0)
                            {
                                return false; // The input didn't match.
                            }

                            slice = slice.Slice(iteration1);
                            pos += iteration1;
                        }
                    }

                AlternationMatch:;
                }

                // The input matched.
                base.runtextpos = pos;
                base.Capture(0, matchStart, pos);
                return true;
            }
        }
    }
}
/// <summary><see cref="Regex"/>-derived type implementing the WhiteSpaceRegex method (pattern <c>\S+</c>).</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
[SkipLocalsInit]
file sealed class WhiteSpaceRegex_5 : Regex
{
    /// <summary>The single shared instance, created once by the static field initializer.</summary>
    internal static readonly WhiteSpaceRegex_5 Instance = new();

    /// <summary>Configures the base <see cref="Regex"/> state: pattern text, options, match timeout, runner factory and capture count.</summary>
    private WhiteSpaceRegex_5()
    {
        base.pattern = "\\S+";
        base.roptions = RegexOptions.None;
        base.internalMatchTimeout = TimeSpan.FromMilliseconds(30000);
        base.factory = new RunnerFactory();
        base.capsize = 1;
    }

    /// <summary>Factory that hands out <see cref="RegexRunner"/> instances for this regex.</summary>
    private sealed class RunnerFactory : RegexRunnerFactory
    {
        /// <summary>Creates a new <see cref="Runner"/>.</summary>
        protected override RegexRunner CreateInstance() => new Runner();

        /// <summary>The runner holding the matching logic for <c>\S+</c>.</summary>
        private sealed class Runner : RegexRunner
        {
            /// <summary>Scans <paramref name="inputSpan"/> from the current position for the next match.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            protected override void Scan(ReadOnlySpan<char> inputSpan)
            {
                // Stop when no candidate start remains, when a match is found, or when the end of input is reached.
                while (TryFindNextPossibleStartingPosition(inputSpan))
                {
                    if (TryMatchAtCurrentPosition(inputSpan) || base.runtextpos == inputSpan.Length)
                    {
                        return;
                    }

                    base.runtextpos++;
                    base.CheckTimeout();
                }
            }

            /// <summary>Moves base.runtextpos to the next position in <paramref name="inputSpan"/> where a match could begin.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if a candidate position was found; false if no more matches are possible.</returns>
            private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)
            {
                int start = base.runtextpos;
                int offset = -1;

                // Empty matches aren't possible, so only search while input remains.
                if ((uint)start < (uint)inputSpan.Length)
                {
                    // Every match begins with a non-whitespace character; look for the next one.
                    offset = inputSpan.Slice(start).IndexOfAnyExceptWhiteSpace();
                }

                if (offset < 0)
                {
                    // No candidate: park the position at the end of the input.
                    base.runtextpos = inputSpan.Length;
                    return false;
                }

                base.runtextpos = start + offset;
                return true;
            }

            /// <summary>Attempts to match the pattern in <paramref name="inputSpan"/> starting exactly at base.runtextpos.</summary>
            /// <param name="inputSpan">The text being scanned by the regular expression.</param>
            /// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>
            private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)
            {
                int start = base.runtextpos;
                ReadOnlySpan<char> remaining = inputSpan.Slice(start);

                // Count the run of non-whitespace characters at the current position.
                int length = 0;
                while ((uint)length < (uint)remaining.Length && !char.IsWhiteSpace(remaining[length]))
                {
                    length++;
                }

                if (length == 0)
                {
                    return false; // The input didn't match.
                }

                // Record the match as capture group 0 and advance past it.
                int end = start + length;
                base.runtextpos = end;
                base.Capture(0, start, end);
                return true;
            }
        }
    }
}
/// <summary>Helper methods used by generated <see cref="Regex"/>-derived implementations.</summary>
[GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "8.0.12.41914")]
file static class Utilities
{
    /// <summary>Finds the next index of any character that matches any character other than a whitespace character.</summary>
    /// <param name="span">The text to search.</param>
    /// <returns>The index of the first non-whitespace character, or -1 if there is none.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int IndexOfAnyExceptWhiteSpace(this ReadOnlySpan<char> span)
    {
        // Skip ASCII whitespace first; this stops at the first character that is either
        // a non-whitespace ASCII character or any non-ASCII character.
        int i = span.IndexOfAnyExcept(Utilities.s_asciiWhiteSpace);
        if ((uint)i < (uint)span.Length)
        {
            if (char.IsAscii(span[i]))
            {
                return i;
            }

            // A non-ASCII character was hit, which may itself be whitespace:
            // continue one character at a time using the full Unicode check.
            do
            {
                if (!char.IsWhiteSpace(span[i]))
                {
                    return i;
                }
                i++;
            }
            while ((uint)i < (uint)span.Length);
        }

        return -1;
    }

    /// <summary>Finds the next index of any character that matches a character in the set [\w\p{P}].</summary>
    /// <param name="span">The text to search.</param>
    /// <returns>The index of the first character in the set, or -1 if there is none.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static int IndexOfNonAsciiOrAny_A2156A68B3FF2CEFCBABF7078C1AEC356AC590A34A9D31C18E0C21F77ECF6097(this ReadOnlySpan<char> span)
    {
        // Skip the listed ASCII characters; this stops at the first character that is either
        // an ASCII character outside that list or any non-ASCII character.
        int i = span.IndexOfAnyExcept(Utilities.s_ascii_FFFFFFFF1108007000000040010000D0);
        if ((uint)i < (uint)span.Length)
        {
            if (char.IsAscii(span[i]))
            {
                return i;
            }

            // A non-ASCII character was hit: continue one character at a time, testing each
            // character's Unicode category against the bit mask 0x1FC013F.
            do
            {
                if (((0x1FC013F & (1 << (int)char.GetUnicodeCategory(span[i]))) != 0))
                {
                    return i;
                }
                i++;
            }
            while ((uint)i < (uint)span.Length);
        }

        return -1;
    }

    /// <summary>Determines whether the character is part of the [\w] set.</summary>
    /// <param name="ch">The character to classify.</param>
    /// <returns>true if <paramref name="ch"/> is a word character; otherwise, false.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static bool IsWordChar(char ch)
    {
        // Mask of Unicode categories that combine to form [\w]
        const int WordCategoriesMask =
            1 << (int)UnicodeCategory.UppercaseLetter |
            1 << (int)UnicodeCategory.LowercaseLetter |
            1 << (int)UnicodeCategory.TitlecaseLetter |
            1 << (int)UnicodeCategory.ModifierLetter |
            1 << (int)UnicodeCategory.OtherLetter |
            1 << (int)UnicodeCategory.NonSpacingMark |
            1 << (int)UnicodeCategory.DecimalDigitNumber |
            1 << (int)UnicodeCategory.ConnectorPunctuation;

        // Bitmap for whether each character 0 through 127 is in [\w]
        // (16 bytes, one bit per character: byte index is ch / 8, bit index is ch % 8).
        ReadOnlySpan<byte> ascii = new byte[]
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
            0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
        };

        // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
        int chDiv8 = ch >> 3;
        return (uint)chDiv8 < (uint)ascii.Length ?
            (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
            (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
    }

    /// <summary>Pushes 1 value onto the backtracking stack.</summary>
    /// <param name="stack">The backtracking stack; replaced with a larger array when it is full.</param>
    /// <param name="pos">The index of the next free slot; incremented after the value is stored.</param>
    /// <param name="arg0">The value to push.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void StackPush(ref int[] stack, ref int pos, int arg0)
    {
        // If there's space available for the value, store it.
        int[] s = stack;
        int p = pos;
        if ((uint)p < (uint)s.Length)
        {
            s[p] = arg0;
            pos++;
            return;
        }

        // Otherwise, resize the stack to make room and try again.
        WithResize(ref stack, ref pos, arg0);

        // <summary>Resize the backtracking stack array and push 1 value onto the stack.</summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        static void WithResize(ref int[] stack, ref int pos, int arg0)
        {
            // Double the capacity, but always grow by at least one slot: with pos == 0 a plain
            // doubling would yield length 0, the array would never grow, and the retry below
            // would recurse without end. For pos >= 1 this is the same size as pos * 2.
            Array.Resize(ref stack, Math.Max(pos * 2, pos + 1));
            StackPush(ref stack, ref pos, arg0);
        }
    }

    /// <summary>Supports searching for characters in or not in "\t\n\v\f\r ".</summary>
    internal static readonly SearchValues<char> s_asciiWhiteSpace = SearchValues.Create("\t\n\v\f\r ");

    /// <summary>Supports searching for characters in or not in "\0\u0001\u0002\u0003\u0004\u0005\u0006\a\b\t\n\v\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f $+<=>^`|~\u007f".</summary>
    internal static readonly SearchValues<char> s_ascii_FFFFFFFF1108007000000040010000D0 = SearchValues.Create("\0\u0001\u0002\u0003\u0004\u0005\u0006\a\b\t\n\v\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f $+<=>^`|~\u007f");
}
}
|