// File: src\Workspaces\SharedUtilitiesAndExtensions\Compiler\Core\Utilities\StringBreaker.cs
// Web Access
// Project: src\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Text;
namespace Microsoft.CodeAnalysis.Shared.Utilities;
/// <summary>
/// Splits identifier-like text into consecutive <see cref="TextSpan"/> parts: upper-case runs,
/// lower-case runs, digit runs and single underscores. Punctuation other than '_' is skipped.
/// </summary>
internal static class StringBreaker
{
    /// <summary>
    /// Breaks an identifier string into constituent parts.
    /// </summary>
    /// <remarks>
    /// Word mode: a run of upper-case letters is kept together, so <c>XMLDocument</c> yields
    /// <c>XML</c> and <c>Document</c>.
    /// </remarks>
    /// <param name="identifier">The text to break up.</param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddWordParts(string identifier, ref TemporaryArray<TextSpan> parts)
        => AddParts(identifier, word: true, ref parts);

    /// <summary>
    /// Breaks an identifier string into constituent parts, splitting upper-case runs into
    /// individual characters, so <c>XMLDocument</c> yields <c>X</c>, <c>M</c>, <c>L</c> and
    /// <c>Document</c>.
    /// </summary>
    /// <param name="identifier">The text to break up.</param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddCharacterParts(string identifier, ref TemporaryArray<TextSpan> parts)
        => AddParts(identifier, word: false, ref parts);

    /// <summary>
    /// Repeatedly calls <see cref="GenerateSpan"/> from the end of the previous part and appends
    /// every non-empty span to <paramref name="parts"/>.
    /// </summary>
    /// <param name="text">The text to break up.</param>
    /// <param name="word">
    /// <see langword="true"/> to keep upper-case runs together (word parts);
    /// <see langword="false"/> to split them into single characters (character parts).
    /// </param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddParts(string text, bool word, ref TemporaryArray<TextSpan> parts)
    {
        for (var start = 0; start < text.Length;)
        {
            var span = GenerateSpan(text, start, word);
            if (span.IsEmpty)
            {
                // All done: the generator found nothing more it recognizes.
                break;
            }

            Debug.Assert(span.Start >= start, "Bad generator.");

            parts.Add(span);

            // Every non-empty span ends past 'start', so the loop always makes progress.
            start = span.End;
        }
    }

    /// <summary>
    /// Produces the next part at or after <paramref name="wordStart"/>.
    /// </summary>
    /// <param name="identifier">The text being broken up.</param>
    /// <param name="wordStart">Index at which to start looking for the next part.</param>
    /// <param name="word">
    /// <see langword="true"/> to scan an upper-case start as a word run;
    /// <see langword="false"/> to scan it as a character run.
    /// </param>
    /// <returns>
    /// The span of the next part, or <see langword="default"/> when the end of the text is reached
    /// or the first non-punctuation character is not an upper-case letter, lower-case letter,
    /// underscore or digit. <see cref="AddParts"/> stops at the first empty span.
    /// </returns>
    public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
    {
        var length = identifier.Length;
        wordStart = SkipPunctuation(identifier, length, wordStart);
        if (wordStart >= length)
        {
            return default;
        }

        var firstChar = identifier[wordStart];
        if (char.IsUpper(firstChar))
        {
            if (wordStart + 1 == length)
            {
                // A trailing upper-case letter is a part on its own. Handling it here also
                // guarantees the run scanners always have a following character to inspect.
                return new TextSpan(wordStart, 1);
            }

            return word
                ? ScanWordRun(identifier, length, wordStart)
                : ScanCharacterRun(identifier, length, wordStart);
        }
        else if (IsLower(firstChar))
        {
            return ScanLowerCaseRun(identifier, length, wordStart);
        }
        else if (firstChar == '_')
        {
            // Each underscore is reported as its own one-character part.
            return new TextSpan(wordStart, 1);
        }
        else if (char.IsDigit(firstChar))
        {
            return ScanNumber(identifier, length, wordStart);
        }

        return default;
    }

    /// <summary>
    /// Scans a part that starts with an upper-case letter in character mode. The caller must
    /// ensure that at least one character follows <paramref name="wordStart"/>.
    /// </summary>
    private static TextSpan ScanCharacterRun(string identifier, int length, int wordStart)
    {
        // In a character run, if we have XMLDocument, then we will break that up into
        // X, M, L, and Document.
        var current = wordStart + 1;
        Debug.Assert(current < length);
        var c = identifier[current];

        if (IsLower(c))
        {
            // "Do"
            //
            // scan the lowercase letters from here on to scan out 'Document'.
            return ScanLowerCaseRun(identifier, length, wordStart);
        }
        else
        {
            return new TextSpan(wordStart, 1);
        }
    }

    /// <summary>
    /// Scans a part that starts with an upper-case letter in word mode. The caller must ensure
    /// that at least one character follows <paramref name="wordStart"/>.
    /// </summary>
    private static TextSpan ScanWordRun(string identifier, int length, int wordStart)
    {
        // In a word run, if we have XMLDocument, then we will break that up into
        // XML and Document.
        var current = wordStart + 1;
        Debug.Assert(current < length);
        var c = identifier[current];

        if (char.IsUpper(c))
        {
            // "XM"
            //
            // scan all the upper case letters until we hit one followed by a lower
            // case letter.
            while (current < length && char.IsUpper(identifier[current]))
            {
                current++;
            }

            if (current < length && IsLower(identifier[current]))
            {
                // hit the 'o' in XMLDo.  Return "XML": the last upper-case letter belongs to
                // the following word, so the span stops one character before it.
                Debug.Assert(char.IsUpper(identifier[current - 1]));
                var end = current - 1;
                return new TextSpan(wordStart, end - wordStart);
            }
            else
            {
                // Hit something else (punctuation, end of string, etc.)
                // return the entire upper-case section.
                return new TextSpan(wordStart, current - wordStart);
            }
        }
        else if (IsLower(c))
        {
            // "Do"
            //
            // scan the lowercase letters from here on to scan out 'Document'.
            return ScanLowerCaseRun(identifier, length, wordStart);
        }
        else
        {
            return new TextSpan(wordStart, 1);
        }
    }

    /// <summary>
    /// Returns the span covering the character at <paramref name="wordStart"/> and every
    /// lower-case letter that immediately follows it.
    /// </summary>
    private static TextSpan ScanLowerCaseRun(string identifier, int length, int wordStart)
    {
        var current = wordStart + 1;
        while (current < length && IsLower(identifier[current]))
        {
            current++;
        }

        return new TextSpan(wordStart, current - wordStart);
    }

    /// <summary>
    /// Returns the span covering the character at <paramref name="wordStart"/> and every digit
    /// that immediately follows it.
    /// </summary>
    private static TextSpan ScanNumber(string identifier, int length, int wordStart)
    {
        var current = wordStart + 1;
        while (current < length && char.IsDigit(identifier[current]))
        {
            current++;
        }

        return TextSpan.FromBounds(wordStart, current);
    }

    /// <summary>
    /// Advances <paramref name="wordStart"/> past punctuation characters, except '_', which is
    /// left in place so it can be reported as a part.
    /// </summary>
    /// <returns>
    /// The index of the first character that is '_' or not punctuation, or
    /// <paramref name="length"/> if there is none.
    /// </returns>
    private static int SkipPunctuation(string identifier, int length, int wordStart)
    {
        while (wordStart < length)
        {
            var ch = identifier[wordStart];
            if (ch != '_' && char.IsPunctuation(ch))
            {
                wordStart++;
                continue;
            }

            break;
        }

        return wordStart;
    }

    /// <summary>
    /// Lower-case test that answers ASCII characters with a plain range check and defers to
    /// <see cref="char.IsLower(char)"/> for everything else.
    /// </summary>
    private static bool IsLower(char c)
    {
        if (IsAscii(c))
        {
            return c is >= 'a' and <= 'z';
        }

        return char.IsLower(c);
    }

    /// <summary>
    /// Returns <see langword="true"/> when <paramref name="v"/> is below 0x80.
    /// </summary>
    private static bool IsAscii(char v)
        => v < 0x80;
}