File: src\Workspaces\SharedUtilitiesAndExtensions\Compiler\Core\Utilities\StringBreaker.cs
Web Access
Project: src\src\Workspaces\Core\Portable\Microsoft.CodeAnalysis.Workspaces.csproj (Microsoft.CodeAnalysis.Workspaces)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
using System.Diagnostics;
using Microsoft.CodeAnalysis.Shared.Collections;
using Microsoft.CodeAnalysis.Text;
 
namespace Microsoft.CodeAnalysis.Shared.Utilities;
 
/// <summary>
/// Splits a string (typically an identifier) into spans describing its parts: runs of letters,
/// runs of digits and individual underscores.  Punctuation other than <c>_</c> is skipped and
/// never reported as a part.
/// </summary>
internal static class StringBreaker
{
    /// <summary>
    /// Appends the word-level parts of <paramref name="identifier"/> to <paramref name="parts"/>.
    /// In word mode a run of upper-case letters stays together, so <c>XMLDocument</c> is broken
    /// into <c>XML</c> and <c>Document</c>.
    /// </summary>
    /// <param name="identifier">The text to break apart.</param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddWordParts(string identifier, ref TemporaryArray<TextSpan> parts)
    {
        AddParts(identifier, word: true, ref parts);
    }

    /// <summary>
    /// Appends the character-level parts of <paramref name="identifier"/> to <paramref name="parts"/>.
    /// In character mode each upper-case letter that is not followed by a lower-case letter is its
    /// own part, so <c>XMLDocument</c> is broken into <c>X</c>, <c>M</c>, <c>L</c> and <c>Document</c>.
    /// </summary>
    /// <param name="identifier">The text to break apart.</param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddCharacterParts(string identifier, ref TemporaryArray<TextSpan> parts)
    {
        AddParts(identifier, word: false, ref parts);
    }

    /// <summary>
    /// Repeatedly calls <see cref="GenerateSpan"/> from the end of the previously produced span and
    /// adds every non-empty result to <paramref name="parts"/>.
    /// </summary>
    /// <remarks>
    /// Scanning stops at the end of <paramref name="text"/> or as soon as <see cref="GenerateSpan"/>
    /// produces an empty span.  The latter also happens when the next character is one that
    /// <see cref="GenerateSpan"/> does not classify, in which case the remainder of the text
    /// contributes no parts.
    /// </remarks>
    /// <param name="text">The text to break apart.</param>
    /// <param name="word"><see langword="true"/> for word parts, <see langword="false"/> for character parts.</param>
    /// <param name="parts">Receives one span per part, in order of appearance.</param>
    public static void AddParts(string text, bool word, ref TemporaryArray<TextSpan> parts)
    {
        var position = 0;
        while (position < text.Length)
        {
            var next = GenerateSpan(text, position, word);
            if (next.IsEmpty)
            {
                // Nothing further could be scanned from this position.
                return;
            }

            Debug.Assert(next.Start >= position, "Bad generator.");

            parts.Add(next);

            // Resume right after the part that was just produced.
            position = next.End;
        }
    }

    /// <summary>
    /// Produces the span of the next part of <paramref name="identifier"/> at or after
    /// <paramref name="wordStart"/>.
    /// </summary>
    /// <remarks>
    /// Leading punctuation other than <c>_</c> is skipped first.  The first remaining character
    /// then decides the kind of part:
    /// <list type="bullet">
    /// <item><description>upper-case letter: a single-character span if it is the last character,
    /// otherwise a word run or a character run depending on <paramref name="word"/>;</description></item>
    /// <item><description>lower-case letter: the run of lower-case letters starting there;</description></item>
    /// <item><description><c>_</c>: a single-character span;</description></item>
    /// <item><description>digit: the run of digits starting there.</description></item>
    /// </list>
    /// </remarks>
    /// <param name="identifier">The text being scanned.</param>
    /// <param name="wordStart">Index at which scanning begins.</param>
    /// <param name="word"><see langword="true"/> for word parts, <see langword="false"/> for character parts.</param>
    /// <returns>
    /// The span of the next part, or <see langword="default"/> when the end of the text was reached
    /// or the first remaining character is none of the kinds listed above.
    /// </returns>
    public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
    {
        var length = identifier.Length;
        var start = SkipPunctuation(identifier, length, wordStart);
        if (start >= length)
        {
            return default;
        }

        var first = identifier[start];
        if (char.IsUpper(first))
        {
            if (start + 1 == length)
            {
                // A trailing upper-case letter is a part on its own.
                return new TextSpan(start, 1);
            }

            return word
                ? ScanWordRun(identifier, length, start)
                : ScanCharacterRun(identifier, length, start);
        }

        if (IsLower(first))
        {
            return ScanLowerCaseRun(identifier, length, start);
        }

        if (first == '_')
        {
            return new TextSpan(start, 1);
        }

        if (char.IsDigit(first))
        {
            return ScanNumber(identifier, length, start);
        }

        // Any other character is not classified; report "no part".
        return default;
    }

    /// <summary>
    /// Scans a character-mode part that starts with the upper-case letter at
    /// <paramref name="wordStart"/>.  For <c>XMLDocument</c> this yields <c>X</c>, <c>M</c>,
    /// <c>L</c> and finally <c>Document</c>.
    /// </summary>
    /// <remarks>The caller must guarantee that a character follows <paramref name="wordStart"/>.</remarks>
    private static TextSpan ScanCharacterRun(string identifier, int length, int wordStart)
    {
        var following = wordStart + 1;
        Debug.Assert(following < length);

        // "Do": an upper-case letter followed by a lower-case one starts a word such as
        // 'Document', so take the lower-case letters along.  Otherwise the letter stands alone.
        return IsLower(identifier[following])
            ? ScanLowerCaseRun(identifier, length, wordStart)
            : new TextSpan(wordStart, 1);
    }

    /// <summary>
    /// Scans a word-mode part that starts with the upper-case letter at
    /// <paramref name="wordStart"/>.  For <c>XMLDocument</c> this yields <c>XML</c> and then
    /// <c>Document</c>.
    /// </summary>
    /// <remarks>The caller must guarantee that a character follows <paramref name="wordStart"/>.</remarks>
    private static TextSpan ScanWordRun(string identifier, int length, int wordStart)
    {
        var following = wordStart + 1;
        Debug.Assert(following < length);
        var second = identifier[following];

        if (!char.IsUpper(second))
        {
            // "Do": take the lower-case letters along to scan out 'Document'.
            // Anything else after the first letter leaves that letter as a part by itself.
            return IsLower(second)
                ? ScanLowerCaseRun(identifier, length, wordStart)
                : new TextSpan(wordStart, 1);
        }

        // "XM": at least two upper-case letters in a row.  Find the end of the upper-case run.
        var runEnd = following + 1;
        while (runEnd < length && char.IsUpper(identifier[runEnd]))
        {
            runEnd++;
        }

        var followedByLower = runEnd < length && IsLower(identifier[runEnd]);
        if (followedByLower)
        {
            // Hit the 'o' in "XMLDo".  The final upper-case letter ('D') begins the next word,
            // so it is left out and only "XML" is returned.
            Debug.Assert(char.IsUpper(identifier[runEnd - 1]));
            runEnd--;
        }

        // When the run is followed by something else (punctuation, end of string, etc.) the
        // entire upper-case section is returned.
        return new TextSpan(wordStart, runEnd - wordStart);
    }

    /// <summary>
    /// Returns the span made of the character at <paramref name="wordStart"/> (whatever it is)
    /// plus all lower-case letters that immediately follow it.
    /// </summary>
    private static TextSpan ScanLowerCaseRun(string identifier, int length, int wordStart)
    {
        var end = wordStart + 1;
        for (; end < length; end++)
        {
            if (!IsLower(identifier[end]))
            {
                break;
            }
        }

        return new TextSpan(wordStart, end - wordStart);
    }

    /// <summary>
    /// Returns the span made of the character at <paramref name="wordStart"/> plus all digits
    /// that immediately follow it.
    /// </summary>
    private static TextSpan ScanNumber(string identifier, int length, int wordStart)
    {
        var end = wordStart + 1;
        for (; end < length; end++)
        {
            if (!char.IsDigit(identifier[end]))
            {
                break;
            }
        }

        return TextSpan.FromBounds(wordStart, end);
    }

    /// <summary>
    /// Advances from <paramref name="wordStart"/> past consecutive punctuation characters and
    /// returns the index of the first character that should not be skipped (or
    /// <paramref name="length"/> if there is none).
    /// </summary>
    /// <remarks>
    /// An underscore is never skipped here; <see cref="GenerateSpan"/> reports it as a part.
    /// </remarks>
    private static int SkipPunctuation(string identifier, int length, int wordStart)
    {
        for (; wordStart < length; wordStart++)
        {
            var ch = identifier[wordStart];
            if (ch == '_' || !char.IsPunctuation(ch))
            {
                break;
            }
        }

        return wordStart;
    }

    /// <summary>
    /// Lower-case test that answers ASCII characters with a simple range check and defers to
    /// <see cref="char.IsLower(char)"/> for everything else.
    /// </summary>
    private static bool IsLower(char c)
        => IsAscii(c)
            ? c is >= 'a' and <= 'z'
            : char.IsLower(c);

    /// <summary>
    /// Returns <see langword="true"/> when <paramref name="v"/> is below 0x80.
    /// </summary>
    private static bool IsAscii(char v)
    {
        return v < 0x80;
    }
}