// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using Microsoft.CodeAnalysis.Collections;
using Microsoft.CodeAnalysis.Text;

namespace Microsoft.CodeAnalysis.Shared.Utilities;

/// <summary>
/// Splits identifier-like text into <see cref="TextSpan"/> parts based on casing, underscores and
/// digit runs.
/// </summary>
internal static class StringBreaker
{
    /// <summary>
    /// Breaks an identifier into word parts and appends them to <paramref name="parts"/>.
    /// A run of upper-case letters is kept together, so <c>XMLDocument</c> yields
    /// <c>XML</c> and <c>Document</c>.
    /// </summary>
    /// <param name="identifier">The text to break apart.</param>
    /// <param name="parts">The array that receives the resulting spans.</param>
    public static void AddWordParts(string identifier, ref TemporaryArray<TextSpan> parts)
        => AddParts(identifier, word: true, ref parts);

    /// <summary>
    /// Breaks an identifier into character parts and appends them to <paramref name="parts"/>.
    /// Each upper-case letter starts its own part, so <c>XMLDocument</c> yields
    /// <c>X</c>, <c>M</c>, <c>L</c> and <c>Document</c>.
    /// </summary>
    /// <param name="identifier">The text to break apart.</param>
    /// <param name="parts">The array that receives the resulting spans.</param>
    public static void AddCharacterParts(string identifier, ref TemporaryArray<TextSpan> parts)
        => AddParts(identifier, word: false, ref parts);

    /// <summary>
    /// Repeatedly calls <see cref="GenerateSpan"/> from the start of <paramref name="text"/> and
    /// appends every non-empty span to <paramref name="parts"/>.
    /// </summary>
    /// <param name="text">The text to break apart.</param>
    /// <param name="word">
    /// <see langword="true"/> to produce word parts; <see langword="false"/> to produce character parts.
    /// </param>
    /// <param name="parts">The array that receives the resulting spans.</param>
    /// <remarks>
    /// Processing stops at the first empty span, which <see cref="GenerateSpan"/> returns both at the
    /// end of the text and when it meets a character it does not classify.
    /// </remarks>
    public static void AddParts(string text, bool word, ref TemporaryArray<TextSpan> parts)
    {
        var position = 0;
        while (position < text.Length)
        {
            var next = GenerateSpan(text, position, word);
            if (next.IsEmpty)
            {
                // Nothing further can be produced.
                return;
            }

            Debug.Assert(next.Start >= position, "Bad generator.");
            parts.Add(next);
            position = next.End;
        }
    }

    /// <summary>
    /// Produces the next part of <paramref name="identifier"/> at or after
    /// <paramref name="wordStart"/>, skipping any leading punctuation other than <c>'_'</c>.
    /// </summary>
    /// <param name="identifier">The text being broken apart.</param>
    /// <param name="wordStart">The index at which to start looking for a part.</param>
    /// <param name="word">
    /// <see langword="true"/> to group upper-case runs as words; <see langword="false"/> to emit
    /// upper-case letters individually.
    /// </param>
    /// <returns>
    /// The span of the next part, or an empty (default) span when the end of the text is reached or
    /// the next character is not an upper-case letter, lower-case letter, underscore or digit.
    /// </returns>
    public static TextSpan GenerateSpan(string identifier, int wordStart, bool word)
    {
        var length = identifier.Length;
        wordStart = SkipPunctuation(identifier, length, wordStart);
        if (wordStart >= length)
        {
            return default;
        }

        var firstChar = identifier[wordStart];
        if (char.IsUpper(firstChar))
        {
            // A trailing upper-case letter has no successor to inspect; it is a part by itself.
            if (wordStart + 1 == length)
            {
                return new TextSpan(wordStart, 1);
            }

            return word
                ? ScanWordRun(identifier, length, wordStart)
                : ScanCharacterRun(identifier, length, wordStart);
        }

        if (IsLower(firstChar))
        {
            return ScanLowerCaseRun(identifier, length, wordStart);
        }

        if (firstChar == '_')
        {
            // Every underscore is reported as its own single-character part.
            return new TextSpan(wordStart, 1);
        }

        if (char.IsDigit(firstChar))
        {
            return ScanNumber(identifier, length, wordStart);
        }

        return default;
    }

    /// <summary>
    /// Scans a part that begins with an upper-case letter in character mode. The caller must ensure
    /// that a character follows <paramref name="wordStart"/>.
    /// </summary>
    private static TextSpan ScanCharacterRun(string identifier, int length, int wordStart)
    {
        // In a character run, XMLDocument is broken up into X, M, L, and Document.
        var following = wordStart + 1;
        Debug.Assert(following < length);

        // "Do": the capital absorbs the lower-case letters after it, giving 'Document'.
        // Anything else leaves the capital standing alone.
        return IsLower(identifier[following])
            ? ScanLowerCaseRun(identifier, length, wordStart)
            : new TextSpan(wordStart, 1);
    }

    /// <summary>
    /// Scans a part that begins with an upper-case letter in word mode. The caller must ensure
    /// that a character follows <paramref name="wordStart"/>.
    /// </summary>
    private static TextSpan ScanWordRun(string identifier, int length, int wordStart)
    {
        // In a word run, XMLDocument is broken up into XML and Document.
        var following = wordStart + 1;
        Debug.Assert(following < length);

        var second = identifier[following];
        if (!char.IsUpper(second))
        {
            // "Do": scan out 'Document'. Any other follower leaves the capital standing alone.
            return IsLower(second)
                ? ScanLowerCaseRun(identifier, length, wordStart)
                : new TextSpan(wordStart, 1);
        }

        // "XM": consume the rest of the upper-case run.
        var runEnd = following + 1;
        while (runEnd < length && char.IsUpper(identifier[runEnd]))
        {
            runEnd++;
        }

        if (runEnd < length && IsLower(identifier[runEnd]))
        {
            // Hit the 'o' in XMLDo. The last capital belongs to the next word, so return "XML".
            Debug.Assert(char.IsUpper(identifier[runEnd - 1]));
            runEnd--;
        }

        // Otherwise (punctuation, end of string, etc.) the entire upper-case section is returned.
        return TextSpan.FromBounds(wordStart, runEnd);
    }

    /// <summary>
    /// Returns the span that starts at <paramref name="wordStart"/> and extends over every
    /// consecutive lower-case letter after that first character.
    /// </summary>
    private static TextSpan ScanLowerCaseRun(string identifier, int length, int wordStart)
    {
        var end = wordStart + 1;
        for (; end < length; end++)
        {
            if (!IsLower(identifier[end]))
            {
                break;
            }
        }

        return TextSpan.FromBounds(wordStart, end);
    }

    /// <summary>
    /// Returns the span that starts at <paramref name="wordStart"/> and extends over every
    /// consecutive digit after that first character.
    /// </summary>
    private static TextSpan ScanNumber(string identifier, int length, int wordStart)
    {
        var end = wordStart + 1;
        for (; end < length; end++)
        {
            if (!char.IsDigit(identifier[end]))
            {
                break;
            }
        }

        return TextSpan.FromBounds(wordStart, end);
    }

    /// <summary>
    /// Advances past punctuation starting at <paramref name="wordStart"/> and returns the index of
    /// the first character that is either not punctuation or is an underscore.
    /// </summary>
    private static int SkipPunctuation(string identifier, int length, int wordStart)
    {
        var index = wordStart;

        // '_' is excluded explicitly so that it is never skipped here; GenerateSpan emits it as a part.
        while (index < length && identifier[index] != '_' && char.IsPunctuation(identifier[index]))
        {
            index++;
        }

        return index;
    }

    /// <summary>
    /// Lower-case test with a fast path for ASCII; other characters defer to
    /// <see cref="char.IsLower(char)"/>.
    /// </summary>
    private static bool IsLower(char c)
        => IsAscii(c) ? c is >= 'a' and <= 'z' : char.IsLower(c);

    /// <summary>Returns whether <paramref name="v"/> is below 0x80.</summary>
    private static bool IsAscii(char v)
        => v < 0x80;
}