File: InternableString.Simple.cs
Web Access
Project: ..\..\..\src\StringTools\StringTools.csproj (Microsoft.NET.StringTools.net35)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System;
using System.Text;
 
namespace System
{
    /// <summary>
    /// A bare minimum and inefficient version of MemoryExtensions as provided in System.Memory on .NET 4.5.
    /// </summary>
    public static class MemoryExtensions
    {
        public static string AsSpan<T>(this T[] array, int start, int length)
        {
            if (array is char[] charArray)
            {
                return new string(charArray, start, length);
            }
            throw new ArgumentException(nameof(array));
        }
    }
}
 
namespace Microsoft.NET.StringTools
{
    /// <summary>
    /// Represents a string that can be converted to System.String with interning, i.e. by returning an existing string if it has been seen before
    /// and is still tracked in the intern table.
    /// </summary>
    /// <remarks>
    /// This is a simple and inefficient implementation compatible with .NET Framework 3.5.
    /// </remarks>
    internal struct InternableString
    {
        /// <summary>
        /// Enumerator for the top-level struct. Enumerates characters of the string.
        /// </summary>
        public struct Enumerator
        {
            /// <summary>
            /// The InternableString being enumerated.
            /// </summary>
            private InternableString _string;
 
            /// <summary>
            /// Index of the current character, -1 if MoveNext has not been called yet.
            /// </summary>
            private int _charIndex;
 
            public Enumerator(InternableString spanBuilder)
            {
                _string = spanBuilder;
                _charIndex = -1;
            }
 
            /// <summary>
            /// Returns the current character.
            /// </summary>
            public readonly char Current => (_string._builder == null ? _string.FirstString[_charIndex] : _string._builder[_charIndex]);
 
            /// <summary>
            /// Moves to the next character.
            /// </summary>
            /// <returns>True if there is another character, false if the enumerator reached the end.</returns>
            public bool MoveNext()
            {
                int newIndex = _charIndex + 1;
                if (newIndex < _string.Length)
                {
                    _charIndex = newIndex;
                    return true;
                }
                return false;
            }
        }
 
        /// <summary>
        /// If this instance wraps a StringBuilder, it uses this backing field.
        /// </summary>
        private StringBuilder? _builder;
 
        /// <summary>
        /// If this instance represents one contiguous string, it may be held in this field.
        /// </summary>
        private string? _firstString;
 
        /// <summary>
        /// A convenience getter to ensure that we always operate on a non-null string.
        /// </summary>
        private readonly string FirstString => _firstString ?? string.Empty;
 
        /// <summary>
        /// Constructs a new InternableString wrapping the given string.
        /// </summary>
        /// <param name="str">The string to wrap, must be non-null.</param>
        internal InternableString(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }
            _builder = null;
            _firstString = str;
        }
 
        /// <summary>
        /// Constructs a new InternableString wrapping the given SpanBasedStringBuilder.
        /// </summary>
        internal InternableString(SpanBasedStringBuilder builder)
        {
            _builder = builder.Builder;
            _firstString = null;
        }
 
        /// <summary>
        /// Gets the length of the string.
        /// </summary>
        public readonly int Length => (_builder == null ? FirstString.Length : _builder.Length);
 
        /// <summary>
        /// Creates a new enumerator for enumerating characters in this string. Does not allocate.
        /// </summary>
        /// <returns>The enumerator.</returns>
        public readonly Enumerator GetEnumerator()
        {
            return new Enumerator(this);
        }
 
        /// <summary>
        /// Returns true if the string is equal to another string by ordinal comparison.
        /// </summary>
        /// <param name="other">Another string.</param>
        /// <returns>True if this string is equal to <paramref name="other"/>.</returns>
        public readonly bool Equals(string other)
        {
            if (other.Length != Length)
            {
                return false;
            }
 
            if (_firstString != null)
            {
                return _firstString.Equals(other);
            }
            if (_builder != null)
            {
                for (int i = 0; i < other.Length; i++)
                {
                    // Note: This indexing into the StringBuilder could be O(N). We prefer it over allocating
                    // a new string with ToString().
                    if (other[i] != _builder[i])
                    {
                        return false;
                    }
                }
            }
            return true;
        }
 
        /// <summary>
        /// Returns a System.String representing this string. Allocates memory unless this InternableString was created by wrapping a
        /// System.String in which case the original string is returned.
        /// </summary>
        /// <returns>The string.</returns>
        public readonly string ExpensiveConvertToString()
        {
            // Special case: if we hold just one string, we can directly return it.
            if (_firstString != null)
            {
                return _firstString;
            }
            return _builder?.ToString() ?? string.Empty;
        }
 
        /// <summary>
        /// Returns true if this InternableString wraps a System.String and the same System.String is passed as the argument.
        /// </summary>
        /// <param name="str">The string to compare to.</param>
        /// <returns>True is this instance wraps the given string.</returns>
        public readonly bool ReferenceEquals(string str)
        {
            return ReferenceEquals(str, _firstString);
        }
 
        /// <summary>
        /// Converts this instance to a System.String while first searching for a match in the intern table.
        /// </summary>
        /// <remarks>
        /// May allocate depending on whether the string has already been interned.
        /// </remarks>
        public override unsafe string ToString()
        {
            return WeakStringCacheInterner.Instance.InternableToString(ref this);
        }
 
        /// <summary>
        /// Implements the simple yet very decently performing djb2 hash function (xor version).
        /// </summary>
        /// <returns>A stable hashcode of the string represented by this instance.</returns>
        public override readonly int GetHashCode()
        {
            uint hash = (5381 << 16) + 5381;
            bool isOddIndex = false;
 
            if (_firstString != null)
            {
                foreach (char ch in _firstString)
                {
                    hash = HashOneCharacter(hash, ch, isOddIndex);
                    isOddIndex = !isOddIndex;
                }
            }
            else if (_builder != null)
            {
                for (int i = 0; i < _builder.Length; i++)
                {
                    hash = HashOneCharacter(hash, _builder[i], isOddIndex);
                    isOddIndex = !isOddIndex;
                }
            }
            return (int)hash;
        }
 
        /// <summary>
        /// A helper to hash one character.
        /// </summary>
        /// <param name="hash">The running hash code.</param>
        /// <param name="ch">The character to hash.</param>
        /// <param name="isOddIndex">True if the index of the character in the string is odd.</param>
        /// <returns></returns>
        private static uint HashOneCharacter(uint hash, char ch, bool isOddIndex)
        {
            if (isOddIndex)
            {
                // The hash code was rotated for the previous character, just xor.
                return hash ^ ((uint)ch << 16);
            }
 
            uint rotatedHash = (hash << 5) | (hash >> (32 - 5));
            return (rotatedHash + hash) ^ ch;
        }
    }
}