// File: System\Text\RegularExpressions\MatchCollection.cs
// Web Access
// Project: src\src\libraries\System.Text.RegularExpressions\src\System.Text.RegularExpressions.csproj (System.Text.RegularExpressions)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
 
namespace System.Text.RegularExpressions
{
    /// <summary>
    /// Holds the successful matches produced by repeatedly applying a regular expression pattern
    /// to an input string. Instances are read-only, cannot be constructed by callers, and are
    /// obtained from the <see cref="Regex.Matches(string)" /> method.
    /// </summary>
    /// <remarks>
    /// <para>
    /// A collection holds zero or more <see cref="Match" /> objects: one per match found in the
    /// input string, or none at all (with <see cref="Count" /> equal to zero) when the pattern
    /// does not match.
    /// </para>
    /// <para>
    /// The regular expression engine fills a <see cref="MatchCollection" /> in one of two ways:
    /// </para>
    /// <list type="bullet">
    /// <item>
    /// <term>Direct evaluation</term>
    /// <description>
    /// Every match resulting from a given <see cref="Regex.Matches(string)" /> call is located and
    /// stored in one go. This happens when the <see cref="Count" /> property is read, and it is
    /// usually the costlier of the two approaches.
    /// </description>
    /// </item>
    /// <item>
    /// <term>Lazy evaluation</term>
    /// <description>
    /// Matches are located one at a time, only when they are requested, much as if
    /// <see cref="Regex.Match(string)" /> were called repeatedly and each result appended to the
    /// collection. This happens when the collection is walked through
    /// <see cref="GetEnumerator" /> or a <c>foreach</c> statement.
    /// </description>
    /// </item>
    /// </list>
    /// <para>
    /// Prefer your language's iteration construct (for example <c>foreach</c> in C#) over calling
    /// <see cref="GetEnumerator" /> and driving the returned enumerator by hand.
    /// </para>
    /// </remarks>
    [DebuggerDisplay("Count = {Count}")]
    [DebuggerTypeProxy(typeof(CollectionDebuggerProxy<Match>))]
    public class MatchCollection : IList<Match>, IReadOnlyList<Match>, IList
    {
        // Matches located so far, in the order they were found.
        private readonly List<Match> _matches;

        // Pattern and input text that every further scan is run against.
        private readonly Regex _regex;
        private readonly string _input;

        // Scan state carried from one match to the next: the position handed to the next scan,
        // and the length of the previous match (-1 until a first match has been recorded).
        private int _startat;
        private int _prevlen;

        // Set once a scan fails to find a match; no further scans are attempted after that.
        private bool _done;

        /// <summary>
        /// Creates an unpopulated collection that will scan <paramref name="input" /> with
        /// <paramref name="regex" />, beginning at <paramref name="startat" />.
        /// </summary>
        internal MatchCollection(Regex regex, string input, int startat)
        {
            // One unsigned comparison rejects both negative values and values beyond the input length.
            if ((uint)input.Length < (uint)startat)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.startat, ExceptionResource.BeginIndexNotNegative);
            }

            _matches = new List<Match>();
            _regex = regex;
            _input = input;
            _startat = startat;
            _prevlen = -1;
        }

        /// <summary>Gets a value indicating whether the collection is read-only.</summary>
        /// <value>Always <see langword="true" />.</value>
        public bool IsReadOnly
        {
            get { return true; }
        }

        /// <summary>Gets the total number of matches in the collection.</summary>
        /// <value>The number of matches.</value>
        /// <remarks>
        /// <para>
        /// Reading <see cref="Count" /> forces direct evaluation: every remaining match is located
        /// before the value is returned. Enumerating with <see cref="GetEnumerator" /> or
        /// <c>foreach</c> instead locates matches only as they are needed, which can be
        /// considerably cheaper.
        /// </para>
        /// <para>
        /// Since the collection is normally filled lazily, asking for the count before it is
        /// complete may throw a <see cref="RegexMatchTimeoutException" /> when a matching time-out
        /// is in effect and locating a single match takes longer than that interval.
        /// </para>
        /// </remarks>
        /// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
        public int Count => GetAllMatches().Count;

        /// <summary>Gets the match at the specified position in the collection.</summary>
        /// <param name="i">Zero-based index into the <see cref="Match" /> collection.</param>
        /// <value>The <see cref="Match" /> at position <paramref name="i" />.</value>
        /// <remarks>
        /// <para>
        /// In C# the <see cref="this[int]" /> property is an indexer, so the collection can be
        /// accessed with array-style syntax rather than by naming the property.
        /// </para>
        /// <para>
        /// Indexed access is commonly paired with <see cref="Count" />, but reading
        /// <see cref="Count" /> makes the engine build the entire collection up front. That is
        /// typically more expensive than enumerating with <see cref="GetEnumerator" /> or
        /// <c>foreach</c>.
        /// </para>
        /// <para>
        /// Since the collection is normally filled lazily, reaching a particular match may throw a
        /// <see cref="RegexMatchTimeoutException" /> when a matching time-out is in effect and
        /// locating that match takes longer than the interval.
        /// </para>
        /// </remarks>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="i" /> is less than 0 or greater than or equal to <see cref="Count" />.
        /// </exception>
        /// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
        public virtual Match this[int i]
        {
            get
            {
                // Negative indexes never reach GetMatch; they are reported the same way as an
                // index past the last match.
                Match? found = i >= 0 ? GetMatch(i) : null;
                if (found is null)
                {
                    ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.i);
                }

                return found;
            }
        }

        /// <summary>Returns an enumerator that walks the matches in the collection.</summary>
        /// <returns>
        /// An enumerator over every <see cref="Match" /> in the <see cref="MatchCollection" />.
        /// </returns>
        /// <remarks>
        /// <para>
        /// Rather than calling <see cref="GetEnumerator" /> directly, use the iteration construct
        /// of your programming language (such as <c>foreach</c> in C#).
        /// </para>
        /// <para>
        /// Walking the collection through <see cref="GetEnumerator" /> or <c>foreach</c> fills it
        /// lazily, one match at a time. Reading <see cref="Count" />, by contrast, fills it all at
        /// once by direct evaluation, which can be much more expensive.
        /// </para>
        /// <para>
        /// Since the collection is normally filled lazily, advancing to the next element may throw
        /// a <see cref="RegexMatchTimeoutException" /> when a matching time-out is in effect and
        /// locating the next match takes longer than the interval.
        /// </para>
        /// </remarks>
        /// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
        public IEnumerator GetEnumerator()
        {
            return new Enumerator(this);
        }

        IEnumerator<Match> IEnumerable<Match>.GetEnumerator()
        {
            return new Enumerator(this);
        }

        /// <summary>
        /// Returns the match at index <paramref name="i" />, running further scans when that match
        /// has not been located yet. Returns <see langword="null" /> when scanning ends before
        /// index <paramref name="i" /> is reached.
        /// </summary>
        private Match? GetMatch(int i)
        {
            Debug.Assert(i >= 0, "i cannot be negative.");

            // Already located on an earlier call.
            if (i < _matches.Count)
            {
                return _matches[i];
            }

            // A previous scan already failed, so there is nothing more to find.
            if (_done)
            {
                return null;
            }

            while (true)
            {
                Match next = _regex.RunSingleMatch(RegexRunnerMode.FullMatchRequired, _prevlen, _input, 0, _input.Length, _startat)!;
                if (!next.Success)
                {
                    _done = true;
                    return null;
                }

                // Record the match and carry its length and position forward to the next scan.
                _matches.Add(next);
                _prevlen = next.Length;
                _startat = next._textpos;

                if (_matches.Count > i)
                {
                    return next;
                }
            }
        }

        /// <summary>
        /// Runs scanning to completion if it has not finished yet, then returns the backing list.
        /// </summary>
        private List<Match> GetAllMatches()
        {
            if (!_done)
            {
                // Requesting the largest possible index keeps scanning until a scan fails.
                GetMatch(int.MaxValue);
            }

            return _matches;
        }

        /// <summary>Creates the exception thrown by every mutating interface member.</summary>
        private static NotSupportedException ReadOnlyCollectionException()
        {
            return new NotSupportedException(SR.NotSupported_ReadOnlyCollection);
        }

        /// <summary>
        /// Gets a value indicating whether access to the collection is synchronized (thread-safe).
        /// </summary>
        /// <value>Always <see langword="false" />.</value>
        public bool IsSynchronized
        {
            get { return false; }
        }

        /// <summary>Gets an object that can be used to synchronize access to the collection.</summary>
        /// <value>
        /// The synchronization object for the collection. This property always returns the
        /// collection instance itself.
        /// </value>
        public object SyncRoot
        {
            get { return this; }
        }

        /// <summary>
        /// Copies every element of the collection into <paramref name="array" />, starting at the
        /// given index.
        /// </summary>
        /// <param name="array">The array that receives the elements.</param>
        /// <param name="arrayIndex">The position in the array at which copying starts.</param>
        /// <remarks>
        /// Since the collection is normally filled lazily, copying it before it is complete may
        /// throw a <see cref="RegexMatchTimeoutException" /> when a matching time-out is in effect
        /// and locating a single match takes longer than that interval.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// <paramref name="array" /> is multi-dimensional.
        /// -or-
        /// The number of elements in the source <see cref="MatchCollection" /> is greater than the
        /// available space from <paramref name="arrayIndex" /> to the end of <paramref name="array" />.
        /// -or-
        /// The type of the source <see cref="MatchCollection" /> cannot be cast automatically to the
        /// type of the destination <paramref name="array" />.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="arrayIndex" /> is less than the lower bound of <paramref name="array" />.
        /// </exception>
        /// <exception cref="RegexMatchTimeoutException">A time-out occurred.</exception>
        public void CopyTo(Array array, int arrayIndex)
        {
            // The collection is fully populated first; argument checking is left to the list's own copy.
            ICollection allMatches = GetAllMatches();
            allMatches.CopyTo(array, arrayIndex);
        }

        /// <summary>
        /// Copies the elements of the collection into a <see cref="Match" /> array, starting at a
        /// particular array index.
        /// </summary>
        /// <param name="array">
        /// The one-dimensional, zero-based array that receives the elements copied from the
        /// collection.
        /// </param>
        /// <param name="arrayIndex">
        /// The zero-based index in <paramref name="array" /> at which copying begins.
        /// </param>
        public void CopyTo(Match[] array, int arrayIndex)
        {
            // The collection is fully populated first; argument checking is left to the list's own copy.
            List<Match> allMatches = GetAllMatches();
            allMatches.CopyTo(array, arrayIndex);
        }

        int IList<Match>.IndexOf(Match item) => GetAllMatches().IndexOf(item);

        void IList<Match>.Insert(int index, Match item)
        {
            throw ReadOnlyCollectionException();
        }

        void IList<Match>.RemoveAt(int index)
        {
            throw ReadOnlyCollectionException();
        }

        Match IList<Match>.this[int index]
        {
            get { return this[index]; }
            set { throw ReadOnlyCollectionException(); }
        }

        void ICollection<Match>.Add(Match item)
        {
            throw ReadOnlyCollectionException();
        }

        void ICollection<Match>.Clear()
        {
            throw ReadOnlyCollectionException();
        }

        bool ICollection<Match>.Contains(Match item) => GetAllMatches().Contains(item);

        bool ICollection<Match>.Remove(Match item)
        {
            throw ReadOnlyCollectionException();
        }

        int IList.Add(object? value)
        {
            throw ReadOnlyCollectionException();
        }

        void IList.Clear()
        {
            throw ReadOnlyCollectionException();
        }

        bool IList.Contains(object? value)
        {
            // Anything that is not a Match cannot be in the collection; no scanning is triggered for it.
            return value is Match candidate && GetAllMatches().Contains(candidate);
        }

        int IList.IndexOf(object? value)
        {
            // Anything that is not a Match is reported as absent without triggering any scanning.
            return value is Match candidate ? GetAllMatches().IndexOf(candidate) : -1;
        }

        void IList.Insert(int index, object? value)
        {
            throw ReadOnlyCollectionException();
        }

        bool IList.IsFixedSize
        {
            get { return true; }
        }

        void IList.Remove(object? value)
        {
            throw ReadOnlyCollectionException();
        }

        void IList.RemoveAt(int index)
        {
            throw ReadOnlyCollectionException();
        }

        object? IList.this[int index]
        {
            get { return this[index]; }
            set { throw ReadOnlyCollectionException(); }
        }

        /// <summary>
        /// Cursor over a <see cref="MatchCollection" /> that asks the collection for each match by
        /// index as it advances, so matches are located only when the cursor reaches them.
        /// </summary>
        private sealed class Enumerator : IEnumerator<Match>
        {
            // Cursor values that do not refer to an element of the collection.
            private const int BeforeFirst = -1;
            private const int PastEnd = -2;

            private readonly MatchCollection _collection;
            private int _index;

            internal Enumerator(MatchCollection collection)
            {
                Debug.Assert(collection != null, "collection cannot be null.");

                _index = BeforeFirst;
                _collection = collection;
            }

            /// <summary>
            /// Moves to the next match. Returns <see langword="false" /> once the collection has no
            /// match at the next position, and on every later call until the cursor is reset.
            /// </summary>
            public bool MoveNext()
            {
                if (_index != PastEnd)
                {
                    _index++;
                    if (_collection.GetMatch(_index) is null)
                    {
                        _index = PastEnd;
                    }
                }

                // After a successful advance the cursor is non-negative, so it can only equal
                // PastEnd when the enumeration is over.
                return _index != PastEnd;
            }

            /// <summary>
            /// Gets the match at the cursor. Throws <see cref="InvalidOperationException" /> while
            /// the cursor does not refer to an element (any negative cursor value).
            /// </summary>
            public Match Current =>
                _index >= 0
                    ? _collection.GetMatch(_index)!
                    : throw new InvalidOperationException(SR.EnumNotStarted);

            object IEnumerator.Current
            {
                get { return Current; }
            }

            void IEnumerator.Reset()
            {
                _index = BeforeFirst;
            }

            void IDisposable.Dispose()
            {
                // The enumerator holds nothing that needs releasing.
            }
        }
    }
}