// File: System\Xaml\Parser\XamlText.cs
// Web Access
// Project: src\src\Microsoft.DotNet.Wpf\src\System.Xaml\System.Xaml.csproj (System.Xaml)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
 
#nullable disable
 
using System.Text;
using System.Xaml;
 
namespace MS.Internal.Xaml.Parser
{
    [DebuggerDisplay("{Text}")]
    internal class XamlText
    {
        // The four characters that IsWhitespaceChar() classifies as whitespace.
        private const Char SPACE = ' ';
        private const Char NEWLINE = '\n';
        private const Char RETURN = '\r';
        private const Char TAB = '\t';
        // Delimiters examined by LooksLikeAMarkupExtension.
        private const Char OPENCURLIE = '{';
        private const Char CLOSECURLIE = '}';
        // Escape prefix that AttributeText strips from the front of the text.
        private const String ME_ESCAPE = "{}";
        // Carriage return in string form; Paste() removes every occurrence of it
        // in space-preserve mode when convertCRLFtoLF is true.
        private const string RETURN_STRING = "\r";
 
        // Buffer holding everything pasted so far.
        private StringBuilder _sb;
        private readonly bool _isSpacePreserve;   // should space be preserved or collapsed?
        private bool _isWhiteSpaceOnly;           // is the whole thing whitespace.
 
        // Creates an empty text accumulator.
        //   spacePreserve - true when pasted whitespace must be preserved,
        //                   false when it should be collapsed.
        public XamlText(bool spacePreserve)
        {
            _isSpacePreserve = spacePreserve;

            // Nothing has been pasted yet, so the content starts out as "whitespace only".
            _isWhiteSpaceOnly = true;

            // Perf idea: most TEXT found during a XamlParse arrives through one single
            // Paste() call, so the first string could be held as-is and this allocation
            // postponed until a second string is pasted.
            _sb = new StringBuilder();
        }
 
        // True when the buffer currently holds no characters.
        public bool IsEmpty => _sb.Length == 0;
 
        // Snapshot of everything accumulated in the buffer so far.
        public string Text => _sb.ToString();
 
        // The accumulated text in the form used for an attribute value: when the text
        // starts with the "{}" escape sequence that prefix is dropped, otherwise the
        // text is returned unchanged.
        public string AttributeText
        {
            get
            {
                String text = Text;

                // The escape is two literal characters, so match it ordinally.  A
                // culture-sensitive StartsWith ignores zero-width characters (for example
                // "{\u00AD}" would match "{}"), after which removing ME_ESCAPE.Length
                // characters would strip the wrong ones.  Ordinal matching also agrees
                // with the per-character checks in LooksLikeAMarkupExtension.
                if (text.StartsWith(ME_ESCAPE, StringComparison.Ordinal))
                {
                    return text.Substring(ME_ESCAPE.Length);
                }
                return text;
            }
        }
 
        // The space-preserve setting supplied to the constructor.
        public bool IsSpacePreserved => _isSpacePreserve;
 
        // True as long as every string pasted so far consisted solely of whitespace
        // (also true before anything has been pasted).
        public bool IsWhiteSpaceOnly => _isWhiteSpaceOnly;
 
        // Appends another piece of text to the buffer.
        //
        //   text                   - the text to add.
        //   trimLeadingWhitespace  - when true, no separating space is emitted for
        //                            whitespace at the front of the new text
        //                            (only relevant when space is not preserved).
        //   convertCRLFtoLF        - when true (and space is preserved), every '\r'
        //                            is removed from the text before it is appended.
        public void Paste(string text, bool trimLeadingWhitespace, bool convertCRLFtoLF=true)
        {
            bool pastedIsAllWhitespace = IsWhitespace(text);

            if (_isSpacePreserve)
            {
                // Attribute text is internally marked "preserve" to keep it away from the
                // collapsing logic below, but 3.x still stripped carriage returns from
                // it, so we do too.  (Normally the XML reader turns new lines into SPACE.)
                //
                // Be aware that this removes *every* CR, not just the CR of a CRLF pair,
                // so an explicitly declared lone CR such as <Element Prop="a&#13;b" />
                // is affected as well.  If the intent really was CRLF -> LF this would
                // be Replace("\r\n", "\n"); it is left as-is for compatibility.
                //
                // That CR removal was the root cause of a GetFixedDocumentSequence failure
                // ("UnicodeString property does not contain enough characters to
                // correspond to the contents of Indices property."): Glyphs.UnicodeString
                // must never change without a matching change to Glyphs.Indices.  Callers
                // avoid the substitution for that property by passing
                // convertCRLFtoLF = false; see XamlScanner.EnqueueAnotherAttribute.
                _sb.Append(convertCRLFtoLF ? text.Replace(RETURN_STRING, "") : text);
            }
            else if (pastedIsAllWhitespace)
            {
                // Whitespace-only text contributes at most one leading space, and only
                // when nothing has been buffered yet and trimming was not requested.
                if (!trimLeadingWhitespace && IsEmpty)
                {
                    _sb.Append(SPACE);
                }
            }
            else
            {
                // Remember the shape of the raw text before its whitespace is collapsed.
                bool startsWithWhitespace = IsWhitespaceChar(text[0]);
                bool endsWithWhitespace = IsWhitespaceChar(text[text.Length - 1]);
                string collapsed = CollapseWhitespace(text);

                // Text that was split by a Comment (or another non-Element/PropElement)
                // arrives as two pastes; look at what is already buffered so the
                // spacing between the two pieces comes out right.
                bool bufferEndsWithWhitespace = false;
                if (_sb.Length > 0)
                {
                    if (_isWhiteSpaceOnly)
                    {
                        // Everything buffered so far is whitespace: discard it.
                        _sb = new StringBuilder();
                    }
                    else
                    {
                        bufferEndsWithWhitespace = IsWhitespaceChar(_sb[_sb.Length - 1]);
                    }
                }

                // A separating space goes in front of the new text only when the text
                // began with whitespace, the caller did not ask for it to be trimmed,
                // and the buffer does not already end in whitespace.
                bool needsLeadingSpace =
                    startsWithWhitespace && !trimLeadingWhitespace && !bufferEndsWithWhitespace;
                if (needsLeadingSpace)
                {
                    _sb.Append(SPACE);
                }

                _sb.Append(collapsed);

                // Trailing whitespace is always kept (as one space); whether it should be
                // trimmed can only be decided from higher level context.
                if (endsWithWhitespace)
                {
                    _sb.Append(SPACE);
                }
            }

            // Once any non-whitespace text has been pasted the flag stays false.
            _isWhiteSpaceOnly &= pastedIsAllWhitespace;
        }
 
        // True when the buffered text begins with '{' but not with the "{}" sequence.
        public bool LooksLikeAMarkupExtension
        {
            get
            {
                // Without a leading open curly it cannot be a markup extension.
                if (_sb.Length == 0 || _sb[0] != OPENCURLIE)
                {
                    return false;
                }

                // "{}" right at the start does not count.
                bool startsWithEmptyCurlies = _sb.Length > 1 && _sb[1] == CLOSECURLIE;
                return !startsWithEmptyCurlies;
            }
        }
 
        // ===========================================================
 
        // True when every character of text is whitespace according to
        // IsWhitespaceChar(); an empty string therefore also yields true.
        static bool IsWhitespace(string text)
        {
            foreach (char current in text)
            {
                if (!IsWhitespaceChar(current))
                {
                    return false;
                }
            }
            return true;
        }
 
        // True for space, tab, newline and carriage return; false for anything else.
        static bool IsWhitespaceChar(Char ch)
        {
            switch (ch)
            {
                case SPACE:
                case TAB:
                case NEWLINE:
                case RETURN:
                    return true;

                default:
                    return false;
            }
        }
 
        // Returns a copy of text with all leading and trailing whitespace removed and
        // every internal run of whitespace collapsed to a single space.
        // Exception: an internal run that satisfies the East Asian newline rule below
        // is dropped entirely (no space is emitted for it).
        //
        static string CollapseWhitespace(string text)
        {
            StringBuilder sb = new StringBuilder(text.Length);
            int firstIdx=0;
            while (firstIdx < text.Length)
            {
                // If it is not whitespace copy it to the destination.
                char ch = text[firstIdx];
                if (!IsWhitespaceChar(ch))
                {
                    sb.Append(ch);
                    ++firstIdx;
                    continue;
                }
 
                // Skip any runs of whitespace.
                // Afterwards advancingIdx is the index of the first non-whitespace
                // character after the run, or text.Length if the run reaches the end.
                int advancingIdx = firstIdx;
                while (++advancingIdx < text.Length)
                {
                    if (!IsWhitespaceChar(text[advancingIdx]))
                        break;
                }
 
                // If the spacing is in the middle.  (not at the ends)
                // Runs touching either end of the text produce no output at all.
                if (firstIdx != 0 && advancingIdx != text.Length)
                {
                    bool skipSpace = false;
                    // check some easy things before digging deep
                    // for Asian chars.  (only process a single newline)
                    if (ch == NEWLINE)
                    {
                        // Cheap pre-filter: the run starts with NEWLINE, is exactly two
                        // characters long, and the char just before it is >= 0x1100
                        // (the lowest value in EastAsianCodePointRanges).
                        // NOTE(review): the "single newline" wording above suggests a run
                        // length of 1 may have been intended rather than 2 - confirm
                        // against the 3.5 behavior before changing.
                        if ((advancingIdx - firstIdx == 2) && text[firstIdx - 1] >= 0x1100)
                        {
                            if (HasSurroundingEastAsianChars(firstIdx, advancingIdx, text))
                            {
                                skipSpace = true;
                            }
                        }
                    }
                    if (!skipSpace)
                    {
                        sb.Append(SPACE);
                    }
                }
                // Resume scanning at the first character after the whitespace run.
                firstIdx = advancingIdx;
            }
            return sb.ToString();
        }
 
        // Returns source without its leading spaces, tabs and newlines ('\n').
        // Carriage returns are not part of the trim set.
        public static string TrimLeadingWhitespace(string source)
        {
            return source.TrimStart(SPACE, TAB, NEWLINE);
        }
 
        // Returns source without its trailing spaces, tabs and newlines ('\n').
        // Carriage returns are not part of the trim set.
        public static string TrimTrailingWhitespace(string source)
        {
            return source.TrimEnd(SPACE, TAB, NEWLINE);
        }
 
        // ---- Asian newline suppression code ------
 
        // this code was modeled from the 3.5 XamlReaderHelper.cs
        //
        // Reports whether a whitespace run is bracketed by East Asian characters.
        //   start - index of the first character of the run (must be > 0).
        //   end   - index of the first character after the run (must be < text.Length).
        // Returns true only when both the character before the run and the character
        // after it are East Asian code points.
        static bool HasSurroundingEastAsianChars(int start, int end, string text)
        {
            Debug.Assert(start > 0);
            Debug.Assert(end < text.Length);

            // Character that precedes the run.  With two or more chars available it
            // may be a surrogate pair occupying [start - 2, start - 1].
            int precedingValue = (start >= 2)
                ? ComputeUnicodeScalarValue(start - 1, start - 2, text)
                : text[0];

            if (!IsEastAsianCodePoint(precedingValue))
            {
                return false;
            }

            // Character that follows the run.  It can only be a surrogate pair when
            // at least one more char exists after text[end].
            int followingValue = (end + 1 < text.Length)
                ? ComputeUnicodeScalarValue(end, end, text)
                : text[end];

            return IsEastAsianCodePoint(followingValue);
        }
 
        // Returns the Unicode scalar value of a character in text.
        //   takeTwoIdx - index at which a surrogate pair would start; when
        //                text[takeTwoIdx] and text[takeTwoIdx + 1] form a valid
        //                high/low surrogate pair, the combined code point is returned.
        //   takeOneIdx - index of the single UTF-16 char to return when there is
        //                no valid pair at takeTwoIdx.
        static int ComputeUnicodeScalarValue(int takeOneIdx, int takeTwoIdx, string text)
        {
            Char highChar = text[takeTwoIdx];

            // Only look at the following char when it exists; a high surrogate that is
            // the last char of the string cannot be part of a pair.
            if (Char.IsHighSurrogate(highChar) && takeTwoIdx + 1 < text.Length)
            {
                Char lowChar = text[takeTwoIdx + 1];
                if (Char.IsLowSurrogate(lowChar))
                {
                    // Standard UTF-16 decoding:
                    //   ((high & 0x3FF) << 10 | (low & 0x3FF)) + 0x10000
                    // The previous hand-written version added 0x1000, so supplementary
                    // characters never landed in the 0x20000+ East Asian ranges.
                    return Char.ConvertToUtf32(highChar, lowChar);
                }
            }

            return text[takeOneIdx];
        }
 
        // An immutable range of Unicode code points.  IsEastAsianCodePoint() treats
        // both Min and Max as inclusive bounds.
        // Declared readonly so the compiler enforces immutability.
        readonly struct CodePointRange
        {
            public readonly int Min;    // lowest code point in the range
            public readonly int Max;    // highest code point in the range

            public CodePointRange(int min, int max)
            {
                Min = min;
                Max = max;
            }
        }
 
        // Code point ranges that IsEastAsianCodePoint() treats as East Asian.
        // The table is only ever read, so the reference is readonly to prevent it
        // from being replaced by accident.
        //
        // The #else branch merges the individually commented ranges into two larger
        // ones.  The merge is slightly wider than the itemized list: 0x31F0-0xA4CF
        // also spans 0x3200-0x33FF, which none of the itemized ranges contain.
        private static readonly CodePointRange[] EastAsianCodePointRanges = new CodePointRange[]
        {
            new CodePointRange ( 0x1100, 0x11FF ),     // Hangul
            new CodePointRange ( 0x2E80, 0x2FD5 ),     // CJK and KangXi Radicals
            new CodePointRange ( 0x2FF0, 0x2FFB ),     // Ideographic Description
#if NICE_COMMENTED_RANGES
            new CodePointRange ( 0x3040, 0x309F ),     // Hiragana
            new CodePointRange ( 0x30A0, 0x30FF ),     // Katakana
            new CodePointRange ( 0x3100, 0x312F ),     // Bopomofo
            new CodePointRange ( 0x3130, 0x318F ),     // Hangul Compatibility Jamo
            new CodePointRange ( 0x3190, 0x319F ),     // Kanbun

            new CodePointRange ( 0x31F0, 0x31FF ),     // Katakana Phonetic Extensions
            new CodePointRange ( 0x3400, 0x4DFF ),     // CJK Unified Ideographs Extension A
            new CodePointRange ( 0x4E00, 0x9FFF ),     // CJK Unified Ideographs
            new CodePointRange ( 0xA000, 0xA4CF ),     // Yi
#else
            new CodePointRange ( 0x3040, 0x319F ),     // Hiragana through Kanbun (merged)
            new CodePointRange ( 0x31F0, 0xA4CF ),     // Katakana Phonetic Extensions through Yi (merged)
#endif
            new CodePointRange ( 0xAC00, 0xD7A3 ),     // Hangul Syllables
            new CodePointRange ( 0xF900, 0xFAFF ),     // CJK Compatibility
            new CodePointRange ( 0xFF00, 0xFFEF ),     // Halfwidth and Fullwidth forms
                // code points above 0xFFFF (surrogate pairs in UTF-16)
            new CodePointRange ( 0x20000, 0x2a6d6 ),   // CJK Unified Ext. B
            new CodePointRange ( 0x2F800, 0x2FA1D ),   // CJK Compatibility Supplement
        };
 
        // taken directly from WPF 3.5
        //
        // True when unicodeScalarValue lies inside any entry of
        // EastAsianCodePointRanges (both bounds inclusive).
        static bool IsEastAsianCodePoint(int unicodeScalarValue)
        {
            foreach (CodePointRange range in EastAsianCodePointRanges)
            {
                bool withinRange = range.Min <= unicodeScalarValue
                                && unicodeScalarValue <= range.Max;
                if (withinRange)
                {
                    return true;
                }
            }
            return false;
        }
    }
}