File: FrameworkFork\Microsoft.Xml\Xml\Core\HtmlUtf8RawTextWriter.cs
Web Access
Project: src\src\dotnet-svcutil\lib\src\dotnet-svcutil-lib.csproj (dotnet-svcutil-lib)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.IO;
using System.Text;
using Microsoft.Xml;
using Microsoft.Xml.Schema;
//using Microsoft.Xml.Query;
using System.Diagnostics;
using MS.Internal.Xml;
namespace Microsoft.Xml
    using System;
    internal class HtmlUtf8RawTextWriter : XmlUtf8RawTextWriter
        // Fields
        // Stack of saved element property flags; WriteEndElement/WriteFullEndElement pop it
        // to restore currentElementProperties.
        protected ByteStack elementScope;
        // Property flags of the element currently being written (looked up by name in WriteStartElement).
        protected ElementProperties currentElementProperties;
        // Property flags of the attribute currently being written (set in WriteStartAttribute).
        private AttributeProperties _currentAttributeProperties;
        // True when the previous chunk of attribute text ended with '&', so the choice between
        // "&{" and "&amp;" has to wait for the next chunk or for the end of the attribute.
        private bool _endsWithAmpersand;
        // Scratch buffer (allocated with 5 bytes in Init) that receives the UTF-8 bytes of one
        // character before WriteUriAttributeText percent-escapes them.
        private byte[] _uriEscapingBuffer;
        // settings
        // Copied from XmlWriterSettings.MediaType; WriteMetaElement falls back to "text/html" when it is null.
        private string _mediaType;
        // Copied from XmlWriterSettings.DoNotEscapeUriAttributes; when true, URI/NAME attributes are
        // written with ordinary HTML attribute escaping instead of URI escaping.
        private bool _doNotEscapeUriAttributes;
        // Static fields
        // Shared lookup trees built in Init from HtmlTernaryTree.htmlElements / htmlAttributes and
        // queried case-insensitively by element / attribute name.
        protected static TernaryTreeReadOnly elementPropertySearch;
        protected static TernaryTreeReadOnly attributePropertySearch;
        // Constants
        // Value handed to the ByteStack constructor in Init (presumably its growth increment - confirm in ByteStack).
        private const int StackIncrement = 10;
        // Constructors
        // Creates an HTML writer over the given stream; both arguments are forwarded to the
        // XmlUtf8RawTextWriter base constructor.
        public HtmlUtf8RawTextWriter(Stream stream, XmlWriterSettings settings) : base(stream, settings)
        // XmlRawWriter implementation
        // Intentionally writes nothing: this HTML writer ignores the XML declaration,
        // whatever the standalone setting is.
        internal override void WriteXmlDeclaration(XmlStandalone standalone)
            // Ignore xml declaration
        // Intentionally writes nothing: this HTML writer ignores a pre-formatted XML declaration too.
        internal override void WriteXmlDeclaration(string xmldecl)
            // Ignore xml declaration
        /// Writes the DOCTYPE declaration into the output buffer.
        /// Html rules allow public ID without system ID and always output "html".
        ///   name   - document type name; asserted non-empty and only compared against "HTML" here.
        ///   pubid  - public identifier, or null; a non-null value starts a PUBLIC clause.
        ///   sysid  - system identifier, or null; starts a SYSTEM clause when pubid is null.
        ///   subset - internal subset, or null; a non-null value is bracketed with '[' and ']'.
        public override void WriteDocType(string name, string pubid, string sysid, string subset)
            Debug.Assert(name != null && name.Length > 0);
            RawText("<!DOCTYPE ");
            // Bug 114337: Always output "html" or "HTML" in doc-type, even if "name" is something else
            if (name == "HTML")
            if (pubid != null)
                RawText(" PUBLIC \"");
                if (sysid != null)
                    // Closes the quoted public ID and opens the quoted system ID.
                    RawText("\" \"");
                // Closing quote of the PUBLIC clause.
                bufBytes[bufPos++] = (byte)'"';
            else if (sysid != null)
                RawText(" SYSTEM \"");
                // Closing quote of the SYSTEM clause.
                bufBytes[bufPos++] = (byte)'"';
                bufBytes[bufPos++] = (byte)' ';
            if (subset != null)
                bufBytes[bufPos++] = (byte)'[';
                bufBytes[bufPos++] = (byte)']';
            // Terminate the declaration.
            bufBytes[this.bufPos++] = (byte)'>';
        // Starts an element. An empty ns selects the HTML path (element properties are looked up
        // by name); any other namespace is handed to the base XML writer.
        // For an HTML element, callers should pass String.Empty for both ns and prefix.
        public override void WriteStartElement(string prefix, string localName, string ns)
            Debug.Assert(localName != null && localName.Length != 0 && prefix != null && ns != null);
            if (ns.Length == 0)
                Debug.Assert(prefix.Length == 0);
                // Look up the HTML-specific flags for this element name, ignoring case.
                currentElementProperties = (ElementProperties)elementPropertySearch.FindCaseInsensitiveString(localName);
                base.bufBytes[bufPos++] = (byte)'<';
                // Remember the current buffer position; WriteStartAttribute compares against it
                // to decide whether a separating space is needed.
                base.attrEndPos = bufPos;
                // Since the HAS_NS has no impact to the ElementTextBlock behavior,
                // we don't need to push it into the stack.
                currentElementProperties = ElementProperties.HAS_NS;
                base.WriteStartElement(prefix, localName, ns);
        // Closes the start tag by writing '>' and records where the element's content begins.
        // Elements flagged HEAD get additional handling (the META information).
        internal override void StartElementContent()
            base.bufBytes[base.bufPos++] = (byte)'>';
            // Detect whether content is output
            this.contentPos = this.bufPos;
            if ((currentElementProperties & ElementProperties.HEAD) != 0)
        // Writes the end of an element.
        // for HTML(ns.Length == 0)
        //    not an empty tag <h1></h1>   - a closing tag is written
        //    empty tag <basefont>         - elements flagged EMPTY get no closing tag
        // Elements in a namespace are closed by the base XML writer.
        // Afterwards the property flags saved on elementScope are popped back into currentElementProperties.
        internal override void WriteEndElement(string prefix, string localName, string ns)
            if (ns.Length == 0)
                Debug.Assert(prefix.Length == 0);
                // Only elements that are not flagged EMPTY get a closing tag.
                if ((currentElementProperties & ElementProperties.EMPTY) == 0)
                    bufBytes[base.bufPos++] = (byte)'<';
                    bufBytes[base.bufPos++] = (byte)'/';
                    bufBytes[base.bufPos++] = (byte)'>';
                //xml content
                base.WriteEndElement(prefix, localName, ns);
            currentElementProperties = (ElementProperties)elementScope.Pop();
        // Writes a full end tag. With an empty ns the HTML path is taken and a closing tag is
        // written unless the element is flagged EMPTY; otherwise the base XML writer closes the element.
        // Afterwards the property flags saved on elementScope are popped back into currentElementProperties.
        internal override void WriteFullEndElement(string prefix, string localName, string ns)
            if (ns.Length == 0)
                Debug.Assert(prefix.Length == 0);
                // Only elements that are not flagged EMPTY get a closing tag.
                if ((currentElementProperties & ElementProperties.EMPTY) == 0)
                    bufBytes[base.bufPos++] = (byte)'<';
                    bufBytes[base.bufPos++] = (byte)'/';
                    bufBytes[base.bufPos++] = (byte)'>';
                //xml content
                base.WriteFullEndElement(prefix, localName, ns);
            currentElementProperties = (ElementProperties)elementScope.Pop();
        // 1. How the outputBooleanAttribute(fBOOL) and outputHtmlUriText(fURI) being set?
        // When SA is called.
        //             BOOL_PARENT   URI_PARENT   Others
        //  fURI
        //  URI att       false         true       false
        //  fBOOL
        //  BOOL att      true          false      false
        //  How they change the attribute output behaviors?
        //  1)       fURI=true             fURI=false
        //  SA         a="                      a="
        //  AT       HtmlURIText             HtmlText
        //  EA          "                       "
        //  2)      fBOOL=true             fBOOL=false
        //  SA         a                       a="
        //  AT      HtmlText                output nothing
        //  EA     output nothing               "
        // When they get reset?
        //  At the end of attribute.
        // 2. How the outputXmlTextElementScoped(fENs) and outputXmlTextattributeScoped(fANs) are set?
        //  fANs is in the scope of the fENs.
        //          SE(localName)    SE(ns, pre, localName)  SA(localName)  SA(ns, pre, localName)
        //  fENs      false(default)      true(action)
        //  fANs      false(default)     false(default)      false(default)      true(action)
        // how they get reset?
        //          EE(localName)  EE(ns, pre, localName) EENC(ns, pre, localName) EA(localName)  EA(ns, pre, localName)
        //  fENs                      false(action)
        //  fANs                                                                                        false(action)
        // How they change the TextOutput?
        //         fENs | fANs              Else
        //  AT      XmlText                  HtmlText
        // 3. Flags for processing &{ split situations
        // When the flag is set?
        //  AT     src[lastchar]='&' flag&{ = true;
        // When does it get reset?
        //  AT method.
        // How it changes the behaviors?
        //         flag&{=true
        //  AT     if (src[0] == '{') {
        //             output "&{"
        //         }
        //         else {
        //             output &amp;
        //         }
        //  EA     output amp;
        //  SA  if (flagBOOL == false) { output =";}
        //  AT  if (flagBOOL) { return};
        //      if (flagNS) {XmlText;} {
        //      }
        //      else if (flagURI) {
        //          HtmlURIText;
        //      }
        //      else {
        //          HtmlText;
        //      }
        //  AT  if (flagNS) {XmlText;} {
        //      }
        //      else if (flagURI) {
        //          HtmlURIText;
        //      }
        //      else if (!flagBOOL) {
        //          HtmlText; //flag&{ handling
        //      }
        //  EA if (flagBOOL == false) { output "
        //     }
        //     else if (flag&{) {
        //          output amp;
        //     }
        // Starts an attribute. With an empty ns the HTML path is taken: the attribute's properties are
        // looked up only when the parent element is flagged BOOL_PARENT, URI_PARENT or NAME_PARENT,
        // and default to AttributeProperties.DEFAULT otherwise. Attributes in a namespace are started
        // by the base XML writer and also get DEFAULT properties.
        public override void WriteStartAttribute(string prefix, string localName, string ns)
            Debug.Assert(localName != null && localName.Length != 0 && prefix != null && ns != null);
            if (ns.Length == 0)
                Debug.Assert(prefix.Length == 0);
                // Buffer is still at the position recorded in attrEndPos, so emit a separating space first.
                if (base.attrEndPos == bufPos)
                    base.bufBytes[bufPos++] = (byte)' ';
                if ((currentElementProperties & (ElementProperties.BOOL_PARENT | ElementProperties.URI_PARENT | ElementProperties.NAME_PARENT)) != 0)
                    // Case-insensitive lookup of the attribute's flags, masked by a further operand.
                    _currentAttributeProperties = (AttributeProperties)attributePropertySearch.FindCaseInsensitiveString(localName) &
                    if ((_currentAttributeProperties & AttributeProperties.BOOLEAN) != 0)
                        base.inAttributeValue = true;
                    _currentAttributeProperties = AttributeProperties.DEFAULT;
                // Open the quoted attribute value.
                base.bufBytes[bufPos++] = (byte)'=';
                base.bufBytes[bufPos++] = (byte)'"';
                base.WriteStartAttribute(prefix, localName, ns);
                _currentAttributeProperties = AttributeProperties.DEFAULT;
            base.inAttributeValue = true;
        // Ends the current attribute. Attributes flagged BOOLEAN only update attrEndPos; for other
        // attributes a pending trailing '&' is resolved (the "amp;" is output at EndAttribute and the
        // flag is cleared) and the closing quote is written.
        // In both cases the writer leaves attribute-value mode and attrEndPos is set to the current position.
        public override void WriteEndAttribute()
            if ((_currentAttributeProperties & AttributeProperties.BOOLEAN) != 0)
                base.attrEndPos = bufPos;
                if (_endsWithAmpersand)
                    _endsWithAmpersand = false;
                // Closing quote of the attribute value.
                base.bufBytes[bufPos++] = (byte)'"';
            base.inAttributeValue = false;
            base.attrEndPos = bufPos;
        // Writes a processing instruction using the HTML form: it starts with "<?" and is terminated
        // by ">" rather than "?>". The text is written through the base WriteCommentOrPi helper.
        //   target - PI target; asserted non-null and non-empty.
        //   text   - PI content; asserted non-null.
        public override void WriteProcessingInstruction(string target, string text)
            Debug.Assert(target != null && target.Length != 0 && text != null);
            bufBytes[base.bufPos++] = (byte)'<';
            bufBytes[base.bufPos++] = (byte)'?';
            bufBytes[base.bufPos++] = (byte)' ';
            base.WriteCommentOrPi(text, '?');
            base.bufBytes[base.bufPos++] = (byte)'>';
            // The write position has gone past bufLen.
            if (base.bufPos > base.bufLen)
        // Serialize either attribute or element text using HTML rules.
        // The string is pinned and handed to WriteHtmlAttributeTextBlock when the writer is inside an
        // attribute value, and to WriteHtmlElementTextBlock otherwise.
        public override unsafe void WriteString(string text)
            Debug.Assert(text != null);
            fixed (char* pSrc = text)
                char* pSrcEnd = pSrc + text.Length;
                if (base.inAttributeValue)
                    WriteHtmlAttributeTextBlock(pSrc, pSrcEnd);
                    WriteHtmlElementTextBlock(pSrc, pSrcEnd);
        // Not supported by this writer: always throws InvalidOperationException.
        public override void WriteEntityRef(string name)
            throw new InvalidOperationException(ResXml.Xml_InvalidOperation);
        // Not supported by this writer: always throws InvalidOperationException.
        public override void WriteCharEntity(char ch)
            throw new InvalidOperationException(ResXml.Xml_InvalidOperation);
        // Not supported by this writer: always throws InvalidOperationException.
        public override void WriteSurrogateCharEntity(char lowChar, char highChar)
            throw new InvalidOperationException(ResXml.Xml_InvalidOperation);
        // Writes count characters of buffer starting at index, as attribute text when inside an
        // attribute value and as element text otherwise.
        // Unlike WriteString, this calls WriteAttributeTextBlock / WriteElementTextBlock rather than the
        // Html-specific variants declared in this class (presumably the inherited XML versions).
        public override unsafe void WriteChars(char[] buffer, int index, int count)
            Debug.Assert(buffer != null);
            Debug.Assert(index >= 0);
            Debug.Assert(count >= 0 && index + count <= buffer.Length);
            fixed (char* pSrcBegin = &buffer[index])
                if (inAttributeValue)
                    WriteAttributeTextBlock(pSrcBegin, pSrcBegin + count);
                    WriteElementTextBlock(pSrcBegin, pSrcBegin + count);
        // Private methods
        // Initializes per-writer state from the settings and, on first use, builds the shared
        // element/attribute lookup trees.
        private void Init(XmlWriterSettings settings)
            // The *_PARENT element flags must share their numeric values with the matching attribute flags.
            Debug.Assert((int)ElementProperties.URI_PARENT == (int)AttributeProperties.URI);
            Debug.Assert((int)ElementProperties.BOOL_PARENT == (int)AttributeProperties.BOOLEAN);
            Debug.Assert((int)ElementProperties.NAME_PARENT == (int)AttributeProperties.NAME);
            if (elementPropertySearch == null)
                // elementPropertySearch should be initialized last for multi-thread safety: it is the
                // field tested above, so attributePropertySearch is already set once it becomes non-null.
                attributePropertySearch = new TernaryTreeReadOnly(HtmlTernaryTree.htmlAttributes);
                elementPropertySearch = new TernaryTreeReadOnly(HtmlTernaryTree.htmlElements);
            elementScope = new ByteStack(StackIncrement);
            // Scratch space for the UTF-8 bytes of a single character (used by WriteUriAttributeText).
            _uriEscapingBuffer = new byte[5];
            currentElementProperties = ElementProperties.DEFAULT;
            _mediaType = settings.MediaType;
            _doNotEscapeUriAttributes = settings.DoNotEscapeUriAttributes;
        // Writes a META element with http-equiv="Content-Type", followed by a content attribute and a
        // "; charset=" part. A null media type is replaced by "text/html" first.
        protected void WriteMetaElement()
            base.RawText("<META http-equiv=\"Content-Type\"");
            // Default the media type when the settings did not supply one.
            if (_mediaType == null)
                _mediaType = "text/html";
            base.RawText(" content=\"");
            base.RawText("; charset=");
        // Justify the stack usage:
        // Nested elements has following possible position combinations
        // 1. <E1>Content1<E2>Content2</E2></E1>
        // 2. <E1><E2>Content2</E2>Content1</E1>
        // 3. <E1>Content<E2>Content2</E2>Content1</E1>
        // In situations 2 and 3, the stored currentElementProperties will be E2's;
        // only the top of the stack holds the real E1 element properties.
        // Writes element text in the range [pSrc, pSrcEnd). When the current element is flagged
        // NO_ENTITIES the characters go out through RawText; otherwise the base element text writer is used.
        protected unsafe void WriteHtmlElementTextBlock(char* pSrc, char* pSrcEnd)
            if ((currentElementProperties & ElementProperties.NO_ENTITIES) != 0)
                base.RawText(pSrc, pSrcEnd);
                base.WriteElementTextBlock(pSrc, pSrcEnd);
        // Writes attribute text in the range [pSrc, pSrcEnd), choosing the escaping by attribute kind:
        //   BOOLEAN attribute           - the call is ignored
        //   URI or NAME attribute       - URI escaping, unless _doNotEscapeUriAttributes is set
        //   attribute of a HAS_NS elem. - base (XML) attribute escaping
        //   anything else               - HTML attribute escaping
        protected unsafe void WriteHtmlAttributeTextBlock(char* pSrc, char* pSrcEnd)
            if ((_currentAttributeProperties & (AttributeProperties.BOOLEAN | AttributeProperties.URI | AttributeProperties.NAME)) != 0)
                if ((_currentAttributeProperties & AttributeProperties.BOOLEAN) != 0)
                    //if output boolean attribute, ignore this call.
                if ((_currentAttributeProperties & (AttributeProperties.URI | AttributeProperties.NAME)) != 0 && !_doNotEscapeUriAttributes)
                    WriteUriAttributeText(pSrc, pSrcEnd);
                    WriteHtmlAttributeText(pSrc, pSrcEnd);
            else if ((currentElementProperties & ElementProperties.HAS_NS) != 0)
                base.WriteAttributeTextBlock(pSrc, pSrcEnd);
                WriteHtmlAttributeText(pSrc, pSrcEnd);
        // &{ split cases
        // 1). HtmlAttributeText("a&");
        //     HtmlAttributeText("{b}");
        // 2). HtmlAttributeText("a&");
        //     EndAttribute();
        // 3).split with Flush by the user
        //     HtmlAttributeText("a&");
        //     FlushBuffer();
        //     HtmlAttributeText("{b}");
        // Solutions:
        // case 1)hold the &amp; output as &
        //      if the next income character is {, output {
        //      else output amp;
        // Writes the characters in [pSrc, pSrcEnd) as HTML attribute text into the byte buffer.
        // ASCII characters flagged fAttrValue are copied directly; everything else goes through the
        // switch below. An '&' immediately followed by '{' is written unescaped, and an '&' that is the
        // last character of the chunk is deferred through _endsWithAmpersand.
        private unsafe void WriteHtmlAttributeText(char* pSrc, char* pSrcEnd)
            // Resolve an '&' left pending by the previous chunk.
            if (_endsWithAmpersand)
                // The new chunk does not start with '{', so the pending '&' was not part of "&{".
                if (pSrcEnd - pSrc > 0 && pSrc[0] != '{')
                _endsWithAmpersand = false;
            fixed (byte* pDstBegin = bufBytes)
                byte* pDst = pDstBegin + this.bufPos;
                char ch = (char)0;
                for (; ; )
                    // Limit the fast-copy run to the remaining source length or the end of the buffer,
                    // whichever comes first.
                    byte* pDstEnd = pDst + (pSrcEnd - pSrc);
                    if (pDstEnd > pDstBegin + bufLen)
                        pDstEnd = pDstBegin + bufLen;
                    // Fast path: ASCII characters that are valid attribute-value characters are copied as-is.
                    while (pDst < pDstEnd && (((xmlCharType.charProperties[(ch = *pSrc)] & XmlCharType.fAttrValue) != 0) && ch <= 0x7F))
                        *pDst++ = (byte)ch;
                    Debug.Assert(pSrc <= pSrcEnd);
                    // end of value
                    if (pSrc >= pSrcEnd)
                    // end of buffer
                    if (pDst >= pDstEnd)
                        bufPos = (int)(pDst - pDstBegin);
                        // NOTE(review): writing restarts at index 1, not 0 - presumably the base writer
                        // keeps the first byte of the buffer reserved; confirm in XmlUtf8RawTextWriter.
                        pDst = pDstBegin + 1;
                    // some character needs to be escaped
                    switch (ch)
                        case '&':
                            // '&' as the very last character: remember it so the next call can decide.
                            if (pSrc + 1 == pSrcEnd)
                                _endsWithAmpersand = true;
                            // '&' not followed by '{' is escaped as an entity.
                            else if (pSrc[1] != '{')
                                pDst = XmlUtf8RawTextWriter.AmpEntity(pDst);
                            *pDst++ = (byte)ch;
                        case '"':
                            pDst = QuoteEntity(pDst);
                        // These characters are written to the output unchanged.
                        case '<':
                        case '>':
                        case '\'':
                        case (char)0x9:
                            *pDst++ = (byte)ch;
                        case (char)0xD:
                            // do not normalize new lines in attributes - just escape them
                            pDst = CarriageReturnEntity(pDst);
                        case (char)0xA:
                            // do not normalize new lines in attributes - just escape them
                            pDst = LineFeedEntity(pDst);
                            EncodeChar(ref pSrc, pSrcEnd, ref pDst);
                // Publish the final write position back to the writer.
                bufPos = (int)(pDst - pDstBegin);
        // Writes the characters in [pSrc, pSrcEnd) as the text of a URI-valued attribute.
        // Handling matches WriteHtmlAttributeText except for characters that reach the final branch of
        // the switch: those are converted to UTF-8 and each byte is written as %XX with uppercase hex digits.
        private unsafe void WriteUriAttributeText(char* pSrc, char* pSrcEnd)
            // Resolve an '&' left pending by the previous chunk.
            if (_endsWithAmpersand)
                // The new chunk does not start with '{', so the pending '&' was not part of "&{".
                if (pSrcEnd - pSrc > 0 && pSrc[0] != '{')
                _endsWithAmpersand = false;
            fixed (byte* pDstBegin = bufBytes)
                byte* pDst = pDstBegin + this.bufPos;
                char ch = (char)0;
                for (; ; )
                    // Limit the fast-copy run to the remaining source length or the end of the buffer,
                    // whichever comes first.
                    byte* pDstEnd = pDst + (pSrcEnd - pSrc);
                    if (pDstEnd > pDstBegin + bufLen)
                        pDstEnd = pDstBegin + bufLen;
                    // Fast path: ASCII characters that are valid attribute-value characters are copied as-is.
                    while (pDst < pDstEnd && (((xmlCharType.charProperties[(ch = *pSrc)] & XmlCharType.fAttrValue) != 0) && ch < 0x80))
                        *pDst++ = (byte)ch;
                    Debug.Assert(pSrc <= pSrcEnd);
                    // end of value
                    if (pSrc >= pSrcEnd)
                    // end of buffer
                    if (pDst >= pDstEnd)
                        bufPos = (int)(pDst - pDstBegin);
                        // NOTE(review): writing restarts at index 1, not 0 - presumably the base writer
                        // keeps the first byte of the buffer reserved; confirm in XmlUtf8RawTextWriter.
                        pDst = pDstBegin + 1;
                    // some character needs to be escaped
                    switch (ch)
                        case '&':
                            // '&' as the very last character: remember it so the next call can decide.
                            if (pSrc + 1 == pSrcEnd)
                                _endsWithAmpersand = true;
                            // '&' not followed by '{' is escaped as an entity.
                            else if (pSrc[1] != '{')
                                pDst = XmlUtf8RawTextWriter.AmpEntity(pDst);
                            *pDst++ = (byte)ch;
                        case '"':
                            pDst = QuoteEntity(pDst);
                        // These characters are written to the output unchanged.
                        case '<':
                        case '>':
                        case '\'':
                        case (char)0x9:
                            *pDst++ = (byte)ch;
                        case (char)0xD:
                            // do not normalize new lines in attributes - just escape them
                            pDst = CarriageReturnEntity(pDst);
                        case (char)0xA:
                            // do not normalize new lines in attributes - just escape them
                            pDst = LineFeedEntity(pDst);
                            // Percent-escape: convert the character to UTF-8 in the scratch buffer, then
                            // write every byte as '%' followed by two uppercase hex digits.
                            const string hexDigits = "0123456789ABCDEF";
                            fixed (byte* pUriEscapingBuffer = _uriEscapingBuffer)
                                byte* pByte = pUriEscapingBuffer;
                                byte* pEnd = pByte;
                                // pEnd is passed by ref and bounds the bytes to escape in the loop below.
                                XmlUtf8RawTextWriter.CharToUTF8(ref pSrc, pSrcEnd, ref pEnd);
                                while (pByte < pEnd)
                                    *pDst++ = (byte)'%';
                                    // High nibble, then low nibble.
                                    *pDst++ = (byte)hexDigits[*pByte >> 4];
                                    *pDst++ = (byte)hexDigits[*pByte & 0xF];
                // Publish the final write position back to the writer.
                bufPos = (int)(pDst - pDstBegin);
        // For handling &{ in Html text field. If & is not followed by {, it still needs to be escaped.
        // Appends the four bytes "amp;" to the buffer, i.e. the remainder of "&amp;" - presumably for an
        // '&' that was already written; the callers are not visible in this excerpt.
        private void OutputRestAmps()
            base.bufBytes[bufPos++] = (byte)'a';
            base.bufBytes[bufPos++] = (byte)'m';
            base.bufBytes[bufPos++] = (byte)'p';
            base.bufBytes[bufPos++] = (byte)';';
    // Indentation HtmlWriter only indent <BLOCK><BLOCK> situations
    // Here are all the cases:
    //       ELEMENT1     actions          ELEMENT2          actions                                 SC              EE
    // 1).    SE SC   store SE blockPro       SE           a). check ELEMENT1 blockPro                  <A>           </A>
    //        EE     if SE, EE are blocks                  b). true: check ELEMENT2 blockPro                <B>            <B>
    //                                                     c). detect ELEMENT is SE, SC
    //                                                     d). increase the indexlevel
    // 2).    SE SC,  Store EE blockPro       EE            a). check stored blockPro                    <A></A>            </A>
    //         EE    if SE, EE are blocks                  b). true:  indexLevel same                                  </B>
    // This is an alternative way to make the output looks better
    // Indentation HtmlWriter only indent <BLOCK><BLOCK> situations
    // Here are all the cases:
    //       ELEMENT1     actions           ELEMENT2          actions                                 Samples
    // 1).    SE SC   store SE blockPro       SE            a). check ELEMENT1 blockPro                  <A>(blockPos)
    //                                                     b). true: check ELEMENT2 blockPro                <B>
    //                                                     c). detect ELEMENT is SE, SC
    //                                                     d). increase the indentLevel
    // 2).     EE     Store EE blockPro       SE            a). check stored blockPro                    </A>
    //                                                     b). true:  indentLevel same                   <B>
    //                                                     c). output block2
    // 3).     EE      same as above          EE            a). check stored blockPro                          </A>
    //                                                     b). true:  --indentLevel                        </B>
    //                                                     c). output block2
    // 4).    SE SC    same as above          EE            a). check stored blockPro                      <A></A>
    //                                                     b). true:  indentLevel no change
    internal class HtmlUtf8RawTextWriterIndent : HtmlUtf8RawTextWriter
        // Fields
        // Current indentation depth; WriteIndent loops this many times.
        private int _indentLevel;
        // For detecting the SE SC situation: holds the buffer position recorded at the end of the last
        // block-level construct and is compared against bufPos to decide whether to indent.
        private int _endBlockPos;
        // settings
        // Copied from XmlWriterSettings.IndentChars.
        private string _indentChars;
        // Copied from XmlWriterSettings.NewLineOnAttributes; checked at the start of every attribute.
        private bool _newLineOnAttributes;
        // Constructors
        // Creates an indenting HTML writer over the given stream; both arguments are forwarded to the
        // HtmlUtf8RawTextWriter base constructor.
        public HtmlUtf8RawTextWriterIndent(Stream stream, XmlWriterSettings settings) : base(stream, settings)
        // XmlRawWriter overrides
        /// <summary>
        /// Serialize the document type declaration through the base writer, then record the buffer
        /// position that follows it so the next block-level start tag can be indented.
        /// </summary>
        public override void WriteDocType(string name, string pubid, string sysid, string subset)
            base.WriteDocType(name, pubid, sysid, subset);
            // Allow indentation after DocTypeDecl
            _endBlockPos = base.bufPos;
        // Starts an element, with indentation checks added. With an empty ns the element's HTML
        // properties are looked up by name; elements in a namespace are flagged HAS_NS | BLOCK_WS.
        // In both paths the indentation test is whether the buffer is still at the position stored in
        // _endBlockPos (the HTML path additionally requires the BLOCK_WS flag).
        public override void WriteStartElement(string prefix, string localName, string ns)
            Debug.Assert(localName != null && localName.Length != 0 && prefix != null && ns != null);
            if (ns.Length == 0)
                Debug.Assert(prefix.Length == 0);
                // Look up the HTML-specific flags for this element name, ignoring case.
                base.currentElementProperties = (ElementProperties)elementPropertySearch.FindCaseInsensitiveString(localName);
                // Nothing was written since the last block boundary and this element is block-level.
                if (_endBlockPos == base.bufPos && (base.currentElementProperties & ElementProperties.BLOCK_WS) != 0)
                base.bufBytes[bufPos++] = (byte)'<';
                // Elements in a namespace are treated as block-level for whitespace purposes.
                base.currentElementProperties = ElementProperties.HAS_NS | ElementProperties.BLOCK_WS;
                if (_endBlockPos == base.bufPos)
                base.bufBytes[base.bufPos++] = (byte)'<';
                // A non-empty prefix is separated from the local name by ':'.
                if (prefix.Length != 0)
                    base.bufBytes[base.bufPos++] = (byte)':';
            // Remember the current buffer position as the end of the attribute area.
            base.attrEndPos = bufPos;
        // Closes the start tag by writing '>' and records where the element's content begins.
        // For HEAD elements and for elements flagged BLOCK_WS, the resulting buffer position is also
        // stored in _endBlockPos so that a directly following block element can be indented.
        internal override void StartElementContent()
            base.bufBytes[base.bufPos++] = (byte)'>';
            // Detect whether content is output
            base.contentPos = base.bufPos;
            if ((currentElementProperties & ElementProperties.HEAD) != 0)
                _endBlockPos = base.bufPos;
            else if ((base.currentElementProperties & ElementProperties.BLOCK_WS) != 0)
                // store the element block position
                _endBlockPos = base.bufPos;
        // Ends an element through the base writer, indenting the end tag first when the element is
        // block-level, the previous output ended a block, and the element had content.
        // Afterwards contentPos is reset and, for block-level elements, _endBlockPos is moved to the
        // position after the end tag.
        internal override void WriteEndElement(string prefix, string localName, string ns)
            bool isBlockWs;
            Debug.Assert(localName != null && localName.Length != 0 && prefix != null && ns != null);
            // If this element has block whitespace properties,
            // (captured before base.WriteEndElement runs and reused for the check after it)
            isBlockWs = (base.currentElementProperties & ElementProperties.BLOCK_WS) != 0;
            if (isBlockWs)
                // And if the last node to be output had block whitespace properties,
                // And if content was output within this element,
                if (_endBlockPos == base.bufPos && base.contentPos != base.bufPos)
                    // Then indent
            base.WriteEndElement(prefix, localName, ns);
            // Reset contentPos in case of empty elements
            base.contentPos = 0;
            // Mark end of element in buffer for element's with block whitespace properties
            if (isBlockWs)
                _endBlockPos = base.bufPos;
        // Starts an attribute through the base HTML writer, after checking the NewLineOnAttributes
        // setting first.
        public override void WriteStartAttribute(string prefix, string localName, string ns)
            if (_newLineOnAttributes)
            base.WriteStartAttribute(prefix, localName, ns);
        // Re-anchors the block-end marker for a flush, since buffer positions change.
        protected override void FlushBuffer()
            // Make sure the buffer will reset the block position
            // If the marker coincided with the current write position it is moved to 1 (presumably the
            // position writing resumes from after a flush - confirm in the base class); otherwise it is
            // set to 0.
            _endBlockPos = (_endBlockPos == base.bufPos) ? 1 : 0;
        // Private methods
        // Initializes the indentation state: the level starts at 0 and the indent string and
        // new-line-on-attributes flag are copied from the settings.
        private void Init(XmlWriterSettings settings)
            _indentLevel = 0;
            _indentChars = settings.IndentChars;
            _newLineOnAttributes = settings.NewLineOnAttributes;
        private void WriteIndent()
            // <block><inline>  -- suppress ws betw <block> and <inline>
            // <block><block>   -- don't suppress ws betw <block> and <block>
            // <block>text      -- suppress ws betw <block> and text (handled by wcharText method)
            // <block><?PI?>    -- suppress ws betw <block> and PI
            // <block><!-- -->  -- suppress ws betw <block> and comment
            // <inline><block>  -- suppress ws betw <inline> and <block>
            // <inline><inline> -- suppress ws betw <inline> and <inline>
            // <inline>text     -- suppress ws betw <inline> and text (handled by wcharText method)
            // <inline><?PI?>   -- suppress ws betw <inline> and PI
            // <inline><!-- --> -- suppress ws betw <inline> and comment
            for (int i = _indentLevel; i > 0; i--)