// File: System\IO\Packaging\ContentType.cs
// Web Access
// Project: src\src\libraries\System.IO.Packaging\src\System.IO.Packaging.csproj (System.IO.Packaging)
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
//-----------------------------------------------------------------------------
//
// Description:
//  ContentType class parses and validates the content-type string.
//  It provides functionality to compare the type/subtype values.
//
// Details:
// Grammar which this class follows -
//
// Content-type grammar MUST conform to media-type grammar as per
// RFC 2616 (ABNF notation):
//
// media-type     = type "/" subtype *( ";" parameter )
// type           = token
// subtype        = token
// parameter      = attribute "=" value
// attribute      = token
// value          = token | quoted-string
// quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
// qdtext         = <any TEXT except <">>
// quoted-pair    = "\" CHAR
// token          = 1*<any CHAR except CTLs or separators>
// separators     = "(" | ")" | "<" | ">" | "@"
//                  | "," | ";" | ":" | "\" | <">
//                  | "/" | "[" | "]" | "?" | "="
//                  | "{" | "}" | SP | HT
// TEXT           = <any OCTET except CTLs, but including LWS>
// OCTET          = <any 8-bit sequence of data>
// CHAR           = <any US-ASCII character (octets 0 - 127)>
// CTL            = <any US-ASCII control character(octets 0 - 31)and DEL(127)>
// CR             = <US-ASCII CR, carriage return (13)>
// LF             = <US-ASCII LF, linefeed (10)>
// SP             = <US-ASCII SP, space (32)>
// HT             = <US-ASCII HT, horizontal-tab (9)>
// <">            = <US-ASCII double-quote mark (34)>
// LWS            = [CRLF] 1*( SP | HT )
// CRLF           = CR LF
// Linear white space (LWS) MUST NOT be used between the type and subtype, nor
// between an attribute and its value. Leading and trailing LWS are prohibited.
//
//-----------------------------------------------------------------------------
 
using System;
using System.Collections.Generic;   // For Dictionary<string, string>
using System.Diagnostics;           // For Debug.Assert
using System.Diagnostics.CodeAnalysis;
using System.Text;                  // For StringBuilder
 
namespace System.IO.Packaging
{
    /// <summary>
    /// Content Type class
    /// </summary>
    internal sealed class ContentType
    {
        #region Internal Constructors
 
        /// <summary>
        /// Builds a ContentType from its string form, validating the string against the
        /// RFC 2616 media-type grammar while it is parsed.
        /// An empty string is accepted and is meant to signal an absent/unknown content type;
        /// in that case nothing is parsed and the type and subtype stay empty.
        /// </summary>
        /// <param name="contentType">The content-type string to parse.</param>
        /// <exception cref="ArgumentNullException"><paramref name="contentType"/> is null.</exception>
        /// <exception cref="ArgumentException">The string starts or ends with a Linear White Space (LWS)
        /// character, contains a carriage return that is not adjacent to a line feed, or fails
        /// validation while the type/subtype or the parameters are parsed.</exception>
        internal ContentType(string contentType)
        {
            if (contentType is null)
            {
                throw new ArgumentNullException(nameof(contentType));
            }

            // Empty input is stored as-is so that ToString() hands it back unchanged.
            if (contentType.Length == 0)
            {
                _contentType = string.Empty;
                return;
            }

            char firstChar = contentType[0];
            char lastChar = contentType[contentType.Length - 1];
            if (IsLinearWhiteSpaceChar(firstChar) || IsLinearWhiteSpaceChar(lastChar))
            {
                throw new ArgumentException(SR.ContentTypeCannotHaveLeadingTrailingLWS);
            }

            // A '\r' is only acceptable right next to a '\n' (either "\r\n" or "\n\r").
            ValidateCarriageReturns(contentType);

            ReadOnlySpan<char> whole = contentType.AsSpan();
            int parametersStart = whole.IndexOf(';');

            if (parametersStart < 0)
            {
                // Shape: type/subtype
                ParseTypeAndSubType(whole);
                return;
            }

            // Shape: type/subtype ; param1=value1 ; param2=value2 ; param3="value3"
            // The parameter section is handed over including its leading ';'.
            ParseTypeAndSubType(whole.Slice(0, parametersStart));
            ParseParameterAndValue(whole.Slice(parametersStart));
        }
 
        #endregion Internal Constructors
 
        #region Internal Properties
 
        /// <summary>
        /// Gets the type portion of the content type.
        /// For the content type "text/xml" this is "text".
        /// </summary>
        internal string TypeComponent => _type;
 
        /// <summary>
        /// Gets the subtype portion of the content type.
        /// For the content type "text/xml" this is "xml".
        /// </summary>
        internal string SubTypeComponent => _subType;
 
        /// <summary>
        /// Gets an enumerator over the parsed parameter/value pairs.
        /// Only the enumerator is exposed, not the dictionary itself, so callers get
        /// read-only access. For a content type such as
        /// type/subtype ; param1=value1 ; param2=value2 ; param3="value3"
        /// the enumerator walks the stored parameter/value entries.
        /// The backing dictionary is created on first access if it does not exist yet.
        /// </summary>
        internal Dictionary<string, string>.Enumerator ParameterValuePairs
        {
            get
            {
                _parameterDictionary ??= new Dictionary<string, string>();
                return _parameterDictionary.GetEnumerator();
            }
        }
        #endregion Internal Properties
 
        #region Internal Methods
 
        /// <summary>
        /// Strict comparison of two content types: the type and subtype are compared
        /// case-insensitively, and neither operand may carry parameters.
        /// Equivalent to calling the two-argument overload with
        /// allowParameterValuePairs set to false.
        /// </summary>
        /// <param name="contentType">Content type to be compared with</param>
        /// <returns>The result of the two-argument overload with parameters disallowed.</returns>
        internal bool AreTypeAndSubTypeEqual(ContentType contentType) =>
            AreTypeAndSubTypeEqual(contentType, allowParameterValuePairs: false);
 
        /// <summary>
        /// Compares the type and subtype of this content type with those of another one,
        /// case-insensitively. Parameter/value pairs never take part in the comparison;
        /// to compare them, enumerate ParameterValuePairs on both objects.
        /// </summary>
        /// <param name="contentType">Content type to be compared with</param>
        /// <param name="allowParameterValuePairs">If true, parameters may be present on either operand
        /// and are simply ignored. If false, the presence of any parameter on either operand makes
        /// the comparison fail.</param>
        /// <returns>
        /// false if <paramref name="contentType"/> is null, or if parameters are disallowed and
        /// either operand has at least one; otherwise true exactly when both the type and the
        /// subtype match ignoring case.
        /// </returns>
        internal bool AreTypeAndSubTypeEqual(ContentType contentType, bool allowParameterValuePairs)
        {
            if (contentType is null)
            {
                return false;
            }

            // Both operands are inspected the same way, straight from the backing field.
            // Going through ParameterValuePairs would allocate an empty dictionary on the other
            // object and would require reading Enumerator.Current after a failed MoveNext().
            if (!allowParameterValuePairs &&
                (_parameterDictionary?.Count > 0 || contentType._parameterDictionary?.Count > 0))
            {
                return false;
            }

            // OrdinalIgnoreCase is a safe comparison here because ValidateToken restricts
            // _type and _subType to ASCII letters, digits and a small set of symbols. It would
            // not be safe for strings that have not been restricted in the same way.
            return string.Equals(_type, contentType.TypeComponent, StringComparison.OrdinalIgnoreCase) &&
                   string.Equals(_subType, contentType.SubTypeComponent, StringComparison.OrdinalIgnoreCase);
        }
 
        /// <summary>
        /// Returns the string form of the content type. If no string has been cached yet, a
        /// normalized one is built as type/subtype followed, for each stored parameter, by
        /// " ; name=value", and the result is cached for subsequent calls.
        /// </summary>
        /// <returns>The cached or newly built content type string.</returns>
        public override string ToString()
        {
            if (_contentType is null)
            {
                Debug.Assert(!string.IsNullOrEmpty(_type) || !string.IsNullOrEmpty(_subType));

                char space = s_linearWhiteSpaceChars[0];

                StringBuilder builder = new StringBuilder(_type);
                builder.Append(PackUriHelper.ForwardSlashChar);
                builder.Append(_subType);

                if (_parameterDictionary != null)
                {
                    // Enumerate the pairs directly rather than walking Keys and indexing back
                    // into the dictionary, which costs a second lookup per parameter.
                    foreach (KeyValuePair<string, string> parameter in _parameterDictionary)
                    {
                        builder.Append(space);
                        builder.Append(';');
                        builder.Append(space);
                        builder.Append(parameter.Key);
                        builder.Append('=');
                        builder.Append(parameter.Value);
                    }
                }

                _contentType = builder.ToString();
            }

            return _contentType;
        }
 
        #endregion Internal Methods
 
        #region Private Methods
 
 
        /// <summary>
        /// Verifies that every carriage return ('\r') in the content type string has a
        /// line feed ('\n') immediately before or immediately after it.
        /// </summary>
        /// <param name="contentType">Content type string whose first and last characters
        /// have already been checked not to be Linear White Space characters.</param>
        /// <exception cref="ArgumentException">A '\r' has no adjacent '\n'.</exception>
        private static void ValidateCarriageReturns(string contentType)
        {
            Debug.Assert(!IsLinearWhiteSpaceChar(contentType[0]) && !IsLinearWhiteSpaceChar(contentType[contentType.Length - 1]));

            // The caller guarantees that neither the first nor the last character is an LWS
            // character, so any '\r' found below has a neighbor on both sides and the
            // position - 1 / position + 1 accesses stay inside the string.

            char lineFeed = s_linearWhiteSpaceChars[1];
            char carriageReturn = s_linearWhiteSpaceChars[2];

            for (int position = contentType.IndexOf(carriageReturn);
                 position != -1;
                 position = contentType.IndexOf(carriageReturn, position + 1))
            {
                bool hasAdjacentLineFeed =
                    contentType[position - 1] == lineFeed || contentType[position + 1] == lineFeed;

                if (!hasAdjacentLineFeed)
                {
                    throw new ArgumentException(SR.InvalidLinearWhiteSpaceCharacter);
                }
            }
        }
 
        /// <summary>
        /// Splits the "type/subtype" portion of the content type at its single '/' and stores
        /// both halves after validating each of them as a token.
        /// </summary>
        /// <param name="typeAndSubType">Portion of the content type that holds the type and subtype;
        /// trailing Linear White Space characters are tolerated and removed.</param>
        /// <exception cref="ArgumentException">The input contains no '/' or more than one '/',
        /// or either half is not a valid token.</exception>
        private void ParseTypeAndSubType(ReadOnlySpan<char> typeAndSubType)
        {
            // LWS may legitimately follow the subtype (before a ';'), so drop it first.
            ReadOnlySpan<char> trimmed = typeAndSubType.TrimEnd(s_linearWhiteSpaceChars);

            int slashIndex = trimmed.IndexOf('/');
            if (slashIndex < 0)
            {
                // No separator at all.
                throw new ArgumentException(SR.InvalidTypeSubType);
            }

            ReadOnlySpan<char> afterSlash = trimmed.Slice(slashIndex + 1);
            if (afterSlash.IndexOf('/') >= 0)
            {
                // A second separator.
                throw new ArgumentException(SR.InvalidTypeSubType);
            }

            _type = ValidateToken(trimmed.Slice(0, slashIndex).ToString());
            _subType = ValidateToken(afterSlash.ToString());
        }
 
        /// <summary>
        /// Parses the parameter section of the content type, a sequence of
        /// ";" parameter=value items, and adds every validated pair to the parameter dictionary.
        /// </summary>
        /// <param name="parameterAndValue">Remainder of the content type, starting at the
        /// first ';'.</param>
        /// <exception cref="ArgumentException">An item does not start with ';', nothing follows a ';',
        /// the '=' is missing or sits at the very start or very end of the remaining text, or the
        /// name or value fails validation.</exception>
        private void ParseParameterAndValue(ReadOnlySpan<char> parameterAndValue)
        {
            ReadOnlySpan<char> rest = parameterAndValue;

            while (rest.Length > 0)
            {
                // Every item must open with a semicolon. On the first pass this is effectively
                // an assertion, since the caller slices at the first ';'.
                if (rest[0] != ';')
                {
                    throw new ArgumentException(SR.ExpectingSemicolon);
                }

                // A lone ';' with nothing behind it is an error. Trailing LWS cannot occur here
                // because the constructor has already rejected it.
                if (rest.Length == 1)
                {
                    throw new ArgumentException(SR.ExpectingParameterValuePairs);
                }

                // Drop the ';' and any LWS in front of the parameter name.
                rest = rest.Slice(1).TrimStart(s_linearWhiteSpaceChars);

                int equalsIndex = rest.IndexOf('=');
                bool nameMissing = equalsIndex <= 0;
                bool valueMissing = equalsIndex == rest.Length - 1;
                if (nameMissing || valueMissing)
                {
                    throw new ArgumentException(SR.InvalidParameterValuePair);
                }

                int valueStart = equalsIndex + 1;
                int valueLength = GetLengthOfParameterValue(rest, valueStart);

                _parameterDictionary ??= new Dictionary<string, string>();
                string name = ValidateToken(rest.Slice(0, equalsIndex).ToString());
                string value = ValidateQuotedStringOrToken(rest.Slice(valueStart, valueLength).ToString());
                _parameterDictionary.Add(name, value);

                // Move past the value and any LWS that precedes the next ';'.
                rest = rest.Slice(valueStart + valueLength).TrimStart(s_linearWhiteSpaceChars);
            }
        }
 
        /// <summary>
        /// Returns the length of the parameter value that begins at <paramref name="startIndex"/>.
        /// </summary>
        /// <remarks>
        /// An unquoted value ends at the first ';' or, if one comes earlier, at the first Linear
        /// White Space character. When no ';' follows, the whole remainder is taken as the value.
        /// A quoted value ends at the first '"' that is not escaped. A quote is escaped only when
        /// it is preceded by an odd number of consecutive backslashes, because per the grammar
        /// (quoted-pair = "\" CHAR) a pair of backslashes is itself a complete escaped backslash.
        /// </remarks>
        /// <param name="s">Text that contains the parameter value.</param>
        /// <param name="startIndex">Index in <paramref name="s"/> of the first character of the value.</param>
        /// <returns>Number of characters in the value, including both quotes for a quoted value.</returns>
        /// <exception cref="ArgumentException">The value opens with '"' but has no unescaped closing '"'.</exception>
        private static int GetLengthOfParameterValue(ReadOnlySpan<char> s, int startIndex)
        {
            // All positions below are relative to the start of the value.
            ReadOnlySpan<char> value = s.Slice(startIndex);

            if (value[0] != '"')
            {
                int semicolonIndex = value.IndexOf(';');
                if (semicolonIndex == -1)
                {
                    // Last parameter: everything that is left belongs to the value.
                    return value.Length;
                }

                int lwsIndex = value.IndexOfAny(s_linearWhiteSpaceChars);
                return lwsIndex != -1 && lwsIndex < semicolonIndex ? lwsIndex : semicolonIndex;
            }

            // Quoted value: look for the closing quote, skipping escaped ones.
            int searchFrom = 1;
            while (true)
            {
                int relativeIndex = value.Slice(searchFrom).IndexOf('"');
                if (relativeIndex == -1)
                {
                    throw new ArgumentException(SR.InvalidParameterValue);
                }

                int quoteIndex = searchFrom + relativeIndex;

                // Count the run of backslashes directly in front of the quote. Index 0 is the
                // opening quote, so the scan never needs to go below index 1.
                int backslashCount = 0;
                for (int i = quoteIndex - 1; i >= 1 && value[i] == '\\'; i--)
                {
                    backslashCount++;
                }

                if ((backslashCount & 1) == 0)
                {
                    // Even count: the backslashes pair up among themselves, so this quote closes the string.
                    return quoteIndex + 1;
                }

                // Odd count: the last backslash escapes this quote; keep searching after it.
                searchFrom = quoteIndex + 1;
            }
        }
 
        /// <summary>
        /// Validates a token. Every character must be either an ASCII letter or digit, or one of
        /// the additional characters listed in s_allowedCharacters.
        /// </summary>
        /// <param name="token">string token</param>
        /// <returns>The same token, once validated.</returns>
        /// <exception cref="ArgumentException">The token is null or empty, or contains a character
        /// outside the permitted set.</exception>
        private static string ValidateToken(string token)
        {
            if (string.IsNullOrEmpty(token))
            {
                throw new ArgumentException(SR.InvalidToken_ContentType);
            }

            foreach (char candidate in token)
            {
                bool permitted = IsAsciiLetterOrDigit(candidate) || IsAllowedCharacter(candidate);
                if (!permitted)
                {
                    throw new ArgumentException(SR.InvalidToken_ContentType);
                }
            }

            return token;
        }
 
        /// <summary>
        /// Validates a parameter value. A value of at least two characters that both starts and
        /// ends with '"' is validated as a quoted string (the text between the quotes is checked);
        /// any other value is validated as a token.
        /// </summary>
        /// <param name="parameterValue">parameter value string</param>
        /// <returns>The same parameter value, once validated.</returns>
        /// <exception cref="ArgumentException">The value is null or empty, or fails the quoted-text
        /// or token validation.</exception>
        private static string ValidateQuotedStringOrToken(string parameterValue)
        {
            if (string.IsNullOrEmpty(parameterValue))
            {
                throw new ArgumentException(SR.InvalidParameterValue);
            }

            int lastIndex = parameterValue.Length - 1;
            bool isQuoted = lastIndex >= 1 &&
                            parameterValue[0] == '"' &&
                            parameterValue[lastIndex] == '"';

            if (isQuoted)
            {
                // Validate only what lies between the surrounding quotes.
                ValidateQuotedText(parameterValue.AsSpan(1, lastIndex - 1));
            }
            else
            {
                ValidateToken(parameterValue);
            }

            return parameterValue;
        }
 
        /// <summary>
        /// Validates the text found between the quotes of a quoted string. Linear White Space
        /// characters are always accepted. Any other character at or below ' ', any character
        /// at or above 0xFF, and any '"' that is not directly preceded by a backslash is rejected.
        /// Empty text is valid.
        /// </summary>
        /// <param name="quotedText">Quoted-string content, without the surrounding quotes.</param>
        /// <exception cref="ArgumentException">The text contains a rejected character.</exception>
        private static void ValidateQuotedText(ReadOnlySpan<char> quotedText)
        {
            // NOTE(review): the grammar in the file header lists DEL (127) as a CTL, but it is
            // not rejected here - confirm whether that is intentional.

            // '\0' stands in for "no previous character"; it is not a backslash, so a quote in
            // first position is treated as unescaped.
            char previous = '\0';

            foreach (char current in quotedText)
            {
                if (!IsLinearWhiteSpaceChar(current))
                {
                    if (current <= ' ' || current >= 0xFF)
                    {
                        throw new ArgumentException(SR.InvalidParameterValue);
                    }

                    if (current == '"' && previous != '\\')
                    {
                        throw new ArgumentException(SR.InvalidParameterValue);
                    }
                }

                previous = current;
            }
        }
 
        /// <summary>
        /// Tells whether a character is one of the extra (non-alphanumeric) characters
        /// listed in s_allowedCharacters.
        /// </summary>
        /// <param name="character">input character</param>
        /// <returns>true if the character is found in s_allowedCharacters; otherwise false.</returns>
        private static bool IsAllowedCharacter(char character)
        {
            int position = Array.IndexOf(s_allowedCharacters, character);
            return position != -1;
        }
 
        /// <summary>
        /// Tells whether a character is an ASCII letter ('A'-'Z', 'a'-'z') or an ASCII digit ('0'-'9').
        /// </summary>
        /// <param name="character">input character</param>
        /// <returns>true for an ASCII letter or digit; false for every other character.</returns>
        private static bool IsAsciiLetterOrDigit(char character)
        {
            if (character >= 'A' && character <= 'Z')
            {
                return true;
            }

            if (character >= 'a' && character <= 'z')
            {
                return true;
            }

            return character >= '0' && character <= '9';
        }
 
        /// <summary>
        /// Tells whether a character is one of the Linear White Space characters held in
        /// s_linearWhiteSpaceChars: ' ', '\t', '\n', '\r'.
        /// </summary>
        /// <param name="ch">input character</param>
        /// <returns>true if the character is a Linear White Space character; otherwise false.</returns>
        private static bool IsLinearWhiteSpaceChar(char ch)
        {
            // Anything above ' ' is ruled out without searching the array.
            if (ch > ' ')
            {
                return false;
            }

            return Array.IndexOf(s_linearWhiteSpaceChars, ch) != -1;
        }
 
        #endregion Private Methods
 
        #region Private Members
 
        // Cached string form. The constructor sets it to string.Empty for empty input;
        // otherwise it stays null until ToString() builds and stores the normalized string.
        private string? _contentType;
        // Type token assigned by ParseTypeAndSubType; stays empty when nothing was parsed.
        private string _type = string.Empty;
        // Subtype token assigned by ParseTypeAndSubType; stays empty when nothing was parsed.
        private string _subType = string.Empty;
        // Parameter name -> value (values are stored exactly as validated, quotes included).
        // Created lazily, either while parsing parameters or on first access to ParameterValuePairs.
        private Dictionary<string, string>? _parameterDictionary;
 
        // Characters, other than ASCII letters and digits, that ValidateToken accepts in a token
        // (checked through IsAllowedCharacter).
        // This array is sorted by the ASCII value of these characters.
        private static readonly char[] s_allowedCharacters =
        {
            '!' /*33*/, '#'  /*35*/, '$'  /*36*/,
            '%' /*37*/, '&'  /*38*/, '\'' /*39*/,
            '*' /*42*/, '+'  /*43*/, '-'  /*45*/,
            '.' /*46*/, '^'  /*94*/, '_'  /*95*/,
            '`' /*96*/, '|' /*124*/, '~' /*126*/,
        };
 
        // Linear White Space characters.
        // The element order matters: code in this class reads individual entries by index
        // ([0] as the space written by ToString, [1] and [2] as '\n' and '\r' in
        // ValidateCarriageReturns), so do not reorder the entries.
        private static readonly char[] s_linearWhiteSpaceChars =
         { ' ',  // space           - \x20
           '\n', // new line        - \x0A
           '\r', // carriage return - \x0D
           '\t'  // horizontal tab  - \x09
         };
 
        #endregion Private Members
    }
}