// HttpListenerRequestUriBuilder.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;

namespace System.Net
{
    // We don't use the cooked URL because http.sys unescapes all percent-encoded values. However,
    // we also can't just use the raw Uri, since http.sys supports not only Utf-8, but also ANSI/DBCS and
    // Unicode code points. System.Uri only supports Utf-8.
    // The purpose of this class is to convert all ANSI, DBCS, and Unicode code points into percent encoded
    // Utf-8 characters.
    internal sealed class HttpListenerRequestUriBuilder
    {
        // Both encodings are strict: they are configured to throw on invalid input instead of
        // substituting replacement characters, so a failed conversion can be detected by the parser.
        private static readonly UTF8Encoding s_utf8Encoding =
            new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
        private static readonly Encoding s_ansiEncoding = Encoding.GetEncoding(
            codepage: 0,
            encoderFallback: new EncoderExceptionFallback(),
            decoderFallback: new DecoderExceptionFallback());

        // Uri parts handed to the constructor.
        private readonly string _rawUri;
        private readonly string _cookedUriScheme;
        private readonly string _cookedUriHost;
        private readonly string _cookedUriPath;
        private readonly string _cookedUriQuery;

        // Accumulates the request Uri string (scheme, host, re-encoded path, query) while the raw path
        // is being parsed.
        private StringBuilder? _requestUriString;

        // The raw path is scanned character by character, left to right. While scanning, '_rawOctets'
        // collects a run of consecutive percent encoded octets as real byte values. Example: for the
        // path /pa%C3%84th%2F/ the list holds { 0xC3, 0x84 } when the scan reaches 't', and { 0x2F }
        // when it reaches the last '/'. Whenever such a run ends, the collected bytes are decoded with
        // the encoding under test and the resulting string is percent encoded as UTF-8 octets.
        //
        // ANSI (Latin 1) example: for '/pa%C4th/', %C4 is stored in '_rawOctets'; on reaching 't' the
        // bytes { 0xC4 } are decoded with the ANSI encoding, yielding '\u00C4', which is then written
        // to '_requestUriString' as its UTF-8 percent encoding. The resulting path is '/pa%C3%84th/'.
        private List<byte>? _rawOctets;
        private string? _rawPath;

        // The resulting request Uri; stays null until one of the build strategies succeeds.
        private Uri? _requestUri;

        // Captures the raw request Uri together with the individual parts of the cooked Uri.
        // The raw Uri, scheme, host and path are expected to be non-empty; a null query is
        // stored as an empty string. A cooked path consisting of only "*" is stored as "/*".
        private HttpListenerRequestUriBuilder(string rawUri, string cookedUriScheme, string cookedUriHost,
            string cookedUriPath, string cookedUriQuery)
        {
            Debug.Assert(!string.IsNullOrEmpty(rawUri), "Empty raw URL.");
            _rawUri = rawUri;

            Debug.Assert(!string.IsNullOrEmpty(cookedUriScheme), "Empty cooked URL scheme.");
            _cookedUriScheme = cookedUriScheme;

            Debug.Assert(!string.IsNullOrEmpty(cookedUriHost), "Empty cooked URL host.");
            _cookedUriHost = cookedUriHost;

            Debug.Assert(!string.IsNullOrEmpty(cookedUriPath), "Empty cooked URL path.");
            _cookedUriPath = AddSlashToAsteriskOnlyPath(cookedUriPath);

            _cookedUriQuery = cookedUriQuery is null ? string.Empty : cookedUriQuery;
        }

        // Entry point: builds the request Uri from the raw Uri and the cooked Uri parts by
        // creating a one-shot builder instance and running it.
        public static Uri GetRequestUri(string rawUri, string cookedUriScheme, string cookedUriHost,
            string cookedUriPath, string cookedUriQuery) =>
            new HttpListenerRequestUriBuilder(rawUri, cookedUriScheme, cookedUriHost, cookedUriPath, cookedUriQuery)
                .Build();

        // Produces the request Uri. The raw path is tried first; the cooked path is used only
        // when the raw path did not yield a Uri.
        private Uri Build()
        {
            BuildRequestUriUsingRawPath();
            if (_requestUri != null)
            {
                return _requestUri;
            }

            // Fallback: the raw path could not be converted.
            BuildRequestUriUsingCookedPath();

            return _requestUri!;
        }

        // Builds '_requestUri' by concatenating the cooked scheme, host, path and query.
        // A failure is only logged; '_requestUri' is left as set by Uri.TryCreate.
        private void BuildRequestUriUsingCookedPath()
        {
            string cookedUri = $"{_cookedUriScheme}{Uri.SchemeDelimiter}{_cookedUriHost}{_cookedUriPath}{_cookedUriQuery}";

            if (Uri.TryCreate(cookedUri, UriKind.Absolute, out _requestUri))
            {
                return;
            }

            // Creating a Uri from the cooked parts is expected to work; if it does not, at least log it.
            if (NetEventSource.Log.IsEnabled())
            {
                NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_create_uri, _cookedUriScheme, _cookedUriHost, _cookedUriPath, _cookedUriQuery));
            }
        }

        // Tries to build '_requestUri' from the raw Uri's path: first with the primary encoding and,
        // only if that attempt reports an encoding error, once more with the secondary encoding.
        private void BuildRequestUriUsingRawPath()
        {
            // The raw path is extracted here because it is only needed on this code path.
            _rawPath = GetPath(_rawUri);

            ParsingResult outcome = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Primary));
            if (outcome == ParsingResult.EncodingError)
            {
                outcome = BuildRequestUriUsingRawPath(GetEncoding(EncodingType.Secondary));
            }

            // Log that no Uri could be created from the raw string.
            if (outcome != ParsingResult.Success && NetEventSource.Log.IsEnabled())
            {
                NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_create_uri, _cookedUriScheme, _cookedUriHost, _rawPath, _cookedUriQuery));
            }
        }

        // Maps an encoding slot to the encoding instance used for it: the ANSI encoding for
        // Secondary, the UTF-8 encoding for everything else.
        private static Encoding GetEncoding(EncodingType type)
        {
            Debug.Assert((type == EncodingType.Primary) || (type == EncodingType.Secondary), $"Unknown 'EncodingType' value: {type}");

            return type == EncodingType.Secondary ? s_ansiEncoding : s_utf8Encoding;
        }

        // Performs one attempt at building '_requestUri' from the raw path, decoding percent encoded
        // octets with 'encoding'. Resets the per-attempt state first, so it can be called repeatedly.
        // Returns Success when a Uri was created, EncodingError when the octets are not valid in
        // 'encoding', and InvalidString when the path is malformed or the result is not a valid Uri.
        private ParsingResult BuildRequestUriUsingRawPath(Encoding encoding)
        {
            Debug.Assert(encoding != null, "'encoding' must be assigned.");
            Debug.Assert(!string.IsNullOrEmpty(_rawPath), "'rawPath' must have at least one character.");

            // Fresh state for this attempt: no pending octets, and a Uri string that so far only
            // holds "<scheme>://<host>".
            _rawOctets = new List<byte>();
            _requestUriString = new StringBuilder()
                .Append(_cookedUriScheme)
                .Append(Uri.SchemeDelimiter)
                .Append(_cookedUriHost);

            ParsingResult outcome = ParseRawPath(encoding);
            if (outcome == ParsingResult.Success)
            {
                Debug.Assert(_rawOctets.Count == 0,
                    "Still raw octets left. They must be added to the result path.");

                _requestUriString.Append(_cookedUriQuery);

                string candidate = _requestUriString.ToString();
                if (!Uri.TryCreate(candidate, UriKind.Absolute, out _requestUri))
                {
                    // The string itself is not a valid Uri, so retrying with a different
                    // encoding would not help.
                    outcome = ParsingResult.InvalidString;
                }
            }

            if (outcome != ParsingResult.Success && NetEventSource.Log.IsEnabled())
            {
                NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_raw_path, _rawPath, encoding.EncodingName));
            }

            return outcome;
        }

        // Scans '_rawPath' from left to right and appends its normalized form to '_requestUriString':
        //  - literal characters are copied as they are,
        //  - "%XX" octets are collected and, once a run of them ends, decoded with 'encoding' and
        //    written as percent encoded UTF-8,
        //  - "%uXXXX" code points are written as percent encoded UTF-8 directly.
        // Returns InvalidString for a malformed escape sequence and EncodingError when collected
        // octets cannot be converted.
        private ParsingResult ParseRawPath(Encoding encoding)
        {
            Debug.Assert(encoding != null, "'encoding' must be assigned.");
            Debug.Assert(_rawPath != null);

            string path = _rawPath;
            int position = 0;

            while (position < path.Length)
            {
                char symbol = path[position];

                if (symbol != '%')
                {
                    // A literal character ends any run of percent encoded octets: flush the run
                    // first, then copy the character itself.
                    if (!EmptyDecodeAndAppendRawOctetsList(encoding))
                    {
                        return ParsingResult.EncodingError;
                    }

                    Debug.Assert(_requestUriString != null);
                    _requestUriString!.Append(symbol);
                    position++;
                    continue;
                }

                // 'position' is at a '%': at least two more characters are required ("%XX").
                if (position + 2 >= path.Length)
                {
                    return ParsingResult.InvalidString;
                }

                char marker = path[position + 1];
                if (marker == 'u' || marker == 'U')
                {
                    // "%uXXXX" takes six characters in total, so position + 5 must still be in range.
                    if (position + 5 >= path.Length)
                    {
                        return ParsingResult.InvalidString;
                    }

                    // The code point ends any run of percent encoded octets: flush the run before
                    // appending the code point.
                    if (!EmptyDecodeAndAppendRawOctetsList(encoding))
                    {
                        return ParsingResult.EncodingError;
                    }

                    if (!AppendUnicodeCodePointValuePercentEncoded(path.Substring(position + 2, 4)))
                    {
                        return ParsingResult.InvalidString;
                    }

                    position += 6;
                }
                else
                {
                    // '%' not followed by 'u': a percent encoded octet "%XX". It is only collected
                    // here; decoding happens when the run of octets ends.
                    if (!AddPercentEncodedOctetToRawOctetsList(path.Substring(position + 1, 2)))
                    {
                        return ParsingResult.InvalidString;
                    }

                    position += 3;
                }
            }

            // The path may end with a run of percent encoded octets; make sure they reach the result.
            if (!EmptyDecodeAndAppendRawOctetsList(encoding))
            {
                return ParsingResult.EncodingError;
            }

            return ParsingResult.Success;
        }

        // Converts the four hex digits of a "%uXXXX" sequence into the corresponding character and
        // appends that character to '_requestUriString' as percent encoded UTF-8 octets.
        //
        // codePoint: the four characters following "%u".
        // Returns true when the value was appended; false (after logging) when 'codePoint' is not
        // made up of hex digits only, or when the value cannot be converted to a character or to UTF-8.
        private bool AppendUnicodeCodePointValuePercentEncoded(string codePoint)
        {
            // http.sys only supports %uXXXX (4 hex-digits), even though unicode code points could have up to
            // 6 hex digits. Therefore we parse always 4 characters after %u and convert them to an int.
            //
            // NumberStyles.AllowHexSpecifier is used on purpose instead of NumberStyles.HexNumber:
            // HexNumber additionally tolerates leading and trailing white space, which would accept
            // malformed sequences such as "%u 41 " as if they were valid code points.
            if (!int.TryParse(codePoint, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out int codePointValue))
            {
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, codePoint));
                }

                return false;
            }

            string? unicodeString = null;
            try
            {
                unicodeString = char.ConvertFromUtf32(codePointValue);
                AppendOctetsPercentEncoded(_requestUriString!, s_utf8Encoding.GetBytes(unicodeString));

                return true;
            }
            catch (ArgumentOutOfRangeException)
            {
                // char.ConvertFromUtf32() rejected the value as not being a valid code point.
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, codePoint));
                }
            }
            catch (EncoderFallbackException e)
            {
                // s_utf8Encoding.GetBytes() failed.
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_to_utf8, unicodeString, e.Message));
                }
            }

            return false;
        }

        // Parses the two hex digits of a "%XX" sequence and adds the resulting byte to '_rawOctets'.
        //
        // escapedCharacter: the two characters following '%'.
        // Returns true when the byte was added; false (after logging) when 'escapedCharacter' is not
        // made up of hex digits only.
        private bool AddPercentEncodedOctetToRawOctetsList(string escapedCharacter)
        {
            // NumberStyles.AllowHexSpecifier is used on purpose instead of NumberStyles.HexNumber:
            // HexNumber additionally tolerates leading and trailing white space, which would accept
            // malformed sequences such as "% 2" or "%2 " and silently turn them into the octet 0x02.
            if (!byte.TryParse(escapedCharacter, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out byte encodedValue))
            {
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_percent_value, escapedCharacter));
                }

                return false;
            }

            Debug.Assert(_rawOctets != null);
            _rawOctets!.Add(encodedValue);

            return true;
        }

        // Flushes the octets collected in '_rawOctets': validates them by decoding with 'encoding',
        // appends them to '_requestUriString' as percent encoded UTF-8, and clears the list.
        // Returns true when there was nothing to flush or the flush succeeded; false (after logging)
        // when the octets are not valid in 'encoding' or the decoded text cannot be encoded as UTF-8.
        // On failure the list is left untouched.
        private bool EmptyDecodeAndAppendRawOctetsList(Encoding encoding)
        {
            List<byte> pending = _rawOctets!;
            if (pending.Count == 0)
            {
                return true;
            }

            byte[] octets = pending.ToArray();
            string? decoded = null;
            try
            {
                // Decoding doubles as validation: it throws if the bytes are not a valid string
                // in 'encoding'.
                decoded = encoding.GetString(octets);

                // Bytes that are already UTF-8 are emitted as they are; anything else is
                // re-encoded from the decoded string.
                byte[] utf8Octets = encoding == s_utf8Encoding
                    ? octets
                    : s_utf8Encoding.GetBytes(decoded);

                AppendOctetsPercentEncoded(_requestUriString!, utf8Octets);
                pending.Clear();

                return true;
            }
            catch (DecoderFallbackException e)
            {
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_bytes, GetOctetsAsString(pending), e.Message));
                }
            }
            catch (EncoderFallbackException e)
            {
                // s_utf8Encoding.GetBytes() failed.
                if (NetEventSource.Log.IsEnabled())
                {
                    NetEventSource.Error(this, SR.Format(SR.net_log_listener_cant_convert_to_utf8, decoded, e.Message));
                }
            }

            return false;
        }

        // Appends every octet to 'target' as '%' followed by two upper-case hex digits.
        private static void AppendOctetsPercentEncoded(StringBuilder target, IEnumerable<byte> octets)
        {
            foreach (byte value in octets)
            {
                target.Append('%').Append(value.ToString("X2"));
            }
        }

        // Formats the octets as two-digit upper-case hex values separated by single spaces,
        // e.g. "C3 84". An empty list yields an empty string.
        private static string GetOctetsAsString(List<byte> octets)
        {
            List<string> hexValues = octets.ConvertAll(value => value.ToString("X2"));

            return string.Join(" ", hexValues);
        }

        // Returns the path portion of the raw request Uri string: everything from the start of the
        // path up to (not including) the first '?', or up to the end of the string. A path of
        // just "*" is returned as "/*".
        private static string GetPath(string uriString)
        {
            Debug.Assert(uriString != null, "uriString must not be null");
            Debug.Assert(uriString.Length > 0, "uriString must not be empty");

            int pathStart = 0;

            // Fast path: nearly all request strings are relative Uris starting with '/', in which
            // case the path begins at position 0 (http.sys has already trimmed leading whitespace).
            if (uriString[0] != '/')
            {
                // The scheme cannot be compared against the cooked scheme: http.sys accepts a
                // request line 'GET https://myserver/' for a request to http://myserver/ (note http
                // vs. https). So simply accept either prefix.
                int authorityStart =
                    uriString.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ? 7 :
                    uriString.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ? 8 :
                    0;

                if (authorityStart == 0)
                {
                    // RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
                    // 'authority' is only used with CONNECT, which HttpListener never receives.
                    // Being neither an absolute path (leading '/') nor an absolute Uri (http:// or
                    // https://), the string must therefore be '*'.
                    Debug.Assert((uriString.Length == 1) && (uriString[0] == '*'), "Unknown request Uri string format",
                        "Request Uri string is not an absolute Uri, absolute path, or '*': {0}", uriString);

                    // Should anything else ever show up here, stay consistent with 2.0/3.5
                    // behavior: prepend a slash and treat the string as a path.
                    uriString = "/" + uriString;
                }
                else
                {
                    // Absolute Uri: the path begins at the first '/' after the authority. Uris such
                    // as "http://server?query=value/1/2" are invalid according to RFC2616 and
                    // http.sys behavior (a query requires at least one '/' between the authority
                    // and the '?'), so searching for the first '/' is safe.
                    int firstSlash = uriString.IndexOf('/', authorityStart);

                    // No slash at all, e.g. request line 'GET http://myserver': the path is empty.
                    pathStart = firstSlash >= 0 ? firstSlash : uriString.Length;
                }
            }

            // The path ends at the first '?' or at the end of the string. A '#' is not expected:
            // http.sys does not accept Uris containing fragments and RFC2616 does not allow them
            // in request Uris.
            int questionMark = uriString.IndexOf('?', pathStart);
            int pathEnd = questionMark >= 0 ? questionMark : uriString.Length;

            // The result is never null.
            return AddSlashToAsteriskOnlyPath(uriString.Substring(pathStart, pathEnd - pathStart));
        }

        // Returns "/*" when 'path' is exactly "*"; returns 'path' unchanged otherwise.
        private static string AddSlashToAsteriskOnlyPath(string path)
        {
            Debug.Assert(path != null, "'path' must not be null");

            // A request like "OPTIONS * HTTP/1.1" sent to the listener should produce the request
            // Uri "http[s]://server[:port]/*" to be compatible with pre-4.0 behavior.
            return path.Equals("*", StringComparison.Ordinal) ? "/*" : path;
        }

        // Outcome of one attempt to turn the raw path into a request Uri.
        private enum ParsingResult
        {
            // The raw path was parsed and a Uri could be created from the result.
            Success,
            // The raw path contains a malformed escape sequence, or the resulting string is not a
            // valid Uri. This outcome does not trigger a retry with the secondary encoding.
            InvalidString,
            // The collected percent encoded octets could not be converted using the encoding under
            // test. After a primary-encoding attempt this triggers a retry with the secondary encoding.
            EncodingError
        }

        // Selects which encoding is used to decode percent encoded octets of the raw path.
        private enum EncodingType
        {
            // Tried first; GetEncoding maps it to the UTF-8 encoding.
            Primary,
            // Tried only after the primary attempt reported an encoding error; GetEncoding maps it
            // to the ANSI encoding.
            Secondary
        }
    }
}
// File: System\Net\HttpListenerRequestUriBuilder.cs
// Project: src\runtime\src\libraries\System.Net.HttpListener\src\System.Net.HttpListener.csproj (System.Net.HttpListener)