// HttpParser.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
 
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
 
namespace Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http;
 
using BadHttpRequestException = Microsoft.AspNetCore.Http.BadHttpRequestException;
 
/// <summary>
/// This API supports framework infrastructure and is not intended to be used
/// directly from application code.
/// </summary>
/// <typeparam name="TRequestHandler">This API supports framework infrastructure and is not intended to be used
/// directly from application code.</typeparam>
public class HttpParser<TRequestHandler> : IHttpParser<TRequestHandler> where TRequestHandler : IHttpHeadersHandler, IHttpRequestLineHandler
{
    private readonly bool _showErrorDetails;
    private readonly bool _disableHttp1LineFeedTerminators;
 
    /// <summary>
    /// This API supports framework infrastructure and is not intended to be used
    /// directly from application code.
    /// </summary>
    /// <remarks>
    /// Creates a parser with error details enabled by chaining to the
    /// <c>showErrorDetails</c> overload with <see langword="true"/>.
    /// </remarks>
    public HttpParser() : this(showErrorDetails: true)
    {
    }
 
    /// <summary>
    /// This API supports framework infrastructure and is not intended to be used
    /// directly from application code.
    /// </summary>
    /// <param name="showErrorDetails">When <see langword="true"/>, exceptions raised for rejected input carry an
    /// escaped copy of the offending bytes; when <see langword="false"/> the detail string is empty.</param>
    /// <remarks>
    /// LF-only line terminators are disabled only when the AppContext switch named by
    /// <c>KestrelServerOptions.DisableHttp1LineFeedTerminatorsSwitchKey</c> is both present and set to true.
    /// </remarks>
    public HttpParser(bool showErrorDetails) : this(showErrorDetails, AppContext.TryGetSwitch(KestrelServerOptions.DisableHttp1LineFeedTerminatorsSwitchKey, out var disabled) && disabled)
    {
    }
 
    // Constructor that lets the caller choose the line-terminator policy directly
    // instead of reading it from the AppContext switch.
    //   showErrorDetails: include the escaped offending bytes in rejection exceptions.
    //   disableHttp1LineFeedTerminators: reject lines terminated by a bare LF (no preceding CR).
    internal HttpParser(bool showErrorDetails, bool disableHttp1LineFeedTerminators)
    {
        _showErrorDetails = showErrorDetails;
        _disableHttp1LineFeedTerminators = disableHttp1LineFeedTerminators;
    }
 
    // C# has no literal suffix for byte, so these values are cast once here to avoid a cast at every use site.
    private const byte ByteCR = (byte)'\r';
    private const byte ByteLF = (byte)'\n';
    private const byte ByteColon = (byte)':';
    private const byte ByteSpace = (byte)' ';
    private const byte ByteTab = (byte)'\t';
    private const byte ByteQuestionMark = (byte)'?';
    private const byte BytePercentage = (byte)'%';
    private const int MinTlsRequestSize = 1; // We need at least 1 byte to check for a proper TLS request line
    private static ReadOnlySpan<byte> RequestLineDelimiters => [ByteLF, 0];
 
    /// <summary>
    /// This API supports framework infrastructure and is not intended to be used
    /// directly from application code.
    /// </summary>
    /// <remarks>
    /// Parses a single request line of the form <c>method SP target SP version CR</c> (the LF is the
    /// delimiter consumed from the reader) and reports it through <c>handler.OnStartLine</c>.
    /// A version that is not followed by CR is accepted only when LF-only terminators are enabled.
    /// </remarks>
    /// <param name="handler">Receives the parsed method, version, path offsets and start-line bytes.</param>
    /// <param name="reader">Reader positioned at the start of the request line.</param>
    /// <returns>
    /// <see langword="true"/> when a request line was parsed and handed to <paramref name="handler"/>;
    /// <see langword="false"/> when the reader does not yet contain an LF (or NUL) delimiter.
    /// </returns>
    /// <exception cref="BadHttpRequestException">The request line is malformed or uses an unrecognized HTTP version.</exception>
    public bool ParseRequestLine(TRequestHandler handler, ref SequenceReader<byte> reader)
    {
        // Find the next delimiter. NUL is searched for too so that it can be rejected below.
        if (!reader.TryReadToAny(out ReadOnlySpan<byte> requestLine, RequestLineDelimiters, advancePastDelimiter: false))
        {
            return false;
        }

        // Consume the delimiter.
        var foundDelimiter = reader.TryRead(out var next);
        Debug.Assert(foundDelimiter);
        // If null character found, or request line is empty
        if (next == 0 || requestLine.Length == 0)
        {
            // Rewind and re-read so the rejected bytes include the delimiter in the error message
            reader.Rewind(requestLine.Length + 1);
            var readResult = reader.TryReadExact(requestLine.Length + 1, out var requestLineSequence);
            Debug.Assert(readResult);
            requestLine = requestLineSequence.IsSingleSegment ? requestLineSequence.FirstSpan : requestLineSequence.ToArray();
            RejectRequestLine(requestLine);
        }

        // Get Method and set the offset
        var method = requestLine.GetKnownMethod(out var methodEnd);
        if (method == HttpMethod.Custom)
        {
            methodEnd = GetUnknownMethodLength(requestLine);
        }

        var versionAndMethod = new HttpVersionAndMethod(method, methodEnd);

        // Use a new offset var as methodEnd needs to be on stack
        // as its passed by reference above so can't be in register.
        // Skip space
        var offset = methodEnd + 1;
        if ((uint)offset >= (uint)requestLine.Length)
        {
            // Start of path not found
            RejectRequestLine(requestLine);
        }

        var ch = requestLine[offset];
        if (ch == ByteSpace || ch == ByteQuestionMark || ch == BytePercentage)
        {
            // Empty path is illegal, or path starting with percentage
            RejectRequestLine(requestLine);
        }

        // Target = Path and Query
        var targetStart = offset;
        var pathEncoded = false;
        // Skip first char (just checked)
        offset++;

        // Find end of path and if path is encoded
        var index = requestLine.Slice(offset).IndexOfAny(ByteSpace, ByteQuestionMark, BytePercentage);
        if (index >= 0 && requestLine[offset + index] == BytePercentage)
        {
            pathEncoded = true;
            offset += index;
            // Found an encoded character, now just search for end of path
            index = requestLine.Slice(offset).IndexOfAny(ByteSpace, ByteQuestionMark);
        }

        if (index < 0)
        {
            // The target is never terminated by a space or '?'. Without this check the byte after
            // the first target char would be skipped as if it were the separator, so a line such as
            // "GET /XHTTP/1.1" would be taken for "GET / HTTP/1.1".
            RejectRequestLine(requestLine);
        }

        offset += index;
        ch = requestLine[offset];

        var path = new TargetOffsetPathLength(targetStart, length: offset - targetStart, pathEncoded);

        // Query string
        if (ch == ByteQuestionMark)
        {
            // We have a query string; it runs up to the next space
            for (; (uint)offset < (uint)requestLine.Length; offset++)
            {
                ch = requestLine[offset];
                if (ch == ByteSpace)
                {
                    break;
                }
            }
        }

        var queryEnd = offset;
        // Consume space
        offset++;

        while ((uint)offset < (uint)requestLine.Length
            && requestLine[offset] == ByteSpace)
        {
            // It's invalid to have multiple spaces between the url resource and version
            // but some clients do it. Skip them.
            offset++;
        }

        // Version + CR is 9 bytes which should take us to .Length
        // LF should have been dropped prior to method call
        if ((uint)offset + 9 != (uint)requestLine.Length || requestLine[offset + 8] != ByteCR)
        {
            // LF should have been dropped prior to method call
            // If !_disableHttp1LineFeedTerminators and offset + 8 is .Length,
            // then requestLine is valid since it means LF was the next char
            if (_disableHttp1LineFeedTerminators || (uint)offset + 8 != (uint)requestLine.Length)
            {
                RejectRequestLine(requestLine);
            }
        }

        // Version
        var remaining = requestLine.Slice(offset);
        var httpVersion = remaining.GetKnownVersion();
        versionAndMethod.Version = httpVersion;
        if (httpVersion == HttpVersion.Unknown)
        {
            // HTTP version is unsupported.
            RejectUnknownVersion(remaining);
        }

        // We need to reinterpret from ReadOnlySpan into Span to allow path mutation for
        // in-place normalization and decoding to transform into a canonical path
        var startLine = MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(requestLine), queryEnd);
        handler.OnStartLine(versionAndMethod, path, startLine);

        return true;
    }
 
    /// <summary>
    /// This API supports framework infrastructure and is not intended to be used
    /// directly from application code.
    /// </summary>
    /// <remarks>
    /// Consumes header lines from <paramref name="reader"/> one at a time, passing each to the handler,
    /// until the empty line that ends the header block is found. Lines end with CR LF; a bare LF is
    /// also accepted unless LF-only terminators are disabled.
    /// </remarks>
    /// <param name="handler">Receives each parsed header and the headers-complete notification.</param>
    /// <param name="reader">Reader positioned at the start of a header line.</param>
    /// <returns>
    /// <see langword="true"/> once the terminating empty line has been consumed and
    /// <c>handler.OnHeadersComplete</c> has been called; <see langword="false"/> when the available data
    /// ends before that point.
    /// </returns>
    public bool ParseHeaders(TRequestHandler handler, ref SequenceReader<byte> reader)
    {
        while (!reader.End)
        {
            // Check if the reader's span contains an LF to skip the reader if possible
            var span = reader.UnreadSpan;

            // Fast path, CR/LF at the beginning: this is the empty line that ends the headers
            if (span.Length >= 2 && span[0] == ByteCR && span[1] == ByteLF)
            {
                reader.Advance(2);
                handler.OnHeadersComplete(endStream: false);
                return true;
            }

            // Tracks which terminator was seen so a rejection can echo the right line ending
            var foundCrlf = false;

            var lfOrCrIndex = span.IndexOfAny(ByteCR, ByteLF);
            if (lfOrCrIndex >= 0)
            {
                if (span[lfOrCrIndex] == ByteCR)
                {
                    // We got a CR. Is this a CR/LF sequence?
                    var crIndex = lfOrCrIndex;
                    // Move past the CR so the reader can be peeked for an LF in the next segment
                    reader.Advance(crIndex + 1);

                    bool hasDataAfterCr;

                    if ((uint)span.Length > (uint)(crIndex + 1) && span[crIndex + 1] == ByteLF)
                    {
                        // CR/LF in the same span (common case)
                        span = span.Slice(0, crIndex);
                        foundCrlf = true;
                    }
                    else if ((hasDataAfterCr = reader.TryPeek(out byte lfMaybe)) && lfMaybe == ByteLF)
                    {
                        // CR/LF but split between spans; the CR is the last byte of this span
                        span = span.Slice(0, span.Length - 1);
                        foundCrlf = true;
                    }
                    else
                    {
                        // What's after the CR?
                        if (!hasDataAfterCr)
                        {
                            // No more chars after CR? Don't consume an incomplete header.
                            // Undo the advance above so the line is re-read when more data arrives.
                            reader.Rewind(crIndex + 1);
                            return false;
                        }
                        else if (crIndex == 0)
                        {
                            // CR followed by something other than LF
                            KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidRequestHeadersNoCRLF);
                        }
                        else
                        {
                            // Include the thing after the CR in the rejection exception
                            // (clamped to the span when the CR is its last byte).
                            var stopIndex = Math.Min(crIndex + 2, span.Length);
                            RejectRequestHeader(span[..stopIndex]);
                        }
                    }

                    if (foundCrlf)
                    {
                        // Advance past the LF too
                        reader.Advance(1);

                        // Empty line?
                        if (crIndex == 0)
                        {
                            handler.OnHeadersComplete(endStream: false);
                            return true;
                        }
                    }
                }
                else
                {
                    // We got an LF with no CR before it.
                    var lfIndex = lfOrCrIndex;
                    if (_disableHttp1LineFeedTerminators)
                    {
                        // Bare LF is not allowed: reject, echoing the line with its LF
                        RejectRequestHeader(AppendEndOfLine(span[..lfIndex], lineFeedOnly: true));
                    }

                    // Consume the header including the LF
                    reader.Advance(lfIndex + 1);

                    span = span.Slice(0, lfIndex);
                    if (span.Length == 0)
                    {
                        // A lone LF is the empty line that ends the headers
                        handler.OnHeadersComplete(endStream: false);
                        return true;
                    }
                }
            }
            else
            {
                // No CR or LF in the current span. Is this a multi-span header?
                int length = ParseMultiSpanHeader(handler, ref reader);
                if (length < 0)
                {
                    // A negative length means no complete header line is available yet
                    // (malformed lines throw instead), so stop and wait for more data.
                    return false;
                }

                // This was a multi-span header and has already been handed to the handler. Advance the reader.
                reader.Advance(length);

                continue;
            }

            // We got to a point where we believe we have a header.
            if (!TryTakeSingleHeader(handler, span))
            {
                // Sequence needs to be CRLF and not contain an inner CR not part of terminator.
                // Not parsable as a valid name:value header pair.
                RejectRequestHeader(AppendEndOfLine(span, lineFeedOnly: !foundCrlf));
            }
        }

        // Ran out of data before the empty line that ends the headers
        return false;
    }
 
    // Returns a new array holding the bytes of 'span' followed by a line terminator:
    // LF alone when lineFeedOnly is true, otherwise CR LF.
    private static byte[] AppendEndOfLine(ReadOnlySpan<byte> span, bool lineFeedOnly)
    {
        var terminatorLength = lineFeedOnly ? 1 : 2;
        var result = new byte[span.Length + terminatorLength];
        span.CopyTo(result);

        // Write the terminator front to back, starting right after the copied bytes.
        var tail = span.Length;
        if (!lineFeedOnly)
        {
            result[tail] = ByteCR;
            tail++;
        }

        result[tail] = ByteLF;
        return result;
    }
 
    // Parses a header line that crosses more than one segment of the reader's unread sequence.
    // Returns the number of bytes that make up the header, terminator included, so the caller can
    // advance past it; returns -1 when the sequence does not yet contain a complete line.
    // A complete but malformed line is rejected by throwing a bad-request exception.
    // The reader itself is not advanced here.
    private int ParseMultiSpanHeader(TRequestHandler handler, ref SequenceReader<byte> reader)
    {
        var currentSlice = reader.UnreadSequence;

        SequencePosition position = currentSlice.Start;

        // Skip the first segment as the caller already searched it for CR/LF
        var result = currentSlice.TryGet(ref position, out ReadOnlyMemory<byte> memory);
        // there will always be at least 1 segment so this will never return false
        Debug.Assert(result);

        if (position.GetObject() == null)
        {
            // Only 1 segment in the reader currently, this is a partial header, wait for more data
            return -1;
        }

        // Walk the remaining segments until the first CR or LF; headerLength accumulates
        // the number of bytes that precede it.
        var index = -1;
        var headerLength = memory.Length;
        while (currentSlice.TryGet(ref position, out memory))
        {
            index = memory.Span.IndexOfAny(ByteCR, ByteLF);
            if (index >= 0)
            {
                headerLength += index;
                break;
            }
            else if (position.GetObject() == null)
            {
                // Last segment and still no end of line
                return -1;
            }

            headerLength += memory.Length;
        }

        // No CR or LF found in the SequenceReader
        if (index == -1)
        {
            return -1;
        }

        // Is the first EOL char the last of the current slice?
        if (headerLength == currentSlice.Length - 1)
        {
            // Check the EOL char
            if (memory.Span[index] == ByteCR)
            {
                // CR without LF, can't read the header
                return -1;
            }
            else
            {
                if (_disableHttp1LineFeedTerminators)
                {
                    // LF only but disabled

                    // Advance 1 to include LF in result
                    RejectRequestHeader(currentSlice.Slice(0, headerLength + 1).ToSpan());
                }
            }
        }

        ReadOnlySequence<byte> header;
        if (memory.Span[index] == ByteCR)
        {
            // First EOL char is CR, include the char after CR
            // Advance 2 to include CR and LF
            headerLength += 2;
            header = currentSlice.Slice(0, headerLength);
        }
        else if (_disableHttp1LineFeedTerminators)
        {
            // The terminator is an LF and we don't allow it.
            // Advance 1 to include LF in result
            RejectRequestHeader(currentSlice.Slice(0, headerLength + 1).ToSpan());
            // Not reached (the call above throws); needed so 'header' is definitely assigned below.
            return -1;
        }
        else
        {
            // First EOL char is LF. only include this one
            headerLength += 1;
            header = currentSlice.Slice(0, headerLength);
        }

        // 'a:b\n' or 'a:b\r\n'
        var minHeaderSpan = _disableHttp1LineFeedTerminators ? 5 : 4;
        if (headerLength < minHeaderSpan)
        {
            RejectRequestHeader(currentSlice.Slice(0, headerLength).ToSpan());
        }

        // Copy the header into one contiguous buffer: stack for short lines, pooled array otherwise.
        byte[]? array = null;
        Span<byte> headerSpan = headerLength <= 256 ? stackalloc byte[256] : array = ArrayPool<byte>.Shared.Rent(headerLength);

        try
        {
            header.CopyTo(headerSpan);
            headerSpan = headerSpan.Slice(0, headerLength);

            var terminatorSize = -1;

            if (headerSpan[^1] == ByteLF)
            {
                if (headerSpan[^2] == ByteCR)
                {
                    terminatorSize = 2;
                }
                else if (!_disableHttp1LineFeedTerminators)
                {
                    terminatorSize = 1;
                }
            }

            // Last chance to bail if the terminator size is not valid or the header doesn't parse.
            if (terminatorSize == -1 || !TryTakeSingleHeader(handler, headerSpan.Slice(0, headerSpan.Length - terminatorSize)))
            {
                RejectRequestHeader(headerSpan);
            }
        }
        finally
        {
            // Rejection throws, so the rented buffer is returned here rather than after the check;
            // otherwise every rejected long header would leave its buffer outside the pool.
            if (array is not null)
            {
                ArrayPool<byte>.Shared.Return(array);
            }
        }

        return headerLength;
    }
 
    // Splits one header line (line terminator already removed) into name and value and passes
    // them to handler.OnHeader. Spaces and tabs around the value are not part of the value.
    // Returns false, without calling the handler, when the line has no colon, when the name is
    // empty, or when a space or tab appears before the colon.
    private static bool TryTakeSingleHeader(TRequestHandler handler, ReadOnlySpan<byte> headerLine)
    {
        // The name ends at the first colon. Space and tab are searched for as well because
        // neither may appear inside a header name; finding one first means the line is invalid.
        var separator = headerLine.IndexOfAny(ByteColon, ByteSpace, ByteTab);

        // separator <= 0 covers both "nothing found" (-1) and "empty name" (0).
        if (separator <= 0 || headerLine[separator] != ByteColon)
        {
            return false;
        }

        // Step over any whitespace between the colon and the value.
        var first = separator + 1;
        while (first < headerLine.Length && IsBlank(headerLine[first]))
        {
            first++;
        }

        // Step back over any whitespace at the end of the line, never moving before 'first'.
        var last = headerLine.Length - 1;
        while (last > first && IsBlank(headerLine[last]))
        {
            last--;
        }

        // Range end is exclusive, hence last + 1. An absent value yields an empty span.
        handler.OnHeader(name: headerLine.Slice(0, separator), value: headerLine[first..(last + 1)]);

        return true;

        static bool IsBlank(byte b) => b == ByteSpace || b == ByteTab;
    }
 
    // Returns the length of a custom (not well-known) method at the start of the request line.
    // The method ends at the first byte reported by HttpCharacters.IndexOfInvalidTokenChar;
    // the line is rejected unless that byte exists, is not the first byte, and is a space.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private int GetUnknownMethodLength(ReadOnlySpan<byte> span)
    {
        var methodLength = HttpCharacters.IndexOfInvalidTokenChar(span);

        var endsWithSpace = methodLength > 0 && span[methodLength] == ByteSpace;
        if (!endsWithSpace)
        {
            RejectRequestLine(span);
        }

        return methodLength;
    }
 
    // Reports whether the request line starts with the TLS handshake record type (0x16).
    // Only the first byte is inspected.
    // See https://serializethoughts.com/2014/07/27/dissecting-tls-client-hello-message/
    private static bool IsTlsHandshake(ReadOnlySpan<byte> requestLine)
    {
        const byte SslRecordTypeHandshake = (byte)0x16;

        // Too short to contain the byte we need to look at.
        if (requestLine.Length < MinTlsRequestSize)
        {
            return false;
        }

        return requestLine[0] == SslRecordTypeHandshake;
    }
 
    // Always throws. The rejection reason is TlsOverHttpError when the line looks like a
    // TLS handshake, and InvalidRequestLine otherwise.
    [StackTraceHidden]
    private void RejectRequestLine(ReadOnlySpan<byte> requestLine)
    {
        var reason = RequestRejectionReason.InvalidRequestLine;
        if (IsTlsHandshake(requestLine))
        {
            reason = RequestRejectionReason.TlsOverHttpError;
        }

        throw GetInvalidRequestException(reason, requestLine);
    }
 
    // Always throws an InvalidRequestHeader rejection for the given header bytes.
    [StackTraceHidden]
    private void RejectRequestHeader(ReadOnlySpan<byte> headerLine)
    {
        var exception = GetInvalidRequestException(RequestRejectionReason.InvalidRequestHeader, headerLine);
        throw exception;
    }
 
    // Always throws an UnrecognizedHTTPVersion rejection for the given version bytes.
    // 'version' is the tail of the request line: it ends with CR on a CR LF terminated line,
    // but has no CR when the line was terminated by a bare LF.
    [StackTraceHidden]
    private void RejectUnknownVersion(ReadOnlySpan<byte> version)
    {
        // Drop the trailing CR only when it is actually there, so that the last character
        // of the version is not cut from the error detail on LF-only lines.
        var detail = version.Length > 0 && version[^1] == ByteCR
            ? version[..^1]
            : version;

        throw GetInvalidRequestException(RequestRejectionReason.UnrecognizedHTTPVersion, detail);
    }
 
    // Builds (does not throw) the exception for a rejected request. The escaped text of
    // 'headerLine', capped at Constants.MaxExceptionDetailSize, is attached only when
    // error details are enabled; otherwise the detail is an empty string.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private BadHttpRequestException GetInvalidRequestException(RequestRejectionReason reason, ReadOnlySpan<byte> headerLine)
    {
        var detail = string.Empty;
        if (_showErrorDetails)
        {
            detail = headerLine.GetAsciiStringEscaped(Constants.MaxExceptionDetailSize);
        }

        return KestrelBadHttpRequestException.GetException(reason, detail);
    }
}
// File: Internal\Http\HttpParser.cs	Web Access
// Project: src\src\Servers\Kestrel\Core\src\Microsoft.AspNetCore.Server.Kestrel.Core.csproj (Microsoft.AspNetCore.Server.Kestrel.Core)