File: LegacyFeed\V2FeedParser.cs
Web Access
Project: src\src\nuget-client\src\NuGet.Core\NuGet.Protocol\NuGet.Protocol.csproj (NuGet.Protocol)
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

#nullable disable

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.Linq;
using NuGet.Common;
using NuGet.Packaging.Core;
using NuGet.Protocol.Core.Types;
using NuGet.Versioning;

namespace NuGet.Protocol
{
    /// <summary>
    /// A light weight XML parser for NuGet V2 Feeds
    /// </summary>
    public sealed class V2FeedParser : IV2FeedParser
    {
        // XML namespace URIs used to qualify the element names looked up in feed documents.
        private const string W3Atom = "http://www.w3.org/2005/Atom";
        private const string MetadataNS = "http://schemas.microsoft.com/ado/2007/08/dataservices/metadata";
        private const string DataServicesNS = "http://schemas.microsoft.com/ado/2007/08/dataservices";

        // XNames used in the feed.
        // Names in the W3Atom namespace are looked up on the entry/feed element itself;
        // names in the DataServicesNS namespace are looked up under the "properties" element.
        private static readonly XName _xnameEntry = XName.Get("entry", W3Atom);
        private static readonly XName _xnameTitle = XName.Get("title", W3Atom);
        private static readonly XName _xnameContent = XName.Get("content", W3Atom);
        private static readonly XName _xnameLink = XName.Get("link", W3Atom);
        private static readonly XName _xnameProperties = XName.Get("properties", MetadataNS);
        private static readonly XName _xnameId = XName.Get("Id", DataServicesNS);
        private static readonly XName _xnameVersion = XName.Get("Version", DataServicesNS);
        private static readonly XName _xnameSummary = XName.Get("summary", W3Atom);
        private static readonly XName _xnameDescription = XName.Get("Description", DataServicesNS);
        private static readonly XName _xnameIconUrl = XName.Get("IconUrl", DataServicesNS);
        private static readonly XName _xnameLicenseUrl = XName.Get("LicenseUrl", DataServicesNS);
        private static readonly XName _xnameProjectUrl = XName.Get("ProjectUrl", DataServicesNS);
        private static readonly XName _xnameTags = XName.Get("Tags", DataServicesNS);
        private static readonly XName _xnameGalleryDetailsUrl = XName.Get("GalleryDetailsUrl", DataServicesNS);
        private static readonly XName _xnameReportAbuseUrl = XName.Get("ReportAbuseUrl", DataServicesNS);
        private static readonly XName _xnameDependencies = XName.Get("Dependencies", DataServicesNS);
        private static readonly XName _xnameRequireLicenseAcceptance = XName.Get("RequireLicenseAcceptance", DataServicesNS);
        private static readonly XName _xnameDownloadCount = XName.Get("DownloadCount", DataServicesNS);
        private static readonly XName _xnameCreated = XName.Get("Created", DataServicesNS);
        private static readonly XName _xnameLastEdited = XName.Get("LastEdited", DataServicesNS);
        private static readonly XName _xnamePublished = XName.Get("Published", DataServicesNS);
        private static readonly XName _xnameName = XName.Get("name", W3Atom);
        private static readonly XName _xnameAuthor = XName.Get("author", W3Atom);
        private static readonly XName _xnamePackageHash = XName.Get("PackageHash", DataServicesNS);
        private static readonly XName _xnamePackageHashAlgorithm = XName.Get("PackageHashAlgorithm", DataServicesNS);
        private static readonly XName _xnameMinClientVersion = XName.Get("MinClientVersion", DataServicesNS);

        // HTTP source used for every feed request and package download.
        private readonly HttpSource _httpSource;
        // Base address with leading/trailing '/' characters trimmed; relative query URIs are appended to it.
        private readonly string _baseAddress;
        // Builds the relative query URIs for the individual feed operations.
        private readonly V2FeedQueryBuilder _queryBuilder;

        /// <summary>
        /// Creates a V2 parser that reports <paramref name="baseAddress"/> as its source.
        /// </summary>
        /// <param name="httpSource">HttpSource and message handler containing auth/proxy support</param>
        /// <param name="baseAddress">base address for all services from this OData service; also used as the source string</param>
        public V2FeedParser(HttpSource httpSource, string baseAddress)
            : this(httpSource, baseAddress, source: baseAddress)
        {
        }

        /// <summary>
        /// Creates a V2 parser with an explicit source string.
        /// </summary>
        /// <param name="httpSource">HttpSource and message handler containing auth/proxy support</param>
        /// <param name="baseAddress">base address for all services from this OData service; surrounding '/' characters are trimmed</param>
        /// <param name="source">PackageSource useful for reporting meaningful errors that relate back to the configuration</param>
        /// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
        public V2FeedParser(HttpSource httpSource, string baseAddress, string source)
        {
            // Arguments are validated in declaration order: httpSource, baseAddress, source.
            _httpSource = httpSource ?? throw new ArgumentNullException(nameof(httpSource));
            _baseAddress = (baseAddress ?? throw new ArgumentNullException(nameof(baseAddress))).Trim('/');
            Source = source ?? throw new ArgumentNullException(nameof(source));

            _queryBuilder = new V2FeedQueryBuilder();
        }

        /// <summary>
        /// The source string supplied to the constructor (the base address when none was given explicitly).
        /// </summary>
        public string Source { get; private set; }

        /// <summary>
        /// Looks up one exact package. The direct package query is tried first; when it
        /// yields nothing, all packages with the same id are fetched and matched by version.
        /// </summary>
        /// <returns>The matching package, or null when the feed does not contain it.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="log"/> is null.</exception>
        public async Task<V2FeedPackageInfo> GetPackage(
            PackageIdentity package,
            SourceCacheContext sourceCacheContext,
            ILogger log,
            CancellationToken token)
        {
            if (log == null)
            {
                throw new ArgumentNullException(nameof(log));
            }

            // Direct lookup: no result cap (max -1), and "not found" responses are
            // ignored so that the fallback below gets a chance to run.
            var directMatches = await QueryV2FeedAsync(
                _queryBuilder.BuildGetPackageUri(package),
                package.Id,
                max: -1,
                ignoreNotFounds: true,
                sourceCacheContext: sourceCacheContext,
                log: log,
                token: token);

            if (directMatches.Items.Count > 0)
            {
                return directMatches.Items.FirstOrDefault();
            }

            // Fallback: list every version of the id and pick the requested one.
            var sameIdPackages = await FindPackagesByIdAsync(package.Id, sourceCacheContext, log, token);

            return sameIdPackages
                .Where(candidate => candidate.Version == package.Version)
                .FirstOrDefault();
        }

        /// <summary>
        /// Retrieves all packages with the given Id from a V2 feed.
        /// </summary>
        /// <param name="id">Package id to look up; must not be null or empty.</param>
        /// <param name="includeUnlisted">When false, packages that are not listed are dropped.</param>
        /// <param name="includePrerelease">When false, packages with a prerelease version are dropped.</param>
        /// <param name="sourceCacheContext">Cache settings forwarded to the feed query.</param>
        /// <param name="log">Logger; must not be null.</param>
        /// <param name="token">Cancellation token forwarded to the feed query.</param>
        /// <returns>The distinct matching packages, highest version first.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="id"/> is null or empty.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="log"/> is null.</exception>
        public async Task<IReadOnlyList<V2FeedPackageInfo>> FindPackagesByIdAsync(
            string id,
            bool includeUnlisted,
            bool includePrerelease,
            SourceCacheContext sourceCacheContext,
            ILogger log,
            CancellationToken token)
        {
            if (string.IsNullOrEmpty(id))
            {
                throw new ArgumentException(Strings.Argument_Cannot_Be_Null_Or_Empty, nameof(id));
            }

            if (log == null)
            {
                throw new ArgumentNullException(nameof(log));
            }

            var uri = _queryBuilder.BuildFindPackagesByIdUri(id);

            // Set max count to -1, get all packages
            var packages = await QueryV2FeedAsync(
                uri,
                id,
                max: -1,
                ignoreNotFounds: false,
                sourceCacheContext: sourceCacheContext,
                log: log,
                token: token);

            var filtered = packages
                .Items
                .Where(p => (includeUnlisted || p.IsListed) && (includePrerelease || !p.Version.IsPrerelease));

            // De-duplicate before sorting: Enumerable.Distinct is documented as returning an
            // unordered sequence, so the ordering must be the last step for the result to be
            // guaranteed highest-version-first. It also means fewer elements to sort.
            return filtered
                .Distinct()
                .OrderByDescending(p => p.Version)
                .ToList();
        }

        /// <summary>
        /// Retrieves all packages with the given Id from a V2 feed, including unlisted
        /// and prerelease packages.
        /// </summary>
        public Task<IReadOnlyList<V2FeedPackageInfo>> FindPackagesByIdAsync(string id, SourceCacheContext sourceCacheContext, ILogger log, CancellationToken token)
            => FindPackagesByIdAsync(
                id,
                includeUnlisted: true,
                includePrerelease: true,
                sourceCacheContext: sourceCacheContext,
                log: log,
                token: token);

        /// <summary>
        /// Fetches one page of the packages listing for the given search term and filter.
        /// The query bypasses the HTTP cache (no cache context is passed) and the number
        /// of returned items is capped at <paramref name="take"/>.
        /// </summary>
        public async Task<V2FeedPage> GetPackagesPageAsync(
            string searchTerm,
            SearchFilter filters,
            int skip,
            int take,
            ILogger log,
            CancellationToken token)
        {
            var relativeUri = _queryBuilder.BuildGetPackagesUri(searchTerm, filters, skip, take);

            // Only get the first page: stop once 'take' results have been collected.
            return await QueryV2FeedAsync(
                relativeUri,
                id: null,
                max: take,
                ignoreNotFounds: false,
                sourceCacheContext: null,
                log: log,
                token: token);
        }

        /// <summary>
        /// Runs a search query and returns the resulting page, including the "next" link
        /// reported by the feed. The query bypasses the HTTP cache (no cache context is
        /// passed) and the number of returned items is capped at <paramref name="take"/>.
        /// </summary>
        public async Task<V2FeedPage> GetSearchPageAsync(
            string searchTerm,
            SearchFilter filters,
            int skip,
            int take,
            ILogger log,
            CancellationToken token)
        {
            var relativeUri = _queryBuilder.BuildSearchUri(searchTerm, filters, skip, take);

            // Only get the first page: stop once 'take' results have been collected.
            return await QueryV2FeedAsync(
                relativeUri,
                id: null,
                max: take,
                ignoreNotFounds: false,
                sourceCacheContext: null,
                log: log,
                token: token);
        }

        /// <summary>
        /// Runs a search query and returns only the packages of the resulting page
        /// (at most <paramref name="take"/> items).
        /// </summary>
        public async Task<IReadOnlyList<V2FeedPackageInfo>> Search(
            string searchTerm,
            SearchFilter filters,
            int skip,
            int take,
            ILogger log,
            CancellationToken token)
        {
            // Same query as GetSearchPageAsync; only the items are of interest here.
            var searchPage = await GetSearchPageAsync(searchTerm, filters, skip, take, log, token);

            return searchPage.Items;
        }

        /// <summary>
        /// Downloads the given package from an already known download URI, using this
        /// parser's HTTP source.
        /// </summary>
        public async Task<DownloadResourceResult> DownloadFromUrl(
            PackageIdentity package,
            Uri downloadUri,
            PackageDownloadContext downloadContext,
            string globalPackagesFolder,
            ILogger log,
            CancellationToken token)
        {
            var downloadResult = await GetDownloadResultUtility.GetDownloadResultAsync(
                _httpSource,
                package,
                downloadUri,
                downloadContext,
                globalPackagesFolder,
                log,
                token);

            return downloadResult;
        }

        /// <summary>
        /// Resolves the package on the feed to find its download URL, then downloads it.
        /// </summary>
        /// <returns>
        /// The download result, or a result with status
        /// <see cref="DownloadResourceResultStatus.NotFound"/> when the feed does not contain the package.
        /// </returns>
        public async Task<DownloadResourceResult> DownloadFromIdentity(
            PackageIdentity package,
            PackageDownloadContext downloadContext,
            string globalPackagesFolder,
            SourceCacheContext sourceCacheContext,
            ILogger log,
            CancellationToken token)
        {
            var feedEntry = await GetPackage(package, sourceCacheContext, log, token);

            if (feedEntry != null)
            {
                var downloadUri = new Uri(feedEntry.DownloadUrl);

                return await GetDownloadResultUtility.GetDownloadResultAsync(
                    _httpSource,
                    package,
                    downloadUri,
                    downloadContext,
                    globalPackagesFolder,
                    log,
                    token);
            }

            // The feed has no entry for this identity.
            return new DownloadResourceResult(DownloadResourceResultStatus.NotFound);
        }

        /// <summary>
        /// Parses every entry of a feed document. The document root may itself be a single
        /// entry, or a container holding any number of entry elements.
        /// </summary>
        private IEnumerable<V2FeedPackageInfo> ParsePage(XDocument doc, string id, MetadataReferenceCache metadataCache)
        {
            var root = doc.Root;

            if (root.Name != _xnameEntry)
            {
                // Container document: entries are parsed lazily as the sequence is enumerated.
                return root
                    .Elements(_xnameEntry)
                    .Select(entry => ParsePackage(id, entry, metadataCache));
            }

            // The root is the one and only entry; it is parsed right away.
            var singlePackage = ParsePackage(id, root, metadataCache);

            return new List<V2FeedPackageInfo> { singlePackage };
        }

        /// <summary>
        /// Parse an entry into a V2FeedPackageInfo
        /// </summary>
        /// <param name="id">Fallback package id, used only when the entry has neither an 'Id' property nor a 'title' element.</param>
        /// <param name="element">The entry element to read.</param>
        /// <param name="metadataCache">Cache through which the parsed strings and versions are passed.</param>
        /// <remarks>
        /// The 'properties' element, its 'Version' child and the 'content' element with its 'src'
        /// attribute are dereferenced without null checks, so an entry missing any of them
        /// makes this method throw <see cref="NullReferenceException"/>.
        /// </remarks>
        private V2FeedPackageInfo ParsePackage(string id, XElement element, MetadataReferenceCache metadataCache)
        {
            var properties = element.Element(_xnameProperties);
            var idElement = properties.Element(_xnameId);
            var titleElement = element.Element(_xnameTitle);

            // If 'Id' element exist, use its value as accurate package Id
            // Otherwise, use the value of 'title' if it exist
            // Use the given Id as final fallback if all elements above don't exist
            var identityId = metadataCache.GetString(idElement?.Value ?? titleElement?.Value ?? id);
            var versionString = properties.Element(_xnameVersion).Value;
            var version = metadataCache.GetVersion(metadataCache.GetString(versionString));
            // The download URL is the 'src' attribute of the entry's 'content' element.
            var downloadUrl = metadataCache.GetString(element.Element(_xnameContent).Attribute("src").Value);

            // Optional values: GetString yields null for a missing element.
            var title = metadataCache.GetString(titleElement?.Value);
            var summary = metadataCache.GetString(GetString(element, _xnameSummary));
            var description = metadataCache.GetString(GetString(properties, _xnameDescription));
            var iconUrl = metadataCache.GetString(GetString(properties, _xnameIconUrl));
            var licenseUrl = metadataCache.GetString(GetString(properties, _xnameLicenseUrl));
            var projectUrl = metadataCache.GetString(GetString(properties, _xnameProjectUrl));
            var galleryDetailsUrl = metadataCache.GetString(GetString(properties, _xnameGalleryDetailsUrl));
            var reportAbuseUrl = metadataCache.GetString(GetString(properties, _xnameReportAbuseUrl));
            var tags = metadataCache.GetString(GetString(properties, _xnameTags));
            var dependencies = metadataCache.GetString(GetString(properties, _xnameDependencies));

            // The download count is kept as the raw string from the feed.
            var downloadCount = metadataCache.GetString(GetString(properties, _xnameDownloadCount));
            // True only when the element text equals "True" (case-insensitive); a missing element gives false.
            var requireLicenseAcceptance = StringComparer.OrdinalIgnoreCase.Equals(bool.TrueString, GetString(properties, _xnameRequireLicenseAcceptance));

            var packageHash = metadataCache.GetString(GetString(properties, _xnamePackageHash));
            var packageHashAlgorithm = metadataCache.GetString(GetString(properties, _xnamePackageHashAlgorithm));

            NuGetVersion minClientVersion = null;

            // minClientVersion stays null when the element is missing, empty or not a parsable version.
            var minClientVersionString = GetString(properties, _xnameMinClientVersion);
            if (!string.IsNullOrEmpty(minClientVersionString))
            {
                // TryParse only validates the string; the instance actually used comes from the cache.
                if (NuGetVersion.TryParse(minClientVersionString, out minClientVersion))
                {
                    minClientVersion = metadataCache.GetVersion(minClientVersionString);
                }
            }

            // Dates are null when the element is missing or its text cannot be parsed.
            var created = GetDate(properties, _xnameCreated);
            var lastEdited = GetDate(properties, _xnameLastEdited);
            var published = GetDate(properties, _xnamePublished);

            // owners is never assigned in this method; it is always passed on as null.
            IEnumerable<string> owners = null;
            IEnumerable<string> authors = null;

            var authorNode = element.Element(_xnameAuthor);
            if (authorNode != null)
            {
                // Deferred query: the 'name' elements are read when the sequence is enumerated.
                authors = authorNode.Elements(_xnameName).Select(e => metadataCache.GetString(e.Value));
            }

            return new V2FeedPackageInfo(new PackageIdentity(identityId, version), title, summary, description, authors,
                owners, iconUrl, licenseUrl, projectUrl, reportAbuseUrl, galleryDetailsUrl, tags, created, lastEdited,
                published, dependencies, requireLicenseAcceptance, downloadUrl, downloadCount, packageHash,
                packageHashAlgorithm, minClientVersion);
        }

        /// <summary>
        /// Reads the text of a direct child element as a <see cref="string"/>.
        /// </summary>
        /// <returns>
        /// The child's value, or null when <paramref name="parent"/> is null or has no
        /// child named <paramref name="childName"/>.
        /// </returns>
        private static string GetString(XElement parent, XName childName)
        {
            return parent?.Element(childName)?.Value;
        }

        /// <summary>
        /// Retrieve an XML <see cref="DateTimeOffset"/> value safely
        /// </summary>
        /// <param name="parent">Element whose child is read; may be null.</param>
        /// <param name="childName">Name of the child element holding the date text.</param>
        /// <returns>
        /// The parsed date, or null when the element is missing or its text is not a valid date.
        /// </returns>
        private static DateTimeOffset? GetDate(XElement parent, XName childName)
        {
            var dateString = GetString(parent, childName);

            // Feed timestamps are machine-generated, so they are parsed with the invariant
            // culture. Parsing with the current culture would make the result depend on the
            // user's regional settings (for example a non-Gregorian default calendar).
            if (DateTimeOffset.TryParse(dateString, CultureInfo.InvariantCulture, DateTimeStyles.None, out var date))
            {
                return date;
            }

            return null;
        }

        /// <summary>
        /// Queries the feed and follows its "next" links, collecting the parsed packages
        /// until no further link is reported or <paramref name="max"/> results were gathered.
        /// </summary>
        /// <param name="relativeUri">Query URI appended as-is to the base address.</param>
        /// <param name="id">Fallback package id handed to the entry parser; may be null.</param>
        /// <param name="max">Maximum number of results to return; a negative value means no limit.</param>
        /// <param name="ignoreNotFounds">When true, a "404 Not Found" response is treated as an empty page.</param>
        /// <param name="sourceCacheContext">Cache settings; when null the request bypasses the HTTP cache.</param>
        /// <param name="log">Logger forwarded to the HTTP layer.</param>
        /// <param name="token">Cancellation token; paging stops once cancellation has been requested.</param>
        /// <returns>
        /// The collected packages together with the "next" link of the last page that was
        /// read, or null when that page had no such link or returned no document.
        /// </returns>
        /// <exception cref="FatalProtocolException">
        /// Thrown when the feed reports a "next" link that has already been requested.
        /// </exception>
        public async Task<V2FeedPage> QueryV2FeedAsync(
            string relativeUri,
            string id,
            int max,
            bool ignoreNotFounds,
            SourceCacheContext sourceCacheContext,
            ILogger log,
            CancellationToken token)
        {
            var metadataCache = new MetadataReferenceCache();
            var results = new List<V2FeedPackageInfo>();
            var uris = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            var uri = string.Format(CultureInfo.InvariantCulture, "{0}{1}", _baseAddress, relativeUri);
            uris.Add(uri);

            // page
            var page = 1;

            // http cache key
            var cacheKey = GetCacheKey(relativeUri, page);

            // first request
            Task<XDocument> docRequest = LoadXmlAsync(uri, cacheKey, ignoreNotFounds, sourceCacheContext, log, token);

            // TODO: re-implement caching at a higher level for both v2 and v3
            string nextUri = null;
            while (!token.IsCancellationRequested && docRequest != null)
            {
                // TODO: Pages for a package Id are cached separately.
                // So we will get inaccurate data when a page shrinks.
                // However, (1) In most cases the pages grow rather than shrink;
                // (2) cache for pages is valid for only 30 min.
                // So we decide to leave current logic and observe.
                var doc = await docRequest;
                if (doc != null)
                {
                    var result = ParsePage(doc, id, metadataCache);
                    results.AddRange(result);

                    nextUri = GetNextUrl(doc);
                }
                else
                {
                    // No document for this page (204, or an ignored 404): there is nothing
                    // to continue from. Without this reset the link of the previous page
                    // would be requested again and be reported as a duplicate uri.
                    nextUri = null;
                }

                docRequest = null;
                if (max < 0 || results.Count < max)
                {
                    // More results are wanted: request the next page if the feed reported one.
                    if (!string.IsNullOrEmpty(nextUri))
                    {
                        // a bug on the server side causes the same next link to be returned
                        // for every page. To avoid falling into an infinite loop we must
                        // keep track of all uri and error out for any duplicate uri which means
                        // potential bug at server side.

                        if (!uris.Add(nextUri))
                        {
                            throw new FatalProtocolException(string.Format(
                                CultureInfo.CurrentCulture,
                                Strings.Protocol_duplicateUri,
                                nextUri));
                        }
                        page++;
                        cacheKey = GetCacheKey(relativeUri, page);
                        docRequest = LoadXmlAsync(nextUri, cacheKey, ignoreNotFounds, sourceCacheContext, log, token);
                    }
                }
            }

            if (max > -1 && results.Count > max)
            {
                // Remove extra results if the page contained extras
                results = results.Take(max).ToList();
            }

            if (docRequest != null)
            {
                // The loop was left because of cancellation while a request was still pending.
                // explicitly ignore exception to prevent it from going unobserved
                _ = docRequest.ContinueWith(t => { _ = t.Exception; },
                    TaskContinuationOptions.OnlyOnFaulted |
                    TaskContinuationOptions.ExecuteSynchronously);
            }

            return new V2FeedPage(
                results,
                string.IsNullOrEmpty(nextUri) ? null : nextUri);
        }

        /// <summary>
        /// Builds the HTTP cache key for one page of a query; every page of the same
        /// query gets its own key.
        /// </summary>
        private string GetCacheKey(string relativeUri, int page)
            => $"list_{relativeUri}_page{page}";

        /// <summary>
        /// Requests <paramref name="uri"/> and loads the response body as an XML document.
        /// When both <paramref name="cacheKey"/> and <paramref name="sourceCacheContext"/> are
        /// non-null the request is made as a cached request; otherwise the response is processed directly.
        /// </summary>
        /// <param name="uri">Absolute uri to request.</param>
        /// <param name="cacheKey">Key passed to the cached request; null selects the non-cached path.</param>
        /// <param name="ignoreNotFounds">When true, a "404 Not Found" response yields null instead of an error.</param>
        /// <param name="sourceCacheContext">Cache settings; null selects the non-cached path.</param>
        /// <param name="log">Logger forwarded to the HTTP layer.</param>
        /// <param name="token">Cancellation token forwarded to the HTTP layer and the XML loader.</param>
        /// <returns>The loaded document, or null for "204 No Content" and for an ignored 404.</returns>
        /// <exception cref="FatalProtocolException">
        /// Cached path: wraps any exception raised while fetching or reading the response.
        /// Non-cached path: thrown for any status other than 200, 204 and an ignored 404.
        /// </exception>
        internal async Task<XDocument> LoadXmlAsync(
            string uri,
            string cacheKey,
            bool ignoreNotFounds,
            SourceCacheContext sourceCacheContext,
            ILogger log,
            CancellationToken token)
        {
            if (cacheKey != null && sourceCacheContext != null)
            {
                // NOTE(review): the meaning of the second argument (0) is defined by
                // HttpSourceCacheContext.Create, which is not visible here — confirm there.
                var httpSourceCacheContext = HttpSourceCacheContext.Create(sourceCacheContext, 0);

                try
                {
                    return await _httpSource.GetAsync(
                        new HttpSourceCachedRequest(
                            uri,
                            cacheKey,
                            httpSourceCacheContext)
                        {
                            AcceptHeaderValues =
                            {
                            new MediaTypeWithQualityHeaderValue("application/atom+xml"),
                            new MediaTypeWithQualityHeaderValue("application/xml")
                            },
                            EnsureValidContents = stream => HttpStreamValidation.ValidateXml(uri, stream),
                            MaxTries = 1,
                            IgnoreNotFounds = ignoreNotFounds
                        },
                        async response =>
                        {
                            if (ignoreNotFounds && response.Status == HttpSourceResultStatus.NotFound)
                            {
                                // Treat "404 Not Found" as an empty response.
                                return null;
                            }
                            else if (response.Status == HttpSourceResultStatus.NoContent)
                            {
                                // Always treat "204 No Content" as exactly that.
                                return null;
                            }
                            else
                            {
                                return await LoadXmlAsync(response.Stream, token);
                            }
                        },
                        log,
                        token);
                }
                catch (Exception ex)
                {
                    // Every exception type is caught here and rethrown as a
                    // FatalProtocolException, with the original kept as inner exception.
                    var message = string.Format(
                        CultureInfo.CurrentCulture,
                        Strings.Log_FailedToFetchV2FeedHttp,
                        uri,
                        ex.Message);

                    throw new FatalProtocolException(message, ex);
                }
            }
            else
            {
                // return results without httpCache
                return await _httpSource.ProcessResponseAsync(
                new HttpSourceRequest(
                    () =>
                    {
                        // Same Accept headers as the cached path above.
                        var request = HttpRequestMessageFactory.Create(HttpMethod.Get, uri, log);
                        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/atom+xml"));
                        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/xml"));
                        return request;
                    })
                {
                    IsRetry = true
                },
                async response =>
                {
                    if (response.StatusCode == HttpStatusCode.OK)
                    {
                        var networkStream = await response.Content.ReadAsStreamAsync();
                        return await LoadXmlAsync(networkStream, token);
                    }
                    else if (ignoreNotFounds && response.StatusCode == HttpStatusCode.NotFound)
                    {
                        // Treat "404 Not Found" as an empty response.
                        return null;
                    }
                    else if (response.StatusCode == HttpStatusCode.NoContent)
                    {
                        // Always treat "204 No Content" as exactly that.
                        return null;
                    }
                    else
                    {
                        // Any other status code is reported as a protocol failure.
                        throw new FatalProtocolException(string.Format(
                            CultureInfo.CurrentCulture,
                            Strings.Log_FailedToFetchV2Feed,
                            uri,
                            (int)response.StatusCode,
                            response.ReasonPhrase));
                    }
                },
                sourceCacheContext,
                log,
                token);
            }
        }

        /// <summary>
        /// Finds the link to the next page among the direct children of the document root.
        /// </summary>
        /// <returns>
        /// The href of the first link with rel="next" (compared case-insensitively) that
        /// has an href attribute, or null when there is none.
        /// </returns>
        internal static string GetNextUrl(XDocument doc)
        {
            // Example of what this looks like in the odata feed:
            // <link rel="next" href="{nextLink}" />
            foreach (var link in doc.Root.Elements(_xnameLink))
            {
                var rel = link.Attribute("rel");
                if (rel == null || !string.Equals(rel.Value, "next", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // A "next" link without an href is skipped; later links are still considered.
                var href = link.Attribute("href");
                if (href != null)
                {
                    return href.Value;
                }
            }

            return null;
        }

        /// <summary>
        /// Loads an XML document from <paramref name="stream"/>. Whitespace, comments and
        /// processing instructions are skipped, and DTDs are ignored.
        /// </summary>
        internal static async Task<XDocument> LoadXmlAsync(Stream stream, CancellationToken token)
        {
            using (var seekableStream = await stream.AsSeekableStreamAsync(token))
            {
                var readerSettings = new XmlReaderSettings()
                {
                    CloseInput = true,
                    IgnoreWhitespace = true,
                    IgnoreComments = true,
                    IgnoreProcessingInstructions = true,
                    DtdProcessing = DtdProcessing.Ignore, // for consistency with earlier behavior (v3.3 and before)
                };

                using (var reader = XmlReader.Create(seekableStream, readerSettings))
                {
                    return XDocument.Load(reader, LoadOptions.None);
                }
            }
        }
    }
}