26 instantiations of IngestionDocument
Microsoft.Extensions.DataIngestion.Markdig (1)
MarkdownParser.cs (1)
42IngestionDocument result = new(identifier)
Microsoft.Extensions.DataIngestion.MarkItDown (1)
src\Libraries\Microsoft.Extensions.DataIngestion.Markdig\MarkdownParser.cs (1)
42IngestionDocument result = new(identifier)
Microsoft.Extensions.DataIngestion.Tests (24)
Chunkers\DocumentChunkerTests.cs (1)
26IngestionDocument emptyDoc = new("emptyDoc");
Chunkers\HeaderChunkerTests.cs (5)
18IngestionDocument doc = new("nonTrivial"); 59IngestionDocument doc = new("longOne"); 84IngestionDocument doc = new("longOne"); 106IngestionDocument doc = new("withNewLines"); 233IngestionDocument doc = new("withNewLines");
Chunkers\SectionChunkerTests.cs (6)
23IngestionDocument doc = new IngestionDocument("doc"); 42IngestionDocument doc = new("doc") 78IngestionDocument doc = new IngestionDocument("doc"); 91IngestionDocument doc = new("doc") 147IngestionDocument doc = new IngestionDocument("twoChunksNoOverlapDoc"); 166IngestionDocument doc = new IngestionDocument("doc");
Chunkers\SemanticSimilarityChunkerTests.cs (3)
37IngestionDocument doc = new IngestionDocument("doc"); 65IngestionDocument doc = new IngestionDocument("doc"); 139IngestionDocument doc = new("dotnet-languages");
IngestionDocumentTests.cs (1)
22IngestionDocument doc = new("withSubSections");
Processors\AlternativeTextEnricherTests.cs (2)
95IngestionDocument document = new("withImage") 199return new("batchTest")
Processors\ClassificationEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\KeywordEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\SentimentEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\SummaryEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Writers\VectorStoreWriterTests.cs (2)
26IngestionDocument document = new(documentId); 75IngestionDocument document = new(documentId);
95 references to IngestionDocument
Microsoft.Extensions.DataIngestion (18)
Chunkers\ElementsChunker.cs (1)
35internal IEnumerable<IngestionChunk<string>> Process(IngestionDocument document, string context, List<IngestionDocumentElement> elements)
Chunkers\HeaderChunker.cs (2)
32public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(IngestionDocument document, 68private IEnumerable<IngestionChunk<string>> SplitIntoChunks(IngestionDocument document, string?[] headers, List<IngestionDocumentElement> elements)
Chunkers\SectionChunker.cs (3)
13/// Treats each <see cref="IngestionDocumentSection" /> in a <see cref="IngestionDocument.Sections"/> as a separate entity. 29public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(IngestionDocument document, [EnumeratorCancellation] CancellationToken cancellationToken = default) 47private void Process(IngestionDocument document, IngestionDocumentSection section, List<IngestionChunk<string>> chunks, string? parentContext = null)
Chunkers\SemanticSimilarityChunker.cs (4)
16/// Splits a <see cref="IngestionDocument"/> into chunks based on semantic similarity between its elements based on cosine distance of their embeddings. 47public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(IngestionDocument document, 59private async Task<List<(IngestionDocumentElement element, float distance)>> CalculateDistancesAsync(IngestionDocument documents, CancellationToken cancellationToken) 96private IEnumerable<IngestionChunk<string>> MakeChunks(IngestionDocument document, List<(IngestionDocumentElement element, float distance)> elementDistances)
IngestionPipeline.cs (3)
148IngestionDocument? document = null; 173private async Task<IngestionDocument> IngestAsync(IngestionDocument document, Activity? parentActivity, CancellationToken cancellationToken)
IngestionResult.cs (2)
22public IngestionDocument? Document { get; } 34internal IngestionResult(string documentId, IngestionDocument? document, Exception? exception)
Processors\ImageAlternativeTextEnricher.cs (2)
36public override async Task<IngestionDocument> ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default)
Writers\VectorStoreWriter.cs (1)
164private async Task<IReadOnlyList<object>> GetPreExistingChunksIdsAsync(IngestionDocument document, CancellationToken cancellationToken)
Microsoft.Extensions.DataIngestion.Abstractions (18)
IngestionChunk.cs (3)
12/// Represents a chunk of content extracted from an <see cref="IngestionDocument"/>. 32public IngestionChunk(T content, IngestionDocument document, string? context = null) 55public IngestionDocument Document { get; }
IngestionChunker.cs (2)
10/// Splits an <see cref="IngestionDocument"/> into chunks. 21public abstract IAsyncEnumerable<IngestionChunk<T>> ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default);
IngestionDocument.cs (1)
16/// Initializes a new instance of the <see cref="IngestionDocument"/> class.
IngestionDocumentElement.cs (1)
15/// Represents an element within an <see cref="IngestionDocument"/>.
IngestionDocumentProcessor.cs (4)
10/// Part of the document processing pipeline that takes a <see cref="IngestionDocument"/> as input and produces a (potentially modified) <see cref="IngestionDocument"/> as output. 20public abstract Task<IngestionDocument> ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default);
IngestionDocumentReader.cs (7)
13/// Reads source content and converts it to an <see cref="IngestionDocument"/>. 18/// Reads a file and converts it to an <see cref="IngestionDocument"/>. 24public Task<IngestionDocument> ReadAsync(FileInfo source, CancellationToken cancellationToken = default) 31/// Reads a file and converts it to an <see cref="IngestionDocument"/>. 39public virtual async Task<IngestionDocument> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default) 49/// Reads a stream and converts it to an <see cref="IngestionDocument"/>. 56public abstract Task<IngestionDocument> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default);
Microsoft.Extensions.DataIngestion.Markdig (6)
MarkdownParser.cs (3)
18internal static IngestionDocument Parse(string markdown, string identifier) 39private static IngestionDocument Map(MarkdownDocument markdownDocument, string documentMarkdown, string identifier) 42IngestionDocument result = new(identifier)
MarkdownReader.cs (3)
12/// Reads Markdown content and converts it to an <see cref="IngestionDocument"/>. 17public override async Task<IngestionDocument> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default) 32public override async Task<IngestionDocument> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
Microsoft.Extensions.DataIngestion.MarkItDown (7)
MarkItDownMcpReader.cs (2)
36public override async Task<IngestionDocument> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default) 68public override async Task<IngestionDocument> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
MarkItDownReader.cs (2)
34public override async Task<IngestionDocument> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default) 97public override async Task<IngestionDocument> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
src\Libraries\Microsoft.Extensions.DataIngestion.Markdig\MarkdownParser.cs (3)
18internal static IngestionDocument Parse(string markdown, string identifier) 39private static IngestionDocument Map(MarkdownDocument markdownDocument, string documentMarkdown, string identifier) 42IngestionDocument result = new(identifier)
Microsoft.Extensions.DataIngestion.Tests (46)
Chunkers\DocumentChunkerTests.cs (1)
26IngestionDocument emptyDoc = new("emptyDoc");
Chunkers\HeaderChunkerTests.cs (9)
18IngestionDocument doc = new("nonTrivial"); 59IngestionDocument doc = new("longOne"); 84IngestionDocument doc = new("longOne"); 106IngestionDocument doc = new("withNewLines"); 133IngestionDocument document = CreateDocumentWithLargeTable(); 144IngestionDocument document = CreateDocumentWithLargeTable(); 173IngestionDocument document = CreateDocumentWithLargeTable(); 220private static IngestionDocument CreateDocumentWithLargeTable() 233IngestionDocument doc = new("withNewLines");
Chunkers\SectionChunkerTests.cs (6)
23IngestionDocument doc = new IngestionDocument("doc"); 42IngestionDocument doc = new("doc") 78IngestionDocument doc = new IngestionDocument("doc"); 91IngestionDocument doc = new("doc") 147IngestionDocument doc = new IngestionDocument("twoChunksNoOverlapDoc"); 166IngestionDocument doc = new IngestionDocument("doc");
Chunkers\SemanticSimilarityChunkerTests.cs (3)
37IngestionDocument doc = new IngestionDocument("doc"); 65IngestionDocument doc = new IngestionDocument("doc"); 139IngestionDocument doc = new("dotnet-languages");
IngestionDocumentTests.cs (1)
22IngestionDocument doc = new("withSubSections");
IngestionPipelineTests.cs (2)
178public override IAsyncEnumerable<IngestionChunk<DataContent>> ProcessAsync(IngestionDocument document, CancellationToken cancellationToken = default) 194? Task.FromException<IngestionDocument>(new ExpectedException())
Processors\AlternativeTextEnricherTests.cs (5)
95IngestionDocument document = new("withImage") 153IngestionDocument document = CreateDocument(batchSize, batchCount, _imageContent); 173IngestionDocument document = CreateDocument(options.BatchSize, BatchCount, _imageContent); 174IngestionDocument got = await sut.ProcessAsync(document); 184private static IngestionDocument CreateDocument(int batchSize, int batchCount, ReadOnlyMemory<byte> imageContent)
Processors\ClassificationEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\KeywordEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\SentimentEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Processors\SummaryEnricherTests.cs (1)
18private static readonly IngestionDocument _document = new("test");
Readers\DocumentReaderConformanceTests.cs (5)
83IngestionDocument document = await CreateDocumentReader().ReadAsync( 98IngestionDocument document = await CreateDocumentReader().ReadAsync(inputFile); 119var document = await reader.ReadAsync(inputFile); 144var document = await CreateDocumentReader().ReadAsync(wordDoc, "doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"); 199protected virtual void SimpleAsserts(IngestionDocument document, string source, string expectedId)
Readers\MarkdownReaderTests.cs (4)
44IngestionDocument document = await ReadAsync(markdownContent); 88IngestionDocument document = await ReadAsync(markdownContent); 116IngestionDocument document = await ReadAsync(markdownContent); 136private async Task<IngestionDocument> ReadAsync(string content)
Readers\MarkItDownReaderTests.cs (1)
20protected override void SimpleAsserts(IngestionDocument document, string source, string expectedId)
Utils\TestReader.cs (3)
13public TestReader(Func<Stream, string, string, CancellationToken, Task<IngestionDocument>> readAsyncCallback) 18public Func<Stream, string, string, CancellationToken, Task<IngestionDocument>> ReadAsyncCallback { get; } 20public override Task<IngestionDocument> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
Writers\VectorStoreWriterTests.cs (2)
26IngestionDocument document = new(documentId); 75IngestionDocument document = new(documentId);