26 instantiations of IngestionDocument
Microsoft.Extensions.DataIngestion.Markdig (1)
MarkdownParser.cs (1)
42
IngestionDocument result =
new
(identifier)
Microsoft.Extensions.DataIngestion.MarkItDown (1)
src\Libraries\Microsoft.Extensions.DataIngestion.Markdig\MarkdownParser.cs (1)
42
IngestionDocument result =
new
(identifier)
Microsoft.Extensions.DataIngestion.Tests (24)
Chunkers\DocumentChunkerTests.cs (1)
26
IngestionDocument emptyDoc =
new
("emptyDoc");
Chunkers\HeaderChunkerTests.cs (5)
18
IngestionDocument doc =
new
("nonTrivial");
59
IngestionDocument doc =
new
("longOne");
84
IngestionDocument doc =
new
("longOne");
106
IngestionDocument doc =
new
("withNewLines");
233
IngestionDocument doc =
new
("withNewLines");
Chunkers\SectionChunkerTests.cs (6)
23
IngestionDocument doc = new
IngestionDocument
("doc");
42
IngestionDocument doc =
new
("doc")
78
IngestionDocument doc = new
IngestionDocument
("doc");
91
IngestionDocument doc =
new
("doc")
147
IngestionDocument doc = new
IngestionDocument
("twoChunksNoOverlapDoc");
166
IngestionDocument doc = new
IngestionDocument
("doc");
Chunkers\SemanticSimilarityChunkerTests.cs (3)
37
IngestionDocument doc = new
IngestionDocument
("doc");
65
IngestionDocument doc = new
IngestionDocument
("doc");
139
IngestionDocument doc =
new
("dotnet-languages");
IngestionDocumentTests.cs (1)
22
IngestionDocument doc =
new
("withSubSections");
Processors\AlternativeTextEnricherTests.cs (2)
95
IngestionDocument document =
new
("withImage")
199
return
new
("batchTest")
Processors\ClassificationEnricherTests.cs (1)
18
private static readonly IngestionDocument _document =
new
("test");
Processors\KeywordEnricherTests.cs (1)
18
private static readonly IngestionDocument _document =
new
("test");
Processors\SentimentEnricherTests.cs (1)
18
private static readonly IngestionDocument _document =
new
("test");
Processors\SummaryEnricherTests.cs (1)
18
private static readonly IngestionDocument _document =
new
("test");
Writers\VectorStoreWriterTests.cs (2)
26
IngestionDocument document =
new
(documentId);
75
IngestionDocument document =
new
(documentId);
95 references to IngestionDocument
Microsoft.Extensions.DataIngestion (18)
Chunkers\ElementsChunker.cs (1)
35
internal IEnumerable<IngestionChunk<string>> Process(
IngestionDocument
document, string context, List<IngestionDocumentElement> elements)
Chunkers\HeaderChunker.cs (2)
32
public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(
IngestionDocument
document,
68
private IEnumerable<IngestionChunk<string>> SplitIntoChunks(
IngestionDocument
document, string?[] headers, List<IngestionDocumentElement> elements)
Chunkers\SectionChunker.cs (3)
13
/// Treats each <see cref="IngestionDocumentSection" /> in a <see cref="
IngestionDocument
.Sections"/> as a separate entity.
29
public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(
IngestionDocument
document, [EnumeratorCancellation] CancellationToken cancellationToken = default)
47
private void Process(
IngestionDocument
document, IngestionDocumentSection section, List<IngestionChunk<string>> chunks, string? parentContext = null)
Chunkers\SemanticSimilarityChunker.cs (4)
16
/// Splits a <see cref="
IngestionDocument
"/> into chunks based on semantic similarity between its elements based on cosine distance of their embeddings.
47
public override async IAsyncEnumerable<IngestionChunk<string>> ProcessAsync(
IngestionDocument
document,
59
private async Task<List<(IngestionDocumentElement element, float distance)>> CalculateDistancesAsync(
IngestionDocument
documents, CancellationToken cancellationToken)
96
private IEnumerable<IngestionChunk<string>> MakeChunks(
IngestionDocument
document, List<(IngestionDocumentElement element, float distance)> elementDistances)
IngestionPipeline.cs (3)
148
IngestionDocument
? document = null;
173
private async Task<
IngestionDocument
> IngestAsync(
IngestionDocument
document, Activity? parentActivity, CancellationToken cancellationToken)
IngestionResult.cs (2)
22
public
IngestionDocument
? Document { get; }
34
internal IngestionResult(string documentId,
IngestionDocument
? document, Exception? exception)
Processors\ImageAlternativeTextEnricher.cs (2)
36
public override async Task<
IngestionDocument
> ProcessAsync(
IngestionDocument
document, CancellationToken cancellationToken = default)
Writers\VectorStoreWriter.cs (1)
164
private async Task<IReadOnlyList<object>> GetPreExistingChunksIdsAsync(
IngestionDocument
document, CancellationToken cancellationToken)
Microsoft.Extensions.DataIngestion.Abstractions (18)
IngestionChunk.cs (3)
12
/// Represents a chunk of content extracted from an <see cref="
IngestionDocument
"/>.
32
public IngestionChunk(T content,
IngestionDocument
document, string? context = null)
55
public
IngestionDocument
Document { get; }
IngestionChunker.cs (2)
10
/// Splits an <see cref="
IngestionDocument
"/> into chunks.
21
public abstract IAsyncEnumerable<IngestionChunk<T>> ProcessAsync(
IngestionDocument
document, CancellationToken cancellationToken = default);
IngestionDocument.cs (1)
16
/// Initializes a new instance of the <see cref="
IngestionDocument
"/> class.
IngestionDocumentElement.cs (1)
15
/// Represents an element within an <see cref="
IngestionDocument
"/>.
IngestionDocumentProcessor.cs (4)
10
/// Part of the document processing pipeline that takes a <see cref="
IngestionDocument
"/> as input and produces a (potentially modified) <see cref="
IngestionDocument
"/> as output.
20
public abstract Task<
IngestionDocument
> ProcessAsync(
IngestionDocument
document, CancellationToken cancellationToken = default);
IngestionDocumentReader.cs (7)
13
/// Reads source content and converts it to an <see cref="
IngestionDocument
"/>.
18
/// Reads a file and converts it to an <see cref="
IngestionDocument
"/>.
24
public Task<
IngestionDocument
> ReadAsync(FileInfo source, CancellationToken cancellationToken = default)
31
/// Reads a file and converts it to an <see cref="
IngestionDocument
"/>.
39
public virtual async Task<
IngestionDocument
> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
49
/// Reads a stream and converts it to an <see cref="
IngestionDocument
"/>.
56
public abstract Task<
IngestionDocument
> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default);
Microsoft.Extensions.DataIngestion.Markdig (6)
MarkdownParser.cs (3)
18
internal static
IngestionDocument
Parse(string markdown, string identifier)
39
private static
IngestionDocument
Map(MarkdownDocument markdownDocument, string documentMarkdown, string identifier)
42
IngestionDocument
result = new(identifier)
MarkdownReader.cs (3)
12
/// Reads Markdown content and converts it to an <see cref="
IngestionDocument
"/>.
17
public override async Task<
IngestionDocument
> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
32
public override async Task<
IngestionDocument
> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
Microsoft.Extensions.DataIngestion.MarkItDown (7)
MarkItDownMcpReader.cs (2)
36
public override async Task<
IngestionDocument
> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
68
public override async Task<
IngestionDocument
> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
MarkItDownReader.cs (2)
34
public override async Task<
IngestionDocument
> ReadAsync(FileInfo source, string identifier, string? mediaType = null, CancellationToken cancellationToken = default)
97
public override async Task<
IngestionDocument
> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
src\Libraries\Microsoft.Extensions.DataIngestion.Markdig\MarkdownParser.cs (3)
18
internal static
IngestionDocument
Parse(string markdown, string identifier)
39
private static
IngestionDocument
Map(MarkdownDocument markdownDocument, string documentMarkdown, string identifier)
42
IngestionDocument
result = new(identifier)
Microsoft.Extensions.DataIngestion.Tests (46)
Chunkers\DocumentChunkerTests.cs (1)
26
IngestionDocument
emptyDoc = new("emptyDoc");
Chunkers\HeaderChunkerTests.cs (9)
18
IngestionDocument
doc = new("nonTrivial");
59
IngestionDocument
doc = new("longOne");
84
IngestionDocument
doc = new("longOne");
106
IngestionDocument
doc = new("withNewLines");
133
IngestionDocument
document = CreateDocumentWithLargeTable();
144
IngestionDocument
document = CreateDocumentWithLargeTable();
173
IngestionDocument
document = CreateDocumentWithLargeTable();
220
private static
IngestionDocument
CreateDocumentWithLargeTable()
233
IngestionDocument
doc = new("withNewLines");
Chunkers\SectionChunkerTests.cs (6)
23
IngestionDocument
doc = new IngestionDocument("doc");
42
IngestionDocument
doc = new("doc")
78
IngestionDocument
doc = new IngestionDocument("doc");
91
IngestionDocument
doc = new("doc")
147
IngestionDocument
doc = new IngestionDocument("twoChunksNoOverlapDoc");
166
IngestionDocument
doc = new IngestionDocument("doc");
Chunkers\SemanticSimilarityChunkerTests.cs (3)
37
IngestionDocument
doc = new IngestionDocument("doc");
65
IngestionDocument
doc = new IngestionDocument("doc");
139
IngestionDocument
doc = new("dotnet-languages");
IngestionDocumentTests.cs (1)
22
IngestionDocument
doc = new("withSubSections");
IngestionPipelineTests.cs (2)
178
public override IAsyncEnumerable<IngestionChunk<DataContent>> ProcessAsync(
IngestionDocument
document, CancellationToken cancellationToken = default)
194
? Task.FromException<
IngestionDocument
>(new ExpectedException())
Processors\AlternativeTextEnricherTests.cs (5)
95
IngestionDocument
document = new("withImage")
153
IngestionDocument
document = CreateDocument(batchSize, batchCount, _imageContent);
173
IngestionDocument
document = CreateDocument(options.BatchSize, BatchCount, _imageContent);
174
IngestionDocument
got = await sut.ProcessAsync(document);
184
private static
IngestionDocument
CreateDocument(int batchSize, int batchCount, ReadOnlyMemory<byte> imageContent)
Processors\ClassificationEnricherTests.cs (1)
18
private static readonly
IngestionDocument
_document = new("test");
Processors\KeywordEnricherTests.cs (1)
18
private static readonly
IngestionDocument
_document = new("test");
Processors\SentimentEnricherTests.cs (1)
18
private static readonly
IngestionDocument
_document = new("test");
Processors\SummaryEnricherTests.cs (1)
18
private static readonly
IngestionDocument
_document = new("test");
Readers\DocumentReaderConformanceTests.cs (5)
83
IngestionDocument
document = await CreateDocumentReader().ReadAsync(
98
IngestionDocument
document = await CreateDocumentReader().ReadAsync(inputFile);
119
var
document = await reader.ReadAsync(inputFile);
144
var
document = await CreateDocumentReader().ReadAsync(wordDoc, "doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
199
protected virtual void SimpleAsserts(
IngestionDocument
document, string source, string expectedId)
Readers\MarkdownReaderTests.cs (4)
44
IngestionDocument
document = await ReadAsync(markdownContent);
88
IngestionDocument
document = await ReadAsync(markdownContent);
116
IngestionDocument
document = await ReadAsync(markdownContent);
136
private async Task<
IngestionDocument
> ReadAsync(string content)
Readers\MarkItDownReaderTests.cs (1)
20
protected override void SimpleAsserts(
IngestionDocument
document, string source, string expectedId)
Utils\TestReader.cs (3)
13
public TestReader(Func<Stream, string, string, CancellationToken, Task<
IngestionDocument
>> readAsyncCallback)
18
public Func<Stream, string, string, CancellationToken, Task<
IngestionDocument
>> ReadAsyncCallback { get; }
20
public override Task<
IngestionDocument
> ReadAsync(Stream source, string identifier, string mediaType, CancellationToken cancellationToken = default)
Writers\VectorStoreWriterTests.cs (2)
26
IngestionDocument
document = new(documentId);
75
IngestionDocument
document = new(documentId);