1 instantiation of BertTokenizer
Microsoft.ML.Tokenizers (1)
Model\BertTokenizer.cs (1)
803return new BertTokenizer(vocab, vocabReverse, options);
34 references to BertTokenizer
Microsoft.ML.Tokenizers (12)
Model\BertTokenizer.cs (12)
653/// Create a new instance of the <see cref="BertTokenizer"/> class. 657/// <returns>A new instance of the <see cref="BertTokenizer"/> class.</returns> 661public static BertTokenizer Create( 669/// Create a new instance of the <see cref="BertTokenizer"/> class. 673/// <returns>A new instance of the <see cref="BertTokenizer"/> class.</returns> 677public static BertTokenizer Create( 683/// Create a new instance of the <see cref="BertTokenizer"/> class asynchronously. 692public static async Task<BertTokenizer> CreateAsync( 708/// Create a new instance of the <see cref="BertTokenizer"/> class asynchronously. 717public static async Task<BertTokenizer> CreateAsync( 734private static BertTokenizer Create(Stream vocabStream, BertOptions? options, bool disposeStream) 756private static BertTokenizer Create(
Microsoft.ML.Tokenizers.Tests (22)
BertTokenizerTests.cs (22)
28BertTokenizer[] bertTokenizers = [BertTokenizer.Create(vocabFile), BertTokenizer.Create(vocabStream)]; 30foreach (var tokenizer in bertTokenizers) 95BertTokenizer[] bertTokenizers = [BertTokenizer.Create(vocabFile, new BertOptions { LowerCaseBeforeTokenization = false }), 96BertTokenizer.Create(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false })]; 98foreach (var tokenizer in bertTokenizers) 146BertTokenizer bertTokenizer = await BertTokenizer.CreateAsync(vocabStream); // lowercasing and no accent stripping 163bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false }); // no lowercasing and no accent stripping 178bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { RemoveNonSpacingMarks = true }); // lowercasing and accent stripping 192bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false, RemoveNonSpacingMarks = true }); // no lowercasing and accent stripping 221BertTokenizer bertTokenizer = await BertTokenizer.CreateAsync(vocabStream); // tokenize Chinese characters 240bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { IndividuallyTokenizeCjk = false }); // do not tokenize Chinese characters 274BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile); 374BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile); 467BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile);