1 instantiation of BertTokenizer
Microsoft.ML.Tokenizers (1)
Model\BertTokenizer.cs (1)
811return new BertTokenizer(vocab, vocabReverse, options);
38 references to BertTokenizer
Microsoft.ML.Tokenizers (12)
Model\BertTokenizer.cs (12)
653/// Create a new instance of the <see cref="BertTokenizer"/> class. 657/// <returns>A new instance of the <see cref="BertTokenizer"/> class.</returns> 661public static BertTokenizer Create( 669/// Create a new instance of the <see cref="BertTokenizer"/> class. 673/// <returns>A new instance of the <see cref="BertTokenizer"/> class.</returns> 677public static BertTokenizer Create( 683/// Create a new instance of the <see cref="BertTokenizer"/> class asynchronously. 692public static async Task<BertTokenizer> CreateAsync( 708/// Create a new instance of the <see cref="BertTokenizer"/> class asynchronously. 717public static async Task<BertTokenizer> CreateAsync( 734private static BertTokenizer Create(Stream vocabStream, BertOptions? options, bool disposeStream) 756private static BertTokenizer Create(
Microsoft.ML.Tokenizers.Tests (26)
BertTokenizerTests.cs (26)
42BertTokenizer[] bertTokenizers = [BertTokenizer.Create(vocabFile, bertOptions), BertTokenizer.Create(vocabStream, bertOptions)]; 44foreach (var tokenizer in bertTokenizers) 113BertTokenizer[] bertTokenizers = [BertTokenizer.Create(vocabFile), BertTokenizer.Create(vocabStream)]; 115foreach (var tokenizer in bertTokenizers) 184BertTokenizer[] bertTokenizers = [BertTokenizer.Create(vocabFile, new BertOptions { LowerCaseBeforeTokenization = false }), 185BertTokenizer.Create(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false })]; 187foreach (var tokenizer in bertTokenizers) 235BertTokenizer bertTokenizer = await BertTokenizer.CreateAsync(vocabStream); // lowercasing and no accent stripping 252bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false }); // no lowercasing and no accent stripping 267bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { RemoveNonSpacingMarks = true }); // lowercasing and accent stripping 281bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false, RemoveNonSpacingMarks = true }); // no lowercasing and accent stripping 310BertTokenizer bertTokenizer = await BertTokenizer.CreateAsync(vocabStream); // tokenize Chinese characters 329bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { IndividuallyTokenizeCjk = false }); // do not tokenize Chinese characters 363BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile); 463BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile); 556BertTokenizer bertTokenizer = BertTokenizer.Create(vocabFile);