2 writes to RemoveNonSpacingMarks
Microsoft.ML.Tokenizers.Tests (2)
BertTokenizerTests.cs (2)
178
bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions {
RemoveNonSpacingMarks
= true }); // lowercasing and accent stripping
192
bertTokenizer = await BertTokenizer.CreateAsync(vocabStream, new BertOptions { LowerCaseBeforeTokenization = false,
RemoveNonSpacingMarks
= true }); // no lowercasing and accent stripping
2 references to RemoveNonSpacingMarks
Microsoft.ML.Tokenizers (2)
Model\BertTokenizer.cs (2)
51
RemoveNonSpacingMarks = options.
RemoveNonSpacingMarks
;
763
options.Normalizer ??= options.ApplyBasicTokenization ? new BertNormalizer(options.LowerCaseBeforeTokenization, options.IndividuallyTokenizeCjk, options.
RemoveNonSpacingMarks
) : null;