2 writes to _vocab
Microsoft.ML.Tokenizers (2)
Model\SentencePieceUnigramModel.cs (2)
29
_vocab
= new SortedDictionary<string, int>(OrdinalUtf8StringComparer.Instance);
90
_vocab
= new SortedDictionary<string, int>(OrdinalUtf8StringComparer.Instance);
14 references to _vocab
Microsoft.ML.Tokenizers (14)
Model\SentencePieceUnigramModel.cs (14)
44
_vocab
.Add(piece, i);
59
ByteCodeToIdOffset =
_vocab
.TryGetValue("<0x00>", out int id) ? id : MaxByteId;
63
_trie = new DoubleArrayTrie(
_vocab
);
73
_vocab
[modelProto.TrainerSpec.UnkPiece] = modelProto.TrainerSpec.UnkId;
74
_vocab
[modelProto.TrainerSpec.BosPiece] = modelProto.TrainerSpec.BosId;
75
_vocab
[modelProto.TrainerSpec.EosPiece] = modelProto.TrainerSpec.EosId;
83
_vocab
[modelProto.TrainerSpec.PadPiece] = modelProto.TrainerSpec.PadId;
102
_vocab
.Add(item.Token, id++);
112
if (!
_vocab
.TryGetValue("<0x00>", out id))
122
_trie = new DoubleArrayTrie(
_vocab
);
127
if (!
_vocab
.TryGetValue(options.UnknownToken, out int unknownToken))
133
if (!
_vocab
.TryGetValue(options.BeginningOfSentenceToken, out int beginOfSentenceToken))
139
if (!
_vocab
.TryGetValue(options.EndOfSentenceToken, out int endOfSentenceToken))
146
public override IReadOnlyDictionary<string, int> Vocabulary => new ReadOnlyDictionary<string, int>(
_vocab
);