1 write to _vocab
Microsoft.ML.Tokenizers (1)
Model\SentencePieceUnigramModel.cs (1)
29
_vocab
= new SortedDictionary<string, int>(OrdinalUtf8StringComparer.Instance);
8 references to _vocab
Microsoft.ML.Tokenizers (8)
Model\SentencePieceUnigramModel.cs (8)
52
_vocab
.Add(piece, i);
67
ByteCodeToIdOffset =
_vocab
.TryGetValue("<0x00>", out int id) ? id : MaxByteId;
71
_trie = new DoubleArrayTrie(
_vocab
);
81
_vocab
[modelProto.TrainerSpec.UnkPiece] = modelProto.TrainerSpec.UnkId;
82
_vocab
[modelProto.TrainerSpec.BosPiece] = modelProto.TrainerSpec.BosId;
83
_vocab
[modelProto.TrainerSpec.EosPiece] = modelProto.TrainerSpec.EosId;
91
_vocab
[modelProto.TrainerSpec.PadPiece] = modelProto.TrainerSpec.PadId;
96
public override IReadOnlyDictionary<string, int> Vocabulary => new ReadOnlyDictionary<string, int>(
_vocab
);